From 788acd17adf6b3d605b5ea66cf394eb81fc086a9 Mon Sep 17 00:00:00 2001 From: "pbos@webrtc.org" Date: Mon, 15 Dec 2014 09:41:24 +0000 Subject: [PATCH] Merge audio_processing changes. R=aluebs@webrtc.org, bjornv@webrtc.org BUG= Review URL: https://webrtc-codereview.appspot.com/32769004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7893 4adac7df-926f-26a2-2b94-8c16560cd09d --- .gitignore | 3 +- .../audio_processing/agc/agc_audio.pcm.sha1 | 1 + .../agc/agc_no_circular_buffer.dat.sha1 | 1 + .../agc/agc_pitch_gain.dat.sha1 | 1 + .../agc/agc_pitch_lag.dat.sha1 | 1 + .../agc/agc_spectral_peak.dat.sha1 | 1 + .../audio_processing/agc/agc_vad.dat.sha1 | 1 + .../agc/agc_voicing_prob.dat.sha1 | 1 + .../agc/agc_with_circular_buffer.dat.sha1 | 1 + .../transient/ajm-macbook-1-spke.gai.sha1 | 1 + .../transient/ajm-macbook-1-spke16m.pcm.sha1 | 1 + ...6m_chunk_10_transient_30_rational.dat.sha1 | 1 + .../transient/audio16kHz.pcm.sha1 | 1 + .../transient/audio32kHz.pcm.sha1 | 1 + .../transient/audio48kHz.pcm.sha1 | 1 + .../transient/audio8kHz.pcm.sha1 | 1 + .../transient/detect16kHz.dat.sha1 | 1 + .../transient/detect32kHz.dat.sha1 | 1 + .../transient/detect48kHz.dat.sha1 | 1 + .../transient/detect8kHz.dat.sha1 | 1 + .../transient/double-utils.dat.sha1 | 1 + .../transient/float-utils.dat.sha1 | 1 + .../transient/suppressed16kHz.pcm.sha1 | 1 + .../transient/suppressed32kHz.pcm.sha1 | 1 + .../transient/suppressed8kHz.pcm.sha1 | 1 + .../audio_processing/transient/wpd0.dat.sha1 | 1 + .../audio_processing/transient/wpd1.dat.sha1 | 1 + .../audio_processing/transient/wpd2.dat.sha1 | 1 + .../audio_processing/transient/wpd3.dat.sha1 | 1 + .../audio_processing/transient/wpd4.dat.sha1 | 1 + .../audio_processing/transient/wpd5.dat.sha1 | 1 + .../audio_processing/transient/wpd6.dat.sha1 | 1 + .../audio_processing/transient/wpd7.dat.sha1 | 1 + webrtc/modules/audio_processing/BUILD.gn | 62 ++- webrtc/modules/audio_processing/agc/agc.cc | 161 ++++++ webrtc/modules/audio_processing/agc/agc.h | 69 
+++ .../audio_processing/agc/agc_audio_proc.cc | 270 ++++++++++ .../audio_processing/agc/agc_audio_proc.h | 83 +++ .../agc/agc_audio_proc_internal.h | 81 +++ .../agc/agc_audio_proc_unittest.cc | 61 +++ .../agc/agc_manager_direct.cc | 436 ++++++++++++++++ .../audio_processing/agc/agc_manager_direct.h | 98 ++++ .../audio_processing/agc/agc_unittest.cc | 162 ++++++ .../audio_processing/agc/circular_buffer.cc | 136 +++++ .../audio_processing/agc/circular_buffer.h | 69 +++ .../agc/circular_buffer_unittest.cc | 132 +++++ webrtc/modules/audio_processing/agc/common.h | 27 + .../audio_processing/agc/gain_map_internal.h | 275 ++++++++++ webrtc/modules/audio_processing/agc/gmm.cc | 61 +++ webrtc/modules/audio_processing/agc/gmm.h | 45 ++ .../audio_processing/agc/gmm_unittest.cc | 65 +++ .../modules/audio_processing/agc/histogram.cc | 228 +++++++++ .../modules/audio_processing/agc/histogram.h | 91 ++++ .../agc/histogram_unittest.cc | 104 ++++ .../modules/audio_processing/agc/mock_agc.h | 36 ++ .../audio_processing/agc/noise_gmm_tables.h | 77 +++ .../audio_processing/agc/pitch_based_vad.cc | 123 +++++ .../audio_processing/agc/pitch_based_vad.h | 56 ++ .../agc/pitch_based_vad_unittest.cc | 71 +++ .../audio_processing/agc/pitch_internal.cc | 52 ++ .../audio_processing/agc/pitch_internal.h | 26 + .../agc/pitch_internal_unittest.cc | 50 ++ .../audio_processing/agc/pole_zero_filter.cc | 111 ++++ .../audio_processing/agc/pole_zero_filter.h | 50 ++ .../agc/pole_zero_filter_unittest.cc | 98 ++++ .../audio_processing/agc/standalone_vad.cc | 96 ++++ .../audio_processing/agc/standalone_vad.h | 70 +++ .../agc/standalone_vad_unittest.cc | 103 ++++ .../audio_processing/agc/test/fake_agc.h | 46 ++ .../audio_processing/agc/test/test_utils.cc | 63 +++ .../audio_processing/agc/test/test_utils.h | 28 + .../modules/audio_processing/agc/utility.cc | 35 ++ webrtc/modules/audio_processing/agc/utility.h | 23 + .../audio_processing/agc/voice_gmm_tables.h | 77 +++ 
.../audio_processing/audio_processing.gypi | 67 ++- .../audio_processing/audio_processing_impl.cc | 179 ++++++- .../audio_processing/audio_processing_impl.h | 32 +- .../audio_processing_impl_unittest.cc | 4 +- .../audio_processing_tests.gypi | 27 + .../transient/click_annotate.cc | 114 +++++ .../audio_processing/transient/common.h | 27 + .../transient/daubechies_8_wavelet_coeffs.h | 63 +++ .../transient/dyadic_decimator.h | 70 +++ .../transient/dyadic_decimator_unittest.cc | 126 +++++ .../audio_processing/transient/file_utils.cc | 257 ++++++++++ .../audio_processing/transient/file_utils.h | 119 +++++ .../transient/file_utils_unittest.cc | 484 ++++++++++++++++++ .../transient/moving_moments.cc | 49 ++ .../transient/moving_moments.h | 52 ++ .../transient/moving_moments_unittest.cc | 206 ++++++++ .../transient/test/plotDetection.m | 12 + .../transient/test/readDetection.m | 16 + .../audio_processing/transient/test/readPCM.m | 16 + .../transient/transient_detector.cc | 173 +++++++ .../transient/transient_detector.h | 87 ++++ .../transient/transient_detector_unittest.cc | 104 ++++ .../transient/transient_suppression_test.cc | 250 +++++++++ .../transient/transient_suppressor.cc | 424 +++++++++++++++ .../transient/transient_suppressor.h | 120 +++++ .../transient_suppressor_unittest.cc | 85 +++ .../audio_processing/transient/wpd_node.cc | 71 +++ .../audio_processing/transient/wpd_node.h | 46 ++ .../transient/wpd_node_unittest.cc | 66 +++ .../audio_processing/transient/wpd_tree.cc | 119 +++++ .../audio_processing/transient/wpd_tree.h | 91 ++++ .../transient/wpd_tree_unittest.cc | 198 +++++++ webrtc/modules/modules.gyp | 45 +- webrtc/modules/modules_unittests.isolate | 32 ++ 108 files changed, 7822 insertions(+), 51 deletions(-) create mode 100644 resources/audio_processing/agc/agc_audio.pcm.sha1 create mode 100644 resources/audio_processing/agc/agc_no_circular_buffer.dat.sha1 create mode 100644 resources/audio_processing/agc/agc_pitch_gain.dat.sha1 create mode 100644 
resources/audio_processing/agc/agc_pitch_lag.dat.sha1 create mode 100644 resources/audio_processing/agc/agc_spectral_peak.dat.sha1 create mode 100644 resources/audio_processing/agc/agc_vad.dat.sha1 create mode 100644 resources/audio_processing/agc/agc_voicing_prob.dat.sha1 create mode 100644 resources/audio_processing/agc/agc_with_circular_buffer.dat.sha1 create mode 100644 resources/audio_processing/transient/ajm-macbook-1-spke.gai.sha1 create mode 100644 resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm.sha1 create mode 100644 resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat.sha1 create mode 100644 resources/audio_processing/transient/audio16kHz.pcm.sha1 create mode 100644 resources/audio_processing/transient/audio32kHz.pcm.sha1 create mode 100644 resources/audio_processing/transient/audio48kHz.pcm.sha1 create mode 100644 resources/audio_processing/transient/audio8kHz.pcm.sha1 create mode 100644 resources/audio_processing/transient/detect16kHz.dat.sha1 create mode 100644 resources/audio_processing/transient/detect32kHz.dat.sha1 create mode 100644 resources/audio_processing/transient/detect48kHz.dat.sha1 create mode 100644 resources/audio_processing/transient/detect8kHz.dat.sha1 create mode 100644 resources/audio_processing/transient/double-utils.dat.sha1 create mode 100644 resources/audio_processing/transient/float-utils.dat.sha1 create mode 100644 resources/audio_processing/transient/suppressed16kHz.pcm.sha1 create mode 100644 resources/audio_processing/transient/suppressed32kHz.pcm.sha1 create mode 100644 resources/audio_processing/transient/suppressed8kHz.pcm.sha1 create mode 100644 resources/audio_processing/transient/wpd0.dat.sha1 create mode 100644 resources/audio_processing/transient/wpd1.dat.sha1 create mode 100644 resources/audio_processing/transient/wpd2.dat.sha1 create mode 100644 resources/audio_processing/transient/wpd3.dat.sha1 create mode 100644 
resources/audio_processing/transient/wpd4.dat.sha1 create mode 100644 resources/audio_processing/transient/wpd5.dat.sha1 create mode 100644 resources/audio_processing/transient/wpd6.dat.sha1 create mode 100644 resources/audio_processing/transient/wpd7.dat.sha1 create mode 100644 webrtc/modules/audio_processing/agc/agc.cc create mode 100644 webrtc/modules/audio_processing/agc/agc.h create mode 100644 webrtc/modules/audio_processing/agc/agc_audio_proc.cc create mode 100644 webrtc/modules/audio_processing/agc/agc_audio_proc.h create mode 100644 webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h create mode 100644 webrtc/modules/audio_processing/agc/agc_audio_proc_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/agc_manager_direct.cc create mode 100644 webrtc/modules/audio_processing/agc/agc_manager_direct.h create mode 100644 webrtc/modules/audio_processing/agc/agc_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/circular_buffer.cc create mode 100644 webrtc/modules/audio_processing/agc/circular_buffer.h create mode 100644 webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/common.h create mode 100644 webrtc/modules/audio_processing/agc/gain_map_internal.h create mode 100644 webrtc/modules/audio_processing/agc/gmm.cc create mode 100644 webrtc/modules/audio_processing/agc/gmm.h create mode 100644 webrtc/modules/audio_processing/agc/gmm_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/histogram.cc create mode 100644 webrtc/modules/audio_processing/agc/histogram.h create mode 100644 webrtc/modules/audio_processing/agc/histogram_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/mock_agc.h create mode 100644 webrtc/modules/audio_processing/agc/noise_gmm_tables.h create mode 100644 webrtc/modules/audio_processing/agc/pitch_based_vad.cc create mode 100644 webrtc/modules/audio_processing/agc/pitch_based_vad.h create mode 100644 
webrtc/modules/audio_processing/agc/pitch_based_vad_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/pitch_internal.cc create mode 100644 webrtc/modules/audio_processing/agc/pitch_internal.h create mode 100644 webrtc/modules/audio_processing/agc/pitch_internal_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/pole_zero_filter.cc create mode 100644 webrtc/modules/audio_processing/agc/pole_zero_filter.h create mode 100644 webrtc/modules/audio_processing/agc/pole_zero_filter_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/standalone_vad.cc create mode 100644 webrtc/modules/audio_processing/agc/standalone_vad.h create mode 100644 webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc create mode 100644 webrtc/modules/audio_processing/agc/test/fake_agc.h create mode 100644 webrtc/modules/audio_processing/agc/test/test_utils.cc create mode 100644 webrtc/modules/audio_processing/agc/test/test_utils.h create mode 100644 webrtc/modules/audio_processing/agc/utility.cc create mode 100644 webrtc/modules/audio_processing/agc/utility.h create mode 100644 webrtc/modules/audio_processing/agc/voice_gmm_tables.h create mode 100644 webrtc/modules/audio_processing/transient/click_annotate.cc create mode 100644 webrtc/modules/audio_processing/transient/common.h create mode 100644 webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h create mode 100644 webrtc/modules/audio_processing/transient/dyadic_decimator.h create mode 100644 webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc create mode 100644 webrtc/modules/audio_processing/transient/file_utils.cc create mode 100644 webrtc/modules/audio_processing/transient/file_utils.h create mode 100644 webrtc/modules/audio_processing/transient/file_utils_unittest.cc create mode 100644 webrtc/modules/audio_processing/transient/moving_moments.cc create mode 100644 webrtc/modules/audio_processing/transient/moving_moments.h create mode 100644 
webrtc/modules/audio_processing/transient/moving_moments_unittest.cc create mode 100644 webrtc/modules/audio_processing/transient/test/plotDetection.m create mode 100644 webrtc/modules/audio_processing/transient/test/readDetection.m create mode 100644 webrtc/modules/audio_processing/transient/test/readPCM.m create mode 100644 webrtc/modules/audio_processing/transient/transient_detector.cc create mode 100644 webrtc/modules/audio_processing/transient/transient_detector.h create mode 100644 webrtc/modules/audio_processing/transient/transient_detector_unittest.cc create mode 100644 webrtc/modules/audio_processing/transient/transient_suppression_test.cc create mode 100644 webrtc/modules/audio_processing/transient/transient_suppressor.cc create mode 100644 webrtc/modules/audio_processing/transient/transient_suppressor.h create mode 100644 webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc create mode 100644 webrtc/modules/audio_processing/transient/wpd_node.cc create mode 100644 webrtc/modules/audio_processing/transient/wpd_node.h create mode 100644 webrtc/modules/audio_processing/transient/wpd_node_unittest.cc create mode 100644 webrtc/modules/audio_processing/transient/wpd_tree.cc create mode 100644 webrtc/modules/audio_processing/transient/wpd_tree.h create mode 100644 webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc diff --git a/.gitignore b/.gitignore index 581c3bfcfd..1082352c1b 100644 --- a/.gitignore +++ b/.gitignore @@ -48,8 +48,7 @@ /links.db /net /out -/resources/*.* -/resources/*/*.* +/resources /talk/examples/android/bin /talk/examples/android/gen /talk/examples/android/libs diff --git a/resources/audio_processing/agc/agc_audio.pcm.sha1 b/resources/audio_processing/agc/agc_audio.pcm.sha1 new file mode 100644 index 0000000000..583d38ffbc --- /dev/null +++ b/resources/audio_processing/agc/agc_audio.pcm.sha1 @@ -0,0 +1 @@ +10a52dc6d6f15242a1aa549205657f2834353673 \ No newline at end of file diff --git 
a/resources/audio_processing/agc/agc_no_circular_buffer.dat.sha1 b/resources/audio_processing/agc/agc_no_circular_buffer.dat.sha1 new file mode 100644 index 0000000000..c413bb0755 --- /dev/null +++ b/resources/audio_processing/agc/agc_no_circular_buffer.dat.sha1 @@ -0,0 +1 @@ +61219028e15606a3adbbc61d393575ab36b4078b \ No newline at end of file diff --git a/resources/audio_processing/agc/agc_pitch_gain.dat.sha1 b/resources/audio_processing/agc/agc_pitch_gain.dat.sha1 new file mode 100644 index 0000000000..734005a11d --- /dev/null +++ b/resources/audio_processing/agc/agc_pitch_gain.dat.sha1 @@ -0,0 +1 @@ +ba0c6e93a5e6d351d95385699fb9a719b6a6d0cc \ No newline at end of file diff --git a/resources/audio_processing/agc/agc_pitch_lag.dat.sha1 b/resources/audio_processing/agc/agc_pitch_lag.dat.sha1 new file mode 100644 index 0000000000..781e7a8f9d --- /dev/null +++ b/resources/audio_processing/agc/agc_pitch_lag.dat.sha1 @@ -0,0 +1 @@ +590c6fe033665d11fa70dbbbd3e7d8f0b8a616ce \ No newline at end of file diff --git a/resources/audio_processing/agc/agc_spectral_peak.dat.sha1 b/resources/audio_processing/agc/agc_spectral_peak.dat.sha1 new file mode 100644 index 0000000000..473becc079 --- /dev/null +++ b/resources/audio_processing/agc/agc_spectral_peak.dat.sha1 @@ -0,0 +1 @@ +3a5a28763e3ad5cd0f2833a90b685f4da97c2002 \ No newline at end of file diff --git a/resources/audio_processing/agc/agc_vad.dat.sha1 b/resources/audio_processing/agc/agc_vad.dat.sha1 new file mode 100644 index 0000000000..fd704a035e --- /dev/null +++ b/resources/audio_processing/agc/agc_vad.dat.sha1 @@ -0,0 +1 @@ +7cae05c6902812609fa23ac04037485503b0924d \ No newline at end of file diff --git a/resources/audio_processing/agc/agc_voicing_prob.dat.sha1 b/resources/audio_processing/agc/agc_voicing_prob.dat.sha1 new file mode 100644 index 0000000000..f0d1d106db --- /dev/null +++ b/resources/audio_processing/agc/agc_voicing_prob.dat.sha1 @@ -0,0 +1 @@ +b1ea860f0bfad3e86fedc43cd8752821e0d75a46 \ No newline at end 
of file diff --git a/resources/audio_processing/agc/agc_with_circular_buffer.dat.sha1 b/resources/audio_processing/agc/agc_with_circular_buffer.dat.sha1 new file mode 100644 index 0000000000..996a4ceac4 --- /dev/null +++ b/resources/audio_processing/agc/agc_with_circular_buffer.dat.sha1 @@ -0,0 +1 @@ +49402cfaa36be32320167a65c8e96f70548f5257 \ No newline at end of file diff --git a/resources/audio_processing/transient/ajm-macbook-1-spke.gai.sha1 b/resources/audio_processing/transient/ajm-macbook-1-spke.gai.sha1 new file mode 100644 index 0000000000..e929ad4354 --- /dev/null +++ b/resources/audio_processing/transient/ajm-macbook-1-spke.gai.sha1 @@ -0,0 +1 @@ +7c80af623675b2284f4081cfd2df9a0227bbc2a0 \ No newline at end of file diff --git a/resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm.sha1 b/resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm.sha1 new file mode 100644 index 0000000000..654fad57b6 --- /dev/null +++ b/resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm.sha1 @@ -0,0 +1 @@ +04155a7e186deb7524e3013476de3eaabd59a1f8 \ No newline at end of file diff --git a/resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat.sha1 b/resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat.sha1 new file mode 100644 index 0000000000..762ae4c90f --- /dev/null +++ b/resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat.sha1 @@ -0,0 +1 @@ +6c33b25be2eb9b441429aabf203d5b4a9e734c63 \ No newline at end of file diff --git a/resources/audio_processing/transient/audio16kHz.pcm.sha1 b/resources/audio_processing/transient/audio16kHz.pcm.sha1 new file mode 100644 index 0000000000..b35750cdf7 --- /dev/null +++ b/resources/audio_processing/transient/audio16kHz.pcm.sha1 @@ -0,0 +1 @@ +81cb7e547fad2894b5702fa571f9eb55ed6e1096 \ No newline at end of file diff --git a/resources/audio_processing/transient/audio32kHz.pcm.sha1 
b/resources/audio_processing/transient/audio32kHz.pcm.sha1 new file mode 100644 index 0000000000..f6728e7c92 --- /dev/null +++ b/resources/audio_processing/transient/audio32kHz.pcm.sha1 @@ -0,0 +1 @@ +81cfcff6b0d70938fe74060ba0303504c31c6d7e \ No newline at end of file diff --git a/resources/audio_processing/transient/audio48kHz.pcm.sha1 b/resources/audio_processing/transient/audio48kHz.pcm.sha1 new file mode 100644 index 0000000000..126ff8501c --- /dev/null +++ b/resources/audio_processing/transient/audio48kHz.pcm.sha1 @@ -0,0 +1 @@ +01278951e13675a3467782e1d2f18273c05eef50 \ No newline at end of file diff --git a/resources/audio_processing/transient/audio8kHz.pcm.sha1 b/resources/audio_processing/transient/audio8kHz.pcm.sha1 new file mode 100644 index 0000000000..7f44983276 --- /dev/null +++ b/resources/audio_processing/transient/audio8kHz.pcm.sha1 @@ -0,0 +1 @@ +5fcb4621ea0f50c3fc9a63e4720ff52631258437 \ No newline at end of file diff --git a/resources/audio_processing/transient/detect16kHz.dat.sha1 b/resources/audio_processing/transient/detect16kHz.dat.sha1 new file mode 100644 index 0000000000..333e7c5dd7 --- /dev/null +++ b/resources/audio_processing/transient/detect16kHz.dat.sha1 @@ -0,0 +1 @@ +35639dd1b73b678360897975a91a7c8af0be3644 \ No newline at end of file diff --git a/resources/audio_processing/transient/detect32kHz.dat.sha1 b/resources/audio_processing/transient/detect32kHz.dat.sha1 new file mode 100644 index 0000000000..ff9485c61c --- /dev/null +++ b/resources/audio_processing/transient/detect32kHz.dat.sha1 @@ -0,0 +1 @@ +c9d3d0b81262ffaba7d358ad534e6fcb27c00076 \ No newline at end of file diff --git a/resources/audio_processing/transient/detect48kHz.dat.sha1 b/resources/audio_processing/transient/detect48kHz.dat.sha1 new file mode 100644 index 0000000000..0410b9adee --- /dev/null +++ b/resources/audio_processing/transient/detect48kHz.dat.sha1 @@ -0,0 +1 @@ +f46a3380c9285324e583965ef547fcaa1650f8b8 \ No newline at end of file diff --git 
a/resources/audio_processing/transient/detect8kHz.dat.sha1 b/resources/audio_processing/transient/detect8kHz.dat.sha1 new file mode 100644 index 0000000000..30c19bda23 --- /dev/null +++ b/resources/audio_processing/transient/detect8kHz.dat.sha1 @@ -0,0 +1 @@ +f625c14d134d69ad38b67295459406fc9947a705 \ No newline at end of file diff --git a/resources/audio_processing/transient/double-utils.dat.sha1 b/resources/audio_processing/transient/double-utils.dat.sha1 new file mode 100644 index 0000000000..3895184f5f --- /dev/null +++ b/resources/audio_processing/transient/double-utils.dat.sha1 @@ -0,0 +1 @@ +c26083880cd227178917b4df230520dbfb9b9bb1 \ No newline at end of file diff --git a/resources/audio_processing/transient/float-utils.dat.sha1 b/resources/audio_processing/transient/float-utils.dat.sha1 new file mode 100644 index 0000000000..1817c60d23 --- /dev/null +++ b/resources/audio_processing/transient/float-utils.dat.sha1 @@ -0,0 +1 @@ +0eaaf21344b4b030d6c0fb6dcc419e7d3959a148 \ No newline at end of file diff --git a/resources/audio_processing/transient/suppressed16kHz.pcm.sha1 b/resources/audio_processing/transient/suppressed16kHz.pcm.sha1 new file mode 100644 index 0000000000..7ea55c3660 --- /dev/null +++ b/resources/audio_processing/transient/suppressed16kHz.pcm.sha1 @@ -0,0 +1 @@ +9781792dc39d7aada6418370246eef9f544ca47b \ No newline at end of file diff --git a/resources/audio_processing/transient/suppressed32kHz.pcm.sha1 b/resources/audio_processing/transient/suppressed32kHz.pcm.sha1 new file mode 100644 index 0000000000..5f49bc04f9 --- /dev/null +++ b/resources/audio_processing/transient/suppressed32kHz.pcm.sha1 @@ -0,0 +1 @@ +8b2bd11b591521178232aae598e6df0d001051c4 \ No newline at end of file diff --git a/resources/audio_processing/transient/suppressed8kHz.pcm.sha1 b/resources/audio_processing/transient/suppressed8kHz.pcm.sha1 new file mode 100644 index 0000000000..b0086daf4e --- /dev/null +++ b/resources/audio_processing/transient/suppressed8kHz.pcm.sha1 @@ 
-0,0 +1 @@ +8a6c7ed696f9791f8cb5c5b061f07eb019affd49 \ No newline at end of file diff --git a/resources/audio_processing/transient/wpd0.dat.sha1 b/resources/audio_processing/transient/wpd0.dat.sha1 new file mode 100644 index 0000000000..9d9edd3751 --- /dev/null +++ b/resources/audio_processing/transient/wpd0.dat.sha1 @@ -0,0 +1 @@ +7c01839f888fe6e10276e1819bd5207668345dcf \ No newline at end of file diff --git a/resources/audio_processing/transient/wpd1.dat.sha1 b/resources/audio_processing/transient/wpd1.dat.sha1 new file mode 100644 index 0000000000..59ff08557d --- /dev/null +++ b/resources/audio_processing/transient/wpd1.dat.sha1 @@ -0,0 +1 @@ +f7553df9abca91401715185d97d1d9c20a2ecb9b \ No newline at end of file diff --git a/resources/audio_processing/transient/wpd2.dat.sha1 b/resources/audio_processing/transient/wpd2.dat.sha1 new file mode 100644 index 0000000000..3161de872d --- /dev/null +++ b/resources/audio_processing/transient/wpd2.dat.sha1 @@ -0,0 +1 @@ +0455d7042c64075e793285753a98f02268e6238b \ No newline at end of file diff --git a/resources/audio_processing/transient/wpd3.dat.sha1 b/resources/audio_processing/transient/wpd3.dat.sha1 new file mode 100644 index 0000000000..1a3b6f6473 --- /dev/null +++ b/resources/audio_processing/transient/wpd3.dat.sha1 @@ -0,0 +1 @@ +941cc5d0bfccfd1d6bd68a1d882975202f22b6de \ No newline at end of file diff --git a/resources/audio_processing/transient/wpd4.dat.sha1 b/resources/audio_processing/transient/wpd4.dat.sha1 new file mode 100644 index 0000000000..3e05085728 --- /dev/null +++ b/resources/audio_processing/transient/wpd4.dat.sha1 @@ -0,0 +1 @@ +a16139b3750a13b62327e2a78ea008493a2b508b \ No newline at end of file diff --git a/resources/audio_processing/transient/wpd5.dat.sha1 b/resources/audio_processing/transient/wpd5.dat.sha1 new file mode 100644 index 0000000000..aef4367c49 --- /dev/null +++ b/resources/audio_processing/transient/wpd5.dat.sha1 @@ -0,0 +1 @@ +6bf9272123656bc0561550a40734245709bbac10 \ No newline 
at end of file diff --git a/resources/audio_processing/transient/wpd6.dat.sha1 b/resources/audio_processing/transient/wpd6.dat.sha1 new file mode 100644 index 0000000000..355c31e246 --- /dev/null +++ b/resources/audio_processing/transient/wpd6.dat.sha1 @@ -0,0 +1 @@ +6a2667c6c4b3794776af1dabacc3575791023168 \ No newline at end of file diff --git a/resources/audio_processing/transient/wpd7.dat.sha1 b/resources/audio_processing/transient/wpd7.dat.sha1 new file mode 100644 index 0000000000..daf85ed18c --- /dev/null +++ b/resources/audio_processing/transient/wpd7.dat.sha1 @@ -0,0 +1 @@ +620cf1f732c99003ff0e5d6ae3350c0a2ea2a9d7 \ No newline at end of file diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn index fbc1e7a581..d3b1012f70 100644 --- a/webrtc/modules/audio_processing/BUILD.gn +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -22,25 +22,52 @@ declare_args() { source_set("audio_processing") { sources = [ - "aec/include/echo_cancellation.h", + "aec/aec_core.c", + "aec/aec_core.h", + "aec/aec_core_internal.h", + "aec/aec_rdft.c", + "aec/aec_rdft.h", + "aec/aec_resampler.c", + "aec/aec_resampler.h", "aec/echo_cancellation.c", "aec/echo_cancellation_internal.h", - "aec/aec_core.h", - "aec/aec_core.c", - "aec/aec_core_internal.h", - "aec/aec_rdft.h", - "aec/aec_rdft.c", - "aec/aec_resampler.h", - "aec/aec_resampler.c", - "aecm/include/echo_control_mobile.h", - "aecm/echo_control_mobile.c", + "aec/include/echo_cancellation.h", "aecm/aecm_core.c", "aecm/aecm_core.h", - "agc/include/gain_control.h", + "aecm/echo_control_mobile.c", + "aecm/include/echo_control_mobile.h", + "agc/agc.cc", + "agc/agc.h", + "agc/agc_audio_proc.cc", + "agc/agc_audio_proc.h", + "agc/agc_audio_proc_internal.h", + "agc/agc_manager_direct.cc", + "agc/agc_manager_direct.h", "agc/analog_agc.c", "agc/analog_agc.h", + "agc/circular_buffer.cc", + "agc/circular_buffer.h", + "agc/common.h", "agc/digital_agc.c", "agc/digital_agc.h", + 
"agc/gain_map_internal.h", + "agc/gmm.cc", + "agc/gmm.h", + "agc/histogram.cc", + "agc/histogram.h", + "agc/include/gain_control.h", + "agc/noise_gmm_tables.h", + "agc/pitch_based_vad.cc", + "agc/pitch_based_vad.h", + "agc/pitch_internal.cc", + "agc/pitch_internal.h", + "agc/pole_zero_filter.cc", + "agc/pole_zero_filter.h", + "agc/standalone_vad.cc", + "agc/standalone_vad.h", + "agc/utility.cc", + "agc/utility.h", + "agc/voice_gmm_tables.h", "audio_buffer.cc", "audio_buffer.h", "audio_processing_impl.cc", @@ -67,6 +94,19 @@ source_set("audio_processing") { "rms_level.h", "splitting_filter.cc", "splitting_filter.h", + "transient/common.h", + "transient/daubechies_8_wavelet_coeffs.h", + "transient/dyadic_decimator.h", + "transient/moving_moments.cc", + "transient/moving_moments.h", + "transient/transient_detector.cc", + "transient/transient_detector.h", + "transient/transient_suppressor.cc", + "transient/transient_suppressor.h", + "transient/wpd_node.cc", + "transient/wpd_node.h", + "transient/wpd_tree.cc", + "transient/wpd_tree.h", "typing_detection.cc", "typing_detection.h", "utility/delay_estimator.c", diff --git a/webrtc/modules/audio_processing/agc/agc.cc b/webrtc/modules/audio_processing/agc/agc.cc new file mode 100644 index 0000000000..298cfd9d88 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc.cc @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/agc/agc.h" + +#include +#include + +#include + +#include "webrtc/common_audio/resampler/include/resampler.h" +#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h" +#include "webrtc/modules/audio_processing/agc/common.h" +#include "webrtc/modules/audio_processing/agc/histogram.h" +#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h" +#include "webrtc/modules/audio_processing/agc/standalone_vad.h" +#include "webrtc/modules/audio_processing/agc/utility.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/compile_assert.h" + +namespace webrtc { +namespace { + +const int kDefaultLevelDbfs = -18; +const double kDefaultVoiceValue = 1.0; +const int kNumAnalysisFrames = 100; +const double kActivityThreshold = 0.3; + +} // namespace + +Agc::Agc() + : target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)), + last_voice_probability_(kDefaultVoiceValue), + target_level_dbfs_(kDefaultLevelDbfs), + standalone_vad_enabled_(true), + histogram_(Histogram::Create(kNumAnalysisFrames)), + inactive_histogram_(Histogram::Create()), + audio_processing_(new AgcAudioProc()), + pitch_based_vad_(new PitchBasedVad()), + standalone_vad_(StandaloneVad::Create()), + // Initialize to the most common resampling situation. + resampler_(new Resampler(32000, kSampleRateHz, kResamplerSynchronous)) { + } + +Agc::~Agc() {} + +float Agc::AnalyzePreproc(const int16_t* audio, int length) { + assert(length > 0); + int num_clipped = 0; + for (int i = 0; i < length; ++i) { + if (audio[i] == 32767 || audio[i] == -32768) + ++num_clipped; + } + return 1.0f * num_clipped / length; +} + +int Agc::Process(const int16_t* audio, int length, int sample_rate_hz) { + assert(length == sample_rate_hz / 100); + if (sample_rate_hz > 32000) { + return -1; + } + // Resample to the required rate. 
+ int16_t resampled[kLength10Ms]; + const int16_t* resampled_ptr = audio; + if (sample_rate_hz != kSampleRateHz) { + if (resampler_->ResetIfNeeded(sample_rate_hz, + kSampleRateHz, + kResamplerSynchronous) != 0) { + return -1; + } + resampler_->Push(audio, length, resampled, kLength10Ms, length); + resampled_ptr = resampled; + } + assert(length == kLength10Ms); + + if (standalone_vad_enabled_) { + if (standalone_vad_->AddAudio(resampled_ptr, length) != 0) + return -1; + } + + AudioFeatures features; + audio_processing_->ExtractFeatures(resampled_ptr, length, &features); + if (features.num_frames > 0) { + if (features.silence) { + // The other features are invalid, so update the histogram with an + // arbitrary low value. + for (int n = 0; n < features.num_frames; ++n) + histogram_->Update(features.rms[n], 0.01); + return 0; + } + + // Initialize to 0.5 which is a neutral value for combining probabilities, + // in case the standalone-VAD is not enabled. + double p_combined[] = {0.5, 0.5, 0.5, 0.5}; + COMPILE_ASSERT(sizeof(p_combined) / sizeof(p_combined[0]) == kMaxNumFrames, + combined_probability_incorrect_size); + if (standalone_vad_enabled_) { + if (standalone_vad_->GetActivity(p_combined, kMaxNumFrames) < 0) + return -1; + } + // If any other VAD is enabled it must be combined before calling the + // pitch-based VAD. + if (pitch_based_vad_->VoicingProbability(features, p_combined) < 0) + return -1; + for (int n = 0; n < features.num_frames; n++) { + histogram_->Update(features.rms[n], p_combined[n]); + last_voice_probability_ = p_combined[n]; + } + } + return 0; +} + +bool Agc::GetRmsErrorDb(int* error) { + if (!error) { + assert(false); + return false; + } + + if (histogram_->num_updates() < kNumAnalysisFrames) { + // We haven't yet received enough frames. + return false; + } + + if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) { + // We are likely in an inactive segment. 
+ return false; + } + + double loudness = Linear2Loudness(histogram_->CurrentRms()); + *error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5); + histogram_->Reset(); + return true; +} + +void Agc::Reset() { + histogram_->Reset(); +} + +int Agc::set_target_level_dbfs(int level) { + // TODO(turajs): just some arbitrary sanity check. We can come up with better + // limits. The upper limit should be chosen such that the risk of clipping is + // low. The lower limit should not result in a too quiet signal. + if (level >= 0 || level <= -100) + return -1; + target_level_dbfs_ = level; + target_level_loudness_ = Dbfs2Loudness(level); + return 0; +} + +void Agc::EnableStandaloneVad(bool enable) { + standalone_vad_enabled_ = enable; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/agc.h b/webrtc/modules/audio_processing/agc/agc.h new file mode 100644 index 0000000000..3c535d37e7 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_ + +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class AudioFrame; +class AgcAudioProc; +class Histogram; +class PitchBasedVad; +class Resampler; +class StandaloneVad; + +class Agc { + public: + Agc(); + virtual ~Agc(); + + // Returns the proportion of samples in the buffer which are at full-scale + // (and presumably clipped). 
+ virtual float AnalyzePreproc(const int16_t* audio, int length); + // |audio| must be mono; in a multi-channel stream, provide the first (usually + // left) channel. + virtual int Process(const int16_t* audio, int length, int sample_rate_hz); + + // Retrieves the difference between the target RMS level and the current + // signal RMS level in dB. Returns true if an update is available and false + // otherwise, in which case |error| should be ignored and no action taken. + virtual bool GetRmsErrorDb(int* error); + virtual void Reset(); + + virtual int set_target_level_dbfs(int level); + virtual int target_level_dbfs() const { return target_level_dbfs_; } + + virtual void EnableStandaloneVad(bool enable); + virtual bool standalone_vad_enabled() const { + return standalone_vad_enabled_; + } + + virtual double voice_probability() const { return last_voice_probability_; } + + private: + double target_level_loudness_; + double last_voice_probability_; + int target_level_dbfs_; + bool standalone_vad_enabled_; + scoped_ptr histogram_; + scoped_ptr inactive_histogram_; + scoped_ptr audio_processing_; + scoped_ptr pitch_based_vad_; + scoped_ptr standalone_vad_; + scoped_ptr resampler_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_ diff --git a/webrtc/modules/audio_processing/agc/agc_audio_proc.cc b/webrtc/modules/audio_processing/agc/agc_audio_proc.cc new file mode 100644 index 0000000000..002b201f80 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc_audio_proc.cc @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h" + +#include +#include + +#include "webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h" +#include "webrtc/modules/audio_processing/agc/pitch_internal.h" +#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h" +extern "C" { +#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h" +#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h" +#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" +#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h" +#include "webrtc/modules/audio_processing/utility/fft4g.h" +} +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/compile_assert.h" + +namespace webrtc { + +// The following structures are declared anonymous in iSAC's structs.h. To +// forward declare them, we use this derived class trick. +struct AgcAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {}; +struct AgcAudioProc::PreFiltBankstr : public ::PreFiltBankstr {}; + +static const float kFrequencyResolution = kSampleRateHz / + static_cast(AgcAudioProc::kDftSize); +static const int kSilenceRms = 5; + +// TODO(turajs): Make a Create or Init for AgcAudioProc. +AgcAudioProc::AgcAudioProc() + : audio_buffer_(), + num_buffer_samples_(kNumPastSignalSamples), + log_old_gain_(-2), + old_lag_(50), // Arbitrary but valid as pitch-lag (in samples). 
+ pitch_analysis_handle_(new PitchAnalysisStruct), + pre_filter_handle_(new PreFiltBankstr), + high_pass_filter_(PoleZeroFilter::Create( + kCoeffNumerator, kFilterOrder, kCoeffDenominator, kFilterOrder)) { + COMPILE_ASSERT(kNumPastSignalSamples + kNumSubframeSamples == + sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]), + lpc_analysis_window_incorrect_size); + COMPILE_ASSERT(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]), + correlation_weight_incorrect_size); + + // TODO(turajs): Are we doing too much in the constructor? + float data[kDftSize]; + // Make FFT to initialize. + ip_[0] = 0; + WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); + // TODO(turajs): Need to initialize high-pass filter. + + // Initialize iSAC components. + WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get()); + WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get()); +} + +AgcAudioProc::~AgcAudioProc() {} + +void AgcAudioProc::ResetBuffer() { + memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess], + sizeof(audio_buffer_[0]) * kNumPastSignalSamples); + num_buffer_samples_ = kNumPastSignalSamples; +} + +int AgcAudioProc::ExtractFeatures(const int16_t* frame, + int length, + AudioFeatures* features) { + features->num_frames = 0; + if (length != kNumSubframeSamples) { + return -1; + } + + // High-pass filter to remove the DC component and very low frequency content. + // We have experienced that this high-pass filtering improves voice/non-voiced + // classification. 
+ if (high_pass_filter_->Filter(frame, kNumSubframeSamples, + &audio_buffer_[num_buffer_samples_]) != 0) { + return -1; + } + + num_buffer_samples_ += kNumSubframeSamples; + if (num_buffer_samples_ < kBufferLength) { + return 0; + } + assert(num_buffer_samples_ == kBufferLength); + features->num_frames = kNum10msSubframes; + features->silence = false; + + Rms(features->rms, kMaxNumFrames); + for (int i = 0; i < kNum10msSubframes; ++i) { + if (features->rms[i] < kSilenceRms) { + // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence. + // Bail out here instead. + features->silence = true; + ResetBuffer(); + return 0; + } + } + + PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz, + kMaxNumFrames); + FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames); + ResetBuffer(); + return 0; +} + +// Computes |kLpcOrder + 1| correlation coefficients. +void AgcAudioProc::SubframeCorrelation(double* corr, int length_corr, + int subframe_index) { + assert(length_corr >= kLpcOrder + 1); + double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples]; + int buffer_index = subframe_index * kNumSubframeSamples; + + for (int n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++) + windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n]; + + WebRtcIsac_AutoCorr(corr, windowed_audio, kNumSubframeSamples + + kNumPastSignalSamples, kLpcOrder); +} + +// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input. +// The analysis window is 15 ms long and it is centered on the first half of +// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the +// first half of each 10 ms subframe. 
+void AgcAudioProc::GetLpcPolynomials(double* lpc, int length_lpc) { + assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1)); + double corr[kLpcOrder + 1]; + double reflec_coeff[kLpcOrder]; + for (int i = 0, offset_lpc = 0; i < kNum10msSubframes; + i++, offset_lpc += kLpcOrder + 1) { + SubframeCorrelation(corr, kLpcOrder + 1, i); + corr[0] *= 1.0001; + // This makes Lev-Durb a bit more stable. + for (int k = 0; k < kLpcOrder + 1; k++) { + corr[k] *= kCorrWeight[k]; + } + WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder); + } +} + +// Fit a second order curve to these 3 points and find the location of the +// extremum. The points are inverted before curve fitting. +static float QuadraticInterpolation(float prev_val, float curr_val, + float next_val) { + // Doing the interpolation in |1 / A(z)|^2. + float fractional_index = 0; + next_val = 1.0f / next_val; + prev_val = 1.0f / prev_val; + curr_val = 1.0f / curr_val; + + fractional_index = -(next_val - prev_val) * 0.5f / (next_val + prev_val - + 2.f * curr_val); + assert(fabs(fractional_index) < 1); + return fractional_index; +} + +// 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope +// of the input signal. The local maximum of the spectral envelope corresponds +// with the local minimum of A(z). It saves complexity, as we save one +// inversion. Furthermore, we find the first local maximum of magnitude squared, +// to save on one square root. +void AgcAudioProc::FindFirstSpectralPeaks(double* f_peak, int length_f_peak) { + assert(length_f_peak >= kNum10msSubframes); + double lpc[kNum10msSubframes * (kLpcOrder + 1)]; + // For all sub-frames. + GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1)); + + const int kNumDftCoefficients = kDftSize / 2 + 1; + float data[kDftSize]; + + for (int i = 0; i < kNum10msSubframes; i++) { + // Convert to float with zero pad. 
+ memset(data, 0, sizeof(data)); + for (int n = 0; n < kLpcOrder + 1; n++) { + data[n] = static_cast(lpc[i * (kLpcOrder + 1) + n]); + } + // Transform to frequency domain. + WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); + + int index_peak = 0; + float prev_magn_sqr = data[0] * data[0]; + float curr_magn_sqr = data[2] * data[2] + data[3] * data[3]; + float next_magn_sqr; + bool found_peak = false; + for (int n = 2; n < kNumDftCoefficients - 1; n++) { + next_magn_sqr = data[2 * n] * data[2 * n] + + data[2 * n + 1] * data[2 * n + 1]; + if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { + found_peak = true; + index_peak = n - 1; + break; + } + prev_magn_sqr = curr_magn_sqr; + curr_magn_sqr = next_magn_sqr; + } + float fractional_index = 0; + if (!found_peak) { + // Checking if |kNumDftCoefficients - 1| is the local minimum. + next_magn_sqr = data[1] * data[1]; + if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { + index_peak = kNumDftCoefficients - 1; + } + } else { + // A peak is found, do a simple quadratic interpolation to get a more + // accurate estimate of the peak location. + fractional_index = QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, + next_magn_sqr); + } + f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution; + } +} + +// Using iSAC functions to estimate pitch gains & lags. +void AgcAudioProc::PitchAnalysis(double* log_pitch_gains, double* pitch_lags_hz, + int length) { + // TODO(turajs): This can be "imported" from iSAC & and the next two + // constants. 
+ assert(length >= kNum10msSubframes); + const int kNumPitchSubframes = 4; + double gains[kNumPitchSubframes]; + double lags[kNumPitchSubframes]; + + const int kNumSubbandFrameSamples = 240; + const int kNumLookaheadSamples = 24; + + float lower[kNumSubbandFrameSamples]; + float upper[kNumSubbandFrameSamples]; + double lower_lookahead[kNumSubbandFrameSamples]; + double upper_lookahead[kNumSubbandFrameSamples]; + double lower_lookahead_pre_filter[kNumSubbandFrameSamples + + kNumLookaheadSamples]; + + // Split signal to lower and upper bands + WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], + lower, upper, lower_lookahead, upper_lookahead, + pre_filter_handle_.get()); + WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter, + pitch_analysis_handle_.get(), lags, gains); + + // Lags are computed on lower-band signal with sampling rate half of the + // input signal. + GetSubframesPitchParameters(kSampleRateHz / 2, gains, lags, + kNumPitchSubframes, kNum10msSubframes, + &log_old_gain_, &old_lag_, + log_pitch_gains, pitch_lags_hz); +} + +void AgcAudioProc::Rms(double* rms, int length_rms) { + assert(length_rms >= kNum10msSubframes); + int offset = kNumPastSignalSamples; + for (int i = 0; i < kNum10msSubframes; i++) { + rms[i] = 0; + for (int n = 0; n < kNumSubframeSamples; n++, offset++) + rms[i] += audio_buffer_[offset] * audio_buffer_[offset]; + rms[i] = sqrt(rms[i] / kNumSubframeSamples); + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/agc_audio_proc.h b/webrtc/modules/audio_processing/agc/agc_audio_proc.h new file mode 100644 index 0000000000..aedc20b32d --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc_audio_proc.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_ + +#include "webrtc/modules/audio_processing/agc/common.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class AudioFrame; +class PoleZeroFilter; + +class AgcAudioProc { + public: + // Forward declare iSAC structs. + struct PitchAnalysisStruct; + struct PreFiltBankstr; + + AgcAudioProc(); + ~AgcAudioProc(); + + int ExtractFeatures(const int16_t* audio_frame, + int length, + AudioFeatures* audio_features); + + static const int kDftSize = 512; + + private: + void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, int length); + void SubframeCorrelation(double* corr, int lenght_corr, int subframe_index); + void GetLpcPolynomials(double* lpc, int length_lpc); + void FindFirstSpectralPeaks(double* f_peak, int length_f_peak); + void Rms(double* rms, int length_rms); + void ResetBuffer(); + + // To compute spectral peak we perform LPC analysis to get spectral envelope. + // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis. + // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame + // we need 5 ms of past signal to create the input of LPC analysis. + static const int kNumPastSignalSamples = kSampleRateHz / 200; + + // TODO(turajs): maybe defining this at a higher level (maybe enum) so that + // all the code recognize it as "no-error." + static const int kNoError = 0; + + static const int kNum10msSubframes = 3; + static const int kNumSubframeSamples = kSampleRateHz / 100; + static const int kNumSamplesToProcess = kNum10msSubframes * + kNumSubframeSamples; // Samples in 30 ms @ given sampling rate. 
+ static const int kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess; + static const int kIpLength = kDftSize >> 1; + static const int kWLength = kDftSize >> 1; + + static const int kLpcOrder = 16; + + int ip_[kIpLength]; + float w_fft_[kWLength]; + + // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ). + float audio_buffer_[kBufferLength]; + int num_buffer_samples_; + + double log_old_gain_; + double old_lag_; + + scoped_ptr pitch_analysis_handle_; + scoped_ptr pre_filter_handle_; + scoped_ptr high_pass_filter_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_ diff --git a/webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h b/webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h new file mode 100644 index 0000000000..dc125ef050 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_ + +#include "webrtc/system_wrappers/interface/compile_assert.h" + +namespace webrtc { + +// These values should match MATLAB counterparts for unit-tests to pass. 
+static const double kCorrWeight[] = { + 1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217, 0.913308, + 0.899609, 0.886115, 0.872823, 0.859730, 0.846834, 0.834132, 0.821620, + 0.809296, 0.797156, 0.785199 +}; + +static const double kLpcAnalWin[] = { + 0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639, + 0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883, + 0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547, + 0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438, + 0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222, + 0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713, + 0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164, + 0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546, + 0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810, + 0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148, + 0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233, + 0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442, + 0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069, + 0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512, + 0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447, + 0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979, + 0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773, + 0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158, + 0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215, + 0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840, + 0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778, + 0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639, + 0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889, + 0.97057773, 0.96732888, 0.96391289, 
0.96033035, 0.95658189, 0.95266814, + 0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465, + 0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574, + 0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451, + 0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858, + 0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862, + 0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664, + 0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416, + 0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008, + 0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853, + 0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642, + 0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093, + 0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687, + 0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387, + 0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358, + 0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670, + 0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000 +}; + +static const int kFilterOrder = 2; +static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f, + 0.974827f}; +static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f, + 0.972457f}; + +COMPILE_ASSERT(kFilterOrder + 1 == sizeof(kCoeffNumerator) / + sizeof(kCoeffNumerator[0]), numerator_coefficients_incorrect_size); +COMPILE_ASSERT(kFilterOrder + 1 == sizeof(kCoeffDenominator) / + sizeof(kCoeffDenominator[0]), denominator_coefficients_incorrect_size); + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AUDIO_PROCESSING_H_ diff --git a/webrtc/modules/audio_processing/agc/agc_audio_proc_unittest.cc b/webrtc/modules/audio_processing/agc/agc_audio_proc_unittest.cc new file mode 100644 index 
0000000000..9534aec2ec --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc_audio_proc_unittest.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// We don't test the value of pitch gain and lags as they are created by iSAC +// routines. However, interpolation of pitch-gain and lags is in a separate +// class and has its own unit-test. + +#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h" + +#include +#include + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_processing/agc/common.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/test/testsupport/fileutils.h" + +namespace webrtc { + +TEST(AudioProcessingTest, DISABLED_ComputingFirstSpectralPeak) { + AgcAudioProc audioproc; + + std::string peak_file_name = + test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat"); + FILE* peak_file = fopen(peak_file_name.c_str(), "rb"); + ASSERT_TRUE(peak_file != NULL); + + std::string pcm_file_name = + test::ResourcePath("audio_processing/agc/agc_audio", "pcm"); + FILE* pcm_file = fopen(pcm_file_name.c_str(), "rb"); + ASSERT_TRUE(pcm_file != NULL); + + // Read 10 ms audio in each iteration. + const size_t kDataLength = kLength10Ms; + int16_t data[kDataLength] = { 0 }; + AudioFeatures features; + double sp[kMaxNumFrames]; + while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) { + audioproc.ExtractFeatures(data, kDataLength, &features); + if (features.num_frames > 0) { + ASSERT_LT(features.num_frames, kMaxNumFrames); + // Read reference values. 
+ const size_t num_frames = features.num_frames; + ASSERT_EQ(num_frames, fread(sp, sizeof(sp[0]), num_frames, peak_file)); + for (int n = 0; n < features.num_frames; n++) + EXPECT_NEAR(features.spectral_peak[n], sp[n], 3); + } + } + + fclose(peak_file); + fclose(pcm_file); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/agc_manager_direct.cc b/webrtc/modules/audio_processing/agc/agc_manager_direct.cc new file mode 100644 index 0000000000..37248c1830 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc_manager_direct.cc @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h" + +#include +#include + +#ifdef WEBRTC_AGC_DEBUG_DUMP +#include +#endif + +#include "webrtc/modules/audio_processing/agc/gain_map_internal.h" +#include "webrtc/modules/audio_processing/gain_control_impl.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/compile_assert.h" +#include "webrtc/system_wrappers/interface/logging.h" + +namespace webrtc { + +namespace { + +// Lowest the microphone level can be lowered due to clipping. +const int kClippedLevelMin = 170; +// Amount the microphone level is lowered with every clipping event. +const int kClippedLevelStep = 15; +// Proportion of clipped samples required to declare a clipping event. +const float kClippedRatioThreshold = 0.1f; +// Time in frames to wait after a clipping event before checking again. 
+const int kClippedWaitFrames = 300; + +// Amount of error we tolerate in the microphone level (presumably due to OS +// quantization) before we assume the user has manually adjusted the microphone. +const int kLevelQuantizationSlack = 25; + +const int kDefaultCompressionGain = 7; +const int kMaxCompressionGain = 12; +const int kMinCompressionGain = 2; +// Controls the rate of compression changes towards the target. +const float kCompressionGainStep = 0.05f; + +const int kMaxMicLevel = 255; +COMPILE_ASSERT(kGainMapSize > kMaxMicLevel, gain_map_too_small); +const int kMinMicLevel = 12; +const int kMinInitMicLevel = 85; + +// Prevent very large microphone level changes. +const int kMaxResidualGainChange = 15; + +// Maximum additional gain allowed to compensate for microphone level +// restrictions from clipping events. +const int kSurplusCompressionGain = 6; + +int LevelFromGainError(int gain_error, int level) { + assert(level >= 0 && level <= kMaxMicLevel); + if (gain_error == 0) { + return level; + } + // TODO(ajm): Could be made more efficient with a binary search. + int new_level = level; + if (gain_error > 0) { + while (kGainMap[new_level] - kGainMap[level] < gain_error && + new_level < kMaxMicLevel) { + ++new_level; + } + } else { + while (kGainMap[new_level] - kGainMap[level] > gain_error && + new_level > kMinMicLevel) { + --new_level; + } + } + return new_level; +} + +} // namespace + +// Facility for dumping debug audio files. All methods are no-ops in the +// default case where WEBRTC_AGC_DEBUG_DUMP is undefined. 
+class DebugFile { +#ifdef WEBRTC_AGC_DEBUG_DUMP + public: + explicit DebugFile(const char* filename) + : file_(fopen(filename, "wb")) { + assert(file_); + } + ~DebugFile() { + fclose(file_); + } + void Write(const int16_t* data, int length_samples) { + fwrite(data, 1, length_samples * sizeof(int16_t), file_); + } + private: + FILE* file_; +#else + public: + explicit DebugFile(const char* filename) { + } + ~DebugFile() { + } + void Write(const int16_t* data, int length_samples) { + } +#endif // WEBRTC_AGC_DEBUG_DUMP +}; + +AgcManagerDirect::AgcManagerDirect(GainControl* gctrl, + VolumeCallbacks* volume_callbacks) + : agc_(new Agc()), + gctrl_(gctrl), + volume_callbacks_(volume_callbacks), + frames_since_clipped_(kClippedWaitFrames), + level_(0), + max_level_(kMaxMicLevel), + max_compression_gain_(kMaxCompressionGain), + target_compression_(kDefaultCompressionGain), + compression_(target_compression_), + compression_accumulator_(compression_), + capture_muted_(false), + check_volume_on_next_process_(true), // Check at startup. + startup_(true), + file_preproc_(new DebugFile("agc_preproc.pcm")), + file_postproc_(new DebugFile("agc_postproc.pcm")) { +} + +AgcManagerDirect::AgcManagerDirect(Agc* agc, + GainControl* gctrl, + VolumeCallbacks* volume_callbacks) + : agc_(agc), + gctrl_(gctrl), + volume_callbacks_(volume_callbacks), + frames_since_clipped_(kClippedWaitFrames), + level_(0), + max_level_(kMaxMicLevel), + max_compression_gain_(kMaxCompressionGain), + target_compression_(kDefaultCompressionGain), + compression_(target_compression_), + compression_accumulator_(compression_), + capture_muted_(false), + check_volume_on_next_process_(true), // Check at startup. 
+ startup_(true), + file_preproc_(new DebugFile("agc_preproc.pcm")), + file_postproc_(new DebugFile("agc_postproc.pcm")) { +} + +AgcManagerDirect::~AgcManagerDirect() {} + +int AgcManagerDirect::Initialize() { + max_level_ = kMaxMicLevel; + max_compression_gain_ = kMaxCompressionGain; + target_compression_ = kDefaultCompressionGain; + compression_ = target_compression_; + compression_accumulator_ = compression_; + capture_muted_ = false; + check_volume_on_next_process_ = true; + // TODO(bjornv): Investigate if we need to reset |startup_| as well. For + // example, what happens when we change devices. + + if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) { + LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital); + return -1; + } + if (gctrl_->set_target_level_dbfs(2) != 0) { + LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2); + return -1; + } + if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) { + LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain); + return -1; + } + if (gctrl_->enable_limiter(true) != 0) { + LOG_FERR1(LS_ERROR, enable_limiter, true); + return -1; + } + return 0; +} + +void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, + int num_channels, + int samples_per_channel) { + int length = num_channels * samples_per_channel; + if (capture_muted_) { + return; + } + + file_preproc_->Write(audio, length); + + if (frames_since_clipped_ < kClippedWaitFrames) { + ++frames_since_clipped_; + return; + } + + // Check for clipped samples, as the AGC has difficulty detecting pitch + // under clipping distortion. We do this in the preprocessing phase in order + // to catch clipped echo as well. + // + // If we find a sufficiently clipped frame, drop the current microphone level + // and enforce a new maximum level, dropped the same amount from the current + // maximum. This harsh treatment is an effort to avoid repeated clipped echo + // events. 
As compensation for this restriction, the maximum compression + // gain is increased, through SetMaxLevel(). + float clipped_ratio = agc_->AnalyzePreproc(audio, length); + if (clipped_ratio > kClippedRatioThreshold) { + LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" + << clipped_ratio; + // Always decrease the maximum level, even if the current level is below + // threshold. + SetMaxLevel(std::max(kClippedLevelMin, max_level_ - kClippedLevelStep)); + if (level_ > kClippedLevelMin) { + // Don't try to adjust the level if we're already below the limit. As + // a consequence, if the user has brought the level above the limit, we + // will still not react until the postproc updates the level. + SetLevel(std::max(kClippedLevelMin, level_ - kClippedLevelStep)); + // Reset the AGC since the level has changed. + agc_->Reset(); + } + frames_since_clipped_ = 0; + } +} + +void AgcManagerDirect::Process(const int16_t* audio, + int length, + int sample_rate_hz) { + if (capture_muted_) { + return; + } + + if (check_volume_on_next_process_) { + check_volume_on_next_process_ = false; + // We have to wait until the first process call to check the volume, + // because Chromium doesn't guarantee it to be valid any earlier. 
+ CheckVolumeAndReset(); + } + + if (agc_->Process(audio, length, sample_rate_hz) != 0) { + LOG_FERR0(LS_ERROR, Agc::Process); + assert(false); + } + + UpdateGain(); + UpdateCompressor(); + + file_postproc_->Write(audio, length); +} + +void AgcManagerDirect::SetLevel(int new_level) { + int voe_level = volume_callbacks_->GetMicVolume(); + if (voe_level < 0) { + return; + } + if (voe_level == 0) { + LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action."; + return; + } + if (voe_level > kMaxMicLevel) { + LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level; + return; + } + + if (voe_level > level_ + kLevelQuantizationSlack || + voe_level < level_ - kLevelQuantizationSlack) { + LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating " + << "stored level from " << level_ << " to " << voe_level; + level_ = voe_level; + // Always allow the user to increase the volume. + if (level_ > max_level_) { + SetMaxLevel(level_); + } + // Take no action in this case, since we can't be sure when the volume + // was manually adjusted. The compressor will still provide some of the + // desired gain change. + agc_->Reset(); + return; + } + + new_level = std::min(new_level, max_level_); + if (new_level == level_) { + return; + } + + volume_callbacks_->SetMicVolume(new_level); + LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", " + << "level_=" << level_ << ", " + << "new_level=" << new_level; + level_ = new_level; +} + +void AgcManagerDirect::SetMaxLevel(int level) { + assert(level >= kClippedLevelMin); + max_level_ = level; + // Scale the |kSurplusCompressionGain| linearly across the restricted + // level range. 
+ max_compression_gain_ = kMaxCompressionGain + std::floor( + (1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin) * + kSurplusCompressionGain + 0.5f); + LOG(LS_INFO) << "[agc] max_level_=" << max_level_ + << ", max_compression_gain_=" << max_compression_gain_; +} + +void AgcManagerDirect::SetCaptureMuted(bool muted) { + if (capture_muted_ == muted) { + return; + } + capture_muted_ = muted; + + if (!muted) { + // When we unmute, we should reset things to be safe. + check_volume_on_next_process_ = true; + } +} + +float AgcManagerDirect::voice_probability() { + return static_cast(agc_->voice_probability()); +} + +int AgcManagerDirect::CheckVolumeAndReset() { + int level = volume_callbacks_->GetMicVolume(); + if (level < 0) { + return -1; + } + // Reasons for taking action at startup: + // 1) A person starting a call is expected to be heard. + // 2) Independent of interpretation of |level| == 0 we should raise it so the + // AGC can do its job properly. + if (level == 0 && !startup_) { + LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action."; + return 0; + } + if (level > kMaxMicLevel) { + LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level; + return -1; + } + LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level; + + int minLevel = startup_ ? kMinInitMicLevel : kMinMicLevel; + if (level < minLevel) { + level = minLevel; + LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level; + volume_callbacks_->SetMicVolume(level); + } + agc_->Reset(); + level_ = level; + startup_ = false; + return 0; +} + +// Requests the RMS error from AGC and distributes the required gain change +// between the digital compression stage and volume slider. We use the +// compressor first, providing a slack region around the current slider +// position to reduce movement. +// +// If the slider needs to be moved, we check first if the user has adjusted +// it, in which case we take no action and cache the updated level. 
+void AgcManagerDirect::UpdateGain() { + int rms_error = 0; + if (!agc_->GetRmsErrorDb(&rms_error)) { + // No error update ready. + return; + } + // The compressor will always add at least kMinCompressionGain. In effect, + // this adjusts our target gain upward by the same amount and rms_error + // needs to reflect that. + rms_error += kMinCompressionGain; + + // Handle as much error as possible with the compressor first. + int raw_compression = std::max(std::min(rms_error, max_compression_gain_), + kMinCompressionGain); + // Deemphasize the compression gain error. Move halfway between the current + // target and the newly received target. This serves to soften perceptible + // intra-talkspurt adjustments, at the cost of some adaptation speed. + if ((raw_compression == max_compression_gain_ && + target_compression_ == max_compression_gain_ - 1) || + (raw_compression == kMinCompressionGain && + target_compression_ == kMinCompressionGain + 1)) { + // Special case to allow the target to reach the endpoints of the + // compression range. The deemphasis would otherwise halt it at 1 dB shy. + target_compression_ = raw_compression; + } else { + target_compression_ = (raw_compression - target_compression_) / 2 + + target_compression_; + } + + // Residual error will be handled by adjusting the volume slider. Use the + // raw rather than deemphasized compression here as we would otherwise + // shrink the amount of slack the compressor provides. 
+ int residual_gain = rms_error - raw_compression; + residual_gain = std::min(std::max(residual_gain, -kMaxResidualGainChange), + kMaxResidualGainChange); + LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", " + << "target_compression=" << target_compression_ << ", " + << "residual_gain=" << residual_gain; + if (residual_gain == 0) + return; + + SetLevel(LevelFromGainError(residual_gain, level_)); +} + +void AgcManagerDirect::UpdateCompressor() { + if (compression_ == target_compression_) { + return; + } + + // Adapt the compression gain slowly towards the target, in order to avoid + // highly perceptible changes. + if (target_compression_ > compression_) { + compression_accumulator_ += kCompressionGainStep; + } else { + compression_accumulator_ -= kCompressionGainStep; + } + + // The compressor accepts integer gains in dB. Adjust the gain when + // we've come within half a stepsize of the nearest integer. (We don't + // check for equality due to potential floating point imprecision). + int new_compression = compression_; + int nearest_neighbor = std::floor(compression_accumulator_ + 0.5); + if (std::fabs(compression_accumulator_ - nearest_neighbor) < + kCompressionGainStep / 2) { + new_compression = nearest_neighbor; + } + + // Set the new compression gain. + if (new_compression != compression_) { + compression_ = new_compression; + compression_accumulator_ = new_compression; + if (gctrl_->set_compression_gain_db(compression_) != 0) { + LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_); + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/agc_manager_direct.h b/webrtc/modules/audio_processing/agc/agc_manager_direct.h new file mode 100644 index 0000000000..fac5f026b6 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc_manager_direct.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ + +#include "webrtc/modules/audio_processing/agc/agc.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +class AudioFrame; +class DebugFile; +class GainControl; + +// Callbacks that need to be injected into AgcManagerDirect to read and control +// the volume values. They have different behavior if they are called from +// AgcManager or AudioProcessing. This is done to remove the VoiceEngine +// dependency in AgcManagerDirect. +class VolumeCallbacks { + public: + virtual ~VolumeCallbacks() {} + virtual void SetMicVolume(int volume) = 0; + virtual int GetMicVolume() = 0; +}; + +// Direct interface to use AGC to set volume and compression values. +// AudioProcessing uses this interface directly to integrate the callback-less +// AGC. AgcManager delegates most of its calls here. See agc_manager.h for +// undocumented methods. +// +// This class is not thread-safe. +class AgcManagerDirect { + public: + // AgcManagerDirect will configure GainControl internally. The user is + // responsible for processing the audio using it after the call to Process. + AgcManagerDirect(GainControl* gctrl, VolumeCallbacks* volume_callbacks); + // Dependency injection for testing. Don't delete |agc| as the memory is owned + // by the manager. 
+ AgcManagerDirect(Agc* agc, + GainControl* gctrl, + VolumeCallbacks* volume_callbacks); + ~AgcManagerDirect(); + + int Initialize(); + void AnalyzePreProcess(int16_t* audio, + int num_channels, + int samples_per_channel); + void Process(const int16_t* audio, int length, int sample_rate_hz); + + // Sets a new microphone level, after first checking that it hasn't been + // updated by the user, in which case no action is taken. + void SetLevel(int new_level); + + // Set the maximum level the AGC is allowed to apply. Also updates the + // maximum compression gain to compensate. The level must be at least + // |kClippedLevelMin|. + void SetMaxLevel(int level); + + void SetCaptureMuted(bool muted); + bool capture_muted() { return capture_muted_; } + + float voice_probability(); + + private: + int CheckVolumeAndReset(); + void UpdateGain(); + void UpdateCompressor(); + + scoped_ptr<Agc> agc_; + GainControl* gctrl_; + VolumeCallbacks* volume_callbacks_; + + int frames_since_clipped_; + int level_; + int max_level_; + int max_compression_gain_; + int target_compression_; + int compression_; + float compression_accumulator_; + bool capture_muted_; + bool check_volume_on_next_process_; + bool startup_; + + scoped_ptr<DebugFile> file_preproc_; + scoped_ptr<DebugFile> file_postproc_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ diff --git a/webrtc/modules/audio_processing/agc/agc_unittest.cc b/webrtc/modules/audio_processing/agc/agc_unittest.cc new file mode 100644 index 0000000000..3d5e2d73bf --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc_unittest.cc @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/agc/agc.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "webrtc/modules/audio_processing/agc/test/test_utils.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/test/testsupport/fileutils.h" + +using ::testing::_; +using ::testing::AllOf; +using ::testing::AtLeast; +using ::testing::Eq; +using ::testing::Gt; +using ::testing::InSequence; +using ::testing::Lt; +using ::testing::Mock; +using ::testing::SaveArg; + +namespace webrtc { +namespace { + +// The tested values depend on this assumed gain. +const int kMaxGain = 80; + +MATCHER_P(GtPointee, p, "") { return arg > *p; } +MATCHER_P(LtPointee, p, "") { return arg < *p; } + +class AgcChecker { + public: + MOCK_METHOD2(LevelChanged, void(int iterations, int level)); +}; + +class AgcTest : public ::testing::Test { + protected: + AgcTest() + : agc_(), + checker_(), + mic_level_(128) { + } + + // A gain of <= -100 will zero out the signal. 
+ void RunAgc(int iterations, float gain_db) { + FILE* input_file = fopen( + test::ResourcePath("voice_engine/audio_long16", "pcm").c_str(), "rb"); + ASSERT_TRUE(input_file != NULL); + + AudioFrame frame; + frame.sample_rate_hz_ = 16000; + frame.num_channels_ = 1; + frame.samples_per_channel_ = frame.sample_rate_hz_ / 100; + const size_t length = frame.samples_per_channel_ * frame.num_channels_; + + float gain = Db2Linear(gain_db); + if (gain_db <= -100) { + gain = 0; + } + + for (int i = 0; i < iterations; ++i) { + ASSERT_EQ(length, fread(frame.data_, sizeof(int16_t), length, + input_file)); + SimulateMic(kMaxGain, mic_level_, &frame); + ApplyGainLinear(gain, &frame); + ASSERT_GE(agc_.Process(frame), 0); + + int mic_level = agc_.MicLevel(); + if (mic_level != mic_level_) { + printf("mic_level=%d\n", mic_level); + checker_.LevelChanged(i, mic_level); + } + mic_level_ = mic_level; + } + fclose(input_file); + } + + Agc agc_; + AgcChecker checker_; + // Stores mic level between multiple runs of RunAgc in one test. 
+ int mic_level_; +}; + +TEST_F(AgcTest, UpwardsChangeIsLimited) { + { + InSequence seq; + EXPECT_CALL(checker_, LevelChanged(Lt(500), Eq(179))).Times(1); + EXPECT_CALL(checker_, LevelChanged(_, Gt(179))).Times(AtLeast(1)); + } + RunAgc(1000, -40); +} + +TEST_F(AgcTest, DownwardsChangeIsLimited) { + { + InSequence seq; + EXPECT_CALL(checker_, LevelChanged(Lt(500), Eq(77))).Times(1); + EXPECT_CALL(checker_, LevelChanged(_, Lt(77))).Times(AtLeast(1)); + } + RunAgc(1000, 40); +} + +TEST_F(AgcTest, MovesUpToMaxAndDownToMin) { + int last_level = 128; + EXPECT_CALL(checker_, LevelChanged(_, GtPointee(&last_level))) + .Times(AtLeast(2)) + .WillRepeatedly(SaveArg<1>(&last_level)); + RunAgc(1000, -30); + EXPECT_EQ(255, last_level); + Mock::VerifyAndClearExpectations(&checker_); + + EXPECT_CALL(checker_, LevelChanged(_, LtPointee(&last_level))) + .Times(AtLeast(2)) + .WillRepeatedly(SaveArg<1>(&last_level)); + RunAgc(1000, 50); + EXPECT_EQ(1, last_level); +} + +TEST_F(AgcTest, HandlesZeroSignal) { + int last_level = 128; + // Doesn't respond to a zero signal. + EXPECT_CALL(checker_, LevelChanged(_, _)).Times(0); + RunAgc(1000, -100); + Mock::VerifyAndClearExpectations(&checker_); + + // Reacts as usual afterwards. + EXPECT_CALL(checker_, LevelChanged(_, GtPointee(&last_level))) + .Times(AtLeast(2)) + .WillRepeatedly(SaveArg<1>(&last_level)); + RunAgc(500, -20); +} + +TEST_F(AgcTest, ReachesSteadyState) { + int last_level = 128; + EXPECT_CALL(checker_, LevelChanged(_, _)) + .Times(AtLeast(2)) + .WillRepeatedly(SaveArg<1>(&last_level)); + RunAgc(1000, -20); + Mock::VerifyAndClearExpectations(&checker_); + + // If the level changes, it should be in a narrow band around the previous + // adaptation. + EXPECT_CALL(checker_, LevelChanged(_, + AllOf(Gt(last_level * 0.95), Lt(last_level * 1.05)))) + .Times(AtLeast(0)); + RunAgc(1000, -20); +} + +// TODO(ajm): Add this test; requires measuring the signal RMS. 
+TEST_F(AgcTest, AdaptsToCorrectRMS) { +} + +} // namespace +} // namespace webrtc + diff --git a/webrtc/modules/audio_processing/agc/circular_buffer.cc b/webrtc/modules/audio_processing/agc/circular_buffer.cc new file mode 100644 index 0000000000..8ecb76008f --- /dev/null +++ b/webrtc/modules/audio_processing/agc/circular_buffer.cc @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/agc/circular_buffer.h" + +#include +#include + +namespace webrtc { + +AgcCircularBuffer::AgcCircularBuffer(int buffer_size) + : buffer_(new double[buffer_size]), + is_full_(false), + index_(0), + buffer_size_(buffer_size), + sum_(0) {} + +AgcCircularBuffer::~AgcCircularBuffer() {} + +void AgcCircularBuffer::Reset() { + is_full_ = false; + index_ = 0; + sum_ = 0; +} + +AgcCircularBuffer* AgcCircularBuffer::Create(int buffer_size) { + if (buffer_size <= 0) + return NULL; + return new AgcCircularBuffer(buffer_size); +} + +double AgcCircularBuffer::Oldest() const { + if (!is_full_) + return buffer_[0]; + else + return buffer_[index_]; +} + +double AgcCircularBuffer::Mean() { + double m; + if (is_full_) { + m = sum_ / buffer_size_; + } else { + if (index_ > 0) + m = sum_ / index_; + else + m = 0; + } + return m; +} + +void AgcCircularBuffer::Insert(double value) { + if (is_full_) { + sum_ -= buffer_[index_]; + } + sum_ += value; + buffer_[index_] = value; + index_++; + if (index_ >= buffer_size_) { + is_full_ = true; + index_ = 0; + } +} +int AgcCircularBuffer::BufferLevel() { + if (is_full_) + return buffer_size_; + return index_; +} + +int AgcCircularBuffer::Get(int 
index, double* value) const { + int err = ConvertToLinearIndex(&index); + if (err < 0) + return -1; + *value = buffer_[index]; + return 0; +} + +int AgcCircularBuffer::Set(int index, double value) { + int err = ConvertToLinearIndex(&index); + if (err < 0) + return -1; + + sum_ -= buffer_[index]; + buffer_[index] = value; + sum_ += value; + return 0; +} + +int AgcCircularBuffer::ConvertToLinearIndex(int* index) const { + if (*index < 0 || *index >= buffer_size_) + return -1; + + if (!is_full_ && *index >= index_) + return -1; + + *index = index_ - 1 - *index; + if (*index < 0) + *index += buffer_size_; + return 0; +} + +int AgcCircularBuffer::RemoveTransient(int width_threshold, + double val_threshold) { + if (!is_full_ && index_ < width_threshold + 2) + return 0; + + int index_1 = 0; + int index_2 = width_threshold + 1; + double v = 0; + if (Get(index_1, &v) < 0) + return -1; + if (v < val_threshold) { + Set(index_1, 0); + int index; + for (index = index_2; index > index_1; index--) { + if (Get(index, &v) < 0) + return -1; + if (v < val_threshold) + break; + } + for (; index > index_1; index--) { + if (Set(index, 0.0) < 0) + return -1; + } + } + return 0; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/circular_buffer.h b/webrtc/modules/audio_processing/agc/circular_buffer.h new file mode 100644 index 0000000000..98baa1304a --- /dev/null +++ b/webrtc/modules/audio_processing/agc/circular_buffer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_ + +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +// A circular buffer tailored to the needs of this project. It stores the last +// K samples of the input, and keeps track of the mean of the last samples. +// +// It is used in class "PitchBasedActivity" to keep track of posterior +// probabilities in the past few seconds. The posterior probabilities are used +// to recursively update prior probabilities. +class AgcCircularBuffer { + public: + static AgcCircularBuffer* Create(int buffer_size); + ~AgcCircularBuffer(); + + // True once the buffer has wrapped around. + bool is_full() const { return is_full_; } + // Get the oldest entry in the buffer. + double Oldest() const; + // Insert new value into the buffer. + void Insert(double value); + // Reset buffer, forget the past, start fresh. + void Reset(); + + // The mean value of the elements in the buffer. The return value is zero if + // buffer is empty, i.e. no value is inserted. + double Mean(); + // Remove transients. If the values exceed |val_threshold| for a period + // shorter than or equal to |width_threshold|, then that period is considered + // transient and set to zero. + int RemoveTransient(int width_threshold, double val_threshold); + + private: + explicit AgcCircularBuffer(int buffer_size); + // Get previous values. |index = 0| corresponds to the most recent + // insertion. |index = 1| is the one before the most recent insertion, and + // so on. + int Get(int index, double* value) const; + // Set a given position to |value|. |index| is interpreted as above. + int Set(int index, double value); + // Return the number of valid elements in the buffer. + int BufferLevel(); + + // Convert an index with the interpretation as get() method to the + // corresponding linear index.
+ int ConvertToLinearIndex(int* index) const; + + scoped_ptr<double[]> buffer_; + bool is_full_; + int index_; + int buffer_size_; + double sum_; +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_ diff --git a/webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc b/webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc new file mode 100644 index 0000000000..6bab2e52be --- /dev/null +++ b/webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/agc/circular_buffer.h" + +#include + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +static const int kWidthThreshold = 7; +static const double kValThreshold = 1.0; +static const int kLongBuffSize = 100; +static const int kShortBuffSize = 10; + +static void InsertSequentially(int k, AgcCircularBuffer* circular_buffer) { + double mean_val; + for (int n = 1; n <= k; n++) { + EXPECT_TRUE(!circular_buffer->is_full()); + circular_buffer->Insert(n); + mean_val = circular_buffer->Mean(); + EXPECT_EQ((n + 1.0) / 2., mean_val); + } +} + +static void Insert(double value, int num_insertion, + AgcCircularBuffer* circular_buffer) { + for (int n = 0; n < num_insertion; n++) + circular_buffer->Insert(value); +} + +static void InsertZeros(int num_zeros, AgcCircularBuffer* circular_buffer) { + Insert(0.0, num_zeros, circular_buffer); +} + +TEST(AgcCircularBufferTest, GeneralTest) { + scoped_ptr<AgcCircularBuffer> circular_buffer(
AgcCircularBuffer::Create(kShortBuffSize)); + double mean_val; + + // Mean should return zero if nothing is inserted. + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(0.0, mean_val); + InsertSequentially(kShortBuffSize, circular_buffer.get()); + + // Should be full. + EXPECT_TRUE(circular_buffer->is_full()); + // Correct update after being full. + for (int n = 1; n < kShortBuffSize; n++) { + circular_buffer->Insert(n); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ((kShortBuffSize + 1.) / 2., mean_val); + EXPECT_TRUE(circular_buffer->is_full()); + } + + // Check reset. This should be like starting fresh. + circular_buffer->Reset(); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(0, mean_val); + InsertSequentially(kShortBuffSize, circular_buffer.get()); + EXPECT_TRUE(circular_buffer->is_full()); +} + +TEST(AgcCircularBufferTest, TransientsRemoval) { + scoped_ptr circular_buffer( + AgcCircularBuffer::Create(kLongBuffSize)); + // Let the first transient be in wrap-around. + InsertZeros(kLongBuffSize - kWidthThreshold / 2, circular_buffer.get()); + + double push_val = kValThreshold; + double mean_val; + for (int k = kWidthThreshold; k >= 1; k--) { + Insert(push_val, k, circular_buffer.get()); + circular_buffer->Insert(0); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(k * push_val / kLongBuffSize, mean_val); + circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(0, mean_val); + } +} + +TEST(AgcCircularBufferTest, TransientDetection) { + scoped_ptr circular_buffer( + AgcCircularBuffer::Create(kLongBuffSize)); + // Let the first transient be in wrap-around. + int num_insertion = kLongBuffSize - kWidthThreshold / 2; + InsertZeros(num_insertion, circular_buffer.get()); + + double push_val = 2; + // This is longer than a transient and shouldn't be removed. 
+ int num_non_zero_elements = kWidthThreshold + 1; + Insert(push_val, num_non_zero_elements, circular_buffer.get()); + + double mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val); + circular_buffer->Insert(0); + EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold, + kValThreshold)); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val); + + // A transient right after a non-transient, should be removed and mean is + // not changed. + num_insertion = 3; + Insert(push_val, num_insertion, circular_buffer.get()); + circular_buffer->Insert(0); + EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold, + kValThreshold)); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val); + + // Last input is larger than threshold, although the sequence is short but + // it shouldn't be considered transient. + Insert(push_val, num_insertion, circular_buffer.get()); + num_non_zero_elements += num_insertion; + EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold, + kValThreshold)); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/common.h b/webrtc/modules/audio_processing/agc/common.h new file mode 100644 index 0000000000..e9ed1edadd --- /dev/null +++ b/webrtc/modules/audio_processing/agc/common.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_ + +static const int kSampleRateHz = 16000; +static const int kLength10Ms = kSampleRateHz / 100; +static const int kMaxNumFrames = 4; + +struct AudioFeatures { + double log_pitch_gain[kMaxNumFrames]; + double pitch_lag_hz[kMaxNumFrames]; + double spectral_peak[kMaxNumFrames]; + double rms[kMaxNumFrames]; + int num_frames; + bool silence; +}; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_ diff --git a/webrtc/modules/audio_processing/agc/gain_map_internal.h b/webrtc/modules/audio_processing/agc/gain_map_internal.h new file mode 100644 index 0000000000..53c71c1148 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/gain_map_internal.h @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ + +static const int kGainMapSize = 256; +// Uses parameters: si = 2, sf = 0.25, D = 8/256 +static const int kGainMap[kGainMapSize] = { + -56, + -54, + -52, + -50, + -48, + -47, + -45, + -43, + -42, + -40, + -38, + -37, + -35, + -34, + -33, + -31, + -30, + -29, + -27, + -26, + -25, + -24, + -23, + -22, + -20, + -19, + -18, + -17, + -16, + -15, + -14, + -14, + -13, + -12, + -11, + -10, + -9, + -8, + -8, + -7, + -6, + -5, + -5, + -4, + -3, + -2, + -2, + -1, + 0, + 0, + 1, + 1, + 2, + 3, + 3, + 4, + 4, + 5, + 5, + 6, + 6, + 7, + 7, + 8, + 8, + 9, + 9, + 10, + 10, + 11, + 11, + 12, + 12, + 13, + 13, + 13, + 14, + 14, + 15, + 15, + 15, + 16, + 16, + 17, + 17, + 17, + 18, + 18, + 18, + 19, + 19, + 19, + 20, + 20, + 21, + 21, + 21, + 22, + 22, + 22, + 23, + 23, + 23, + 24, + 24, + 24, + 24, + 25, + 25, + 25, + 26, + 26, + 26, + 27, + 27, + 27, + 28, + 28, + 28, + 28, + 29, + 29, + 29, + 30, + 30, + 30, + 30, + 31, + 31, + 31, + 32, + 32, + 32, + 32, + 33, + 33, + 33, + 33, + 34, + 34, + 34, + 35, + 35, + 35, + 35, + 36, + 36, + 36, + 36, + 37, + 37, + 37, + 38, + 38, + 38, + 38, + 39, + 39, + 39, + 39, + 40, + 40, + 40, + 40, + 41, + 41, + 41, + 41, + 42, + 42, + 42, + 42, + 43, + 43, + 43, + 44, + 44, + 44, + 44, + 45, + 45, + 45, + 45, + 46, + 46, + 46, + 46, + 47, + 47, + 47, + 47, + 48, + 48, + 48, + 48, + 49, + 49, + 49, + 49, + 50, + 50, + 50, + 50, + 51, + 51, + 51, + 51, + 52, + 52, + 52, + 52, + 53, + 53, + 53, + 53, + 54, + 54, + 54, + 54, + 55, + 55, + 55, + 55, + 56, + 56, + 56, + 56, + 57, + 57, + 57, + 57, + 58, + 58, + 58, + 58, + 59, + 59, + 59, + 59, + 60, + 60, + 60, + 60, + 61, + 61, + 61, + 61, + 62, + 62, + 62, + 62, + 63, + 63, + 63, + 63, + 64 +}; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/agc/gmm.cc 
b/webrtc/modules/audio_processing/agc/gmm.cc new file mode 100644 index 0000000000..9ad8ef95ae --- /dev/null +++ b/webrtc/modules/audio_processing/agc/gmm.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/agc/gmm.h" + +#include +#include + +#include "webrtc/typedefs.h" + +namespace webrtc { + +static const int kMaxDimension = 10; + +static void RemoveMean(const double* in, const double* mean_vec, + int dimension, double* out) { + for (int n = 0; n < dimension; ++n) + out[n] = in[n] - mean_vec[n]; +} + +static double ComputeExponent(const double* in, const double* covar_inv, + int dimension) { + double q = 0; + for (int i = 0; i < dimension; ++i) { + double v = 0; + for (int j = 0; j < dimension; j++) + v += (*covar_inv++) * in[j]; + q += v * in[i]; + } + q *= -0.5; + return q; +} + +double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) { + if (gmm_parameters.dimension > kMaxDimension) { + return -1; // This is invalid pdf so the caller can check this. 
+ } + double f = 0; + double v[kMaxDimension]; + const double* mean_vec = gmm_parameters.mean; + const double* covar_inv = gmm_parameters.covar_inverse; + + for (int n = 0; n < gmm_parameters.num_mixtures; n++) { + RemoveMean(x, mean_vec, gmm_parameters.dimension, v); + double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) + + gmm_parameters.weight[n]; + f += exp(q); + mean_vec += gmm_parameters.dimension; + covar_inv += gmm_parameters.dimension * gmm_parameters.dimension; + } + return f; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/gmm.h b/webrtc/modules/audio_processing/agc/gmm.h new file mode 100644 index 0000000000..90ce95d4dd --- /dev/null +++ b/webrtc/modules/audio_processing/agc/gmm.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_ + +namespace webrtc { + +// A structure that specifies a GMM. +// A GMM is formulated as +// f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... + +// w[num_mixtures - 1] * mixture[num_mixtures - 1]; +// Where a 'mixture' is a Gaussian density. + +struct GmmParameters { + // weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n])); + // where cov[n] is the covariance matrix of mixture n; + const double* weight; + // pointer to the first element of a |num_mixtures|x|dimension| matrix + // where kth row is the mean of the kth mixture. 
+ const double* mean; + // Pointer to the first element of a |num_mixtures|x|dimension|x|dimension| + // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance + // matrix of the kth mixture. + const double* covar_inverse; + // Dimensionality of the mixtures. + int dimension; + // Number of mixtures. + int num_mixtures; +}; + +// Evaluate the given GMM, according to |gmm_parameters|, at the given point +// |x|. If the dimensionality of the given GMM is larger than the maximum +// dimension supported by this function, -1 is returned. +double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters); + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_ diff --git a/webrtc/modules/audio_processing/agc/gmm_unittest.cc b/webrtc/modules/audio_processing/agc/gmm_unittest.cc new file mode 100644 index 0000000000..4ca658d732 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/gmm_unittest.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/agc/gmm.h" + +#include + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h" +#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h" + +namespace webrtc { + +TEST(GmmTest, EvaluateGmm) { + GmmParameters noise_gmm; + GmmParameters voice_gmm; + + // Setup noise GMM.
+ noise_gmm.dimension = kNoiseGmmDim; + noise_gmm.num_mixtures = kNoiseGmmNumMixtures; + noise_gmm.weight = kNoiseGmmWeights; + noise_gmm.mean = &kNoiseGmmMean[0][0]; + noise_gmm.covar_inverse = &kNoiseGmmCovarInverse[0][0][0]; + + // Set up the voice GMM. + voice_gmm.dimension = kVoiceGmmDim; + voice_gmm.num_mixtures = kVoiceGmmNumMixtures; + voice_gmm.weight = kVoiceGmmWeights; + voice_gmm.mean = &kVoiceGmmMean[0][0]; + voice_gmm.covar_inverse = &kVoiceGmmCovarInverse[0][0][0]; + + // Test vectors. These are the mean of the GMM means. + const double kXVoice[kVoiceGmmDim] = { + -1.35893162459863, 602.862491970368, 178.022069191324}; + const double kXNoise[kNoiseGmmDim] = { + -2.33443722724409, 2827.97828765184, 141.114178166812}; + + // Expected pdf values. These values are computed in MATLAB using EvalGmm.m + const double kPdfNoise = 1.88904409403101e-07; + const double kPdfVoice = 1.30453996982266e-06; + + // Relative error should be smaller than the following value. + const double kAcceptedRelativeErr = 1e-10; + + // Test Voice. + double pdf = EvaluateGmm(kXVoice, voice_gmm); + EXPECT_GT(pdf, 0); + double relative_error = fabs(pdf - kPdfVoice) / kPdfVoice; + EXPECT_LE(relative_error, kAcceptedRelativeErr); + + // Test Noise. + pdf = EvaluateGmm(kXNoise, noise_gmm); + EXPECT_GT(pdf, 0); + relative_error = fabs(pdf - kPdfNoise) / kPdfNoise; + EXPECT_LE(relative_error, kAcceptedRelativeErr); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/histogram.cc b/webrtc/modules/audio_processing/agc/histogram.cc new file mode 100644 index 0000000000..ab18c6580a --- /dev/null +++ b/webrtc/modules/audio_processing/agc/histogram.cc @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/histogram.h"
+
+#include <cmath>
+#include <cstring>
+
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+
+namespace webrtc {
+
+static const double kHistBinCenters[] = {
+    7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
+    1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
+    2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
+    3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
+    5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
+    1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
+    1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
+    2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
+    4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
+    7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
+    1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
+    2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
+    3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
+    6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
+    1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
+    1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
+    2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
+    4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
+    8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
+    1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
+    2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
+    3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
+    6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
+    1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
+    1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
+    3.00339145144454e+04, 3.56647189489147e+04};
+
+static const double kProbQDomain = 1024.0;
+// Loudness of -15 dB (smallest expected loudness) in log domain,
+// loudness_db = 13.5 * log10(rms);
+static const double kLogDomainMinBinCenter = -2.57752062648587;
+// Loudness step of 1 dB in log domain
+static const double kLogDomainStepSizeInverse = 5.81954605750359;
+
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static const int kLowProbThresholdQ10 = static_cast<int>(
+    kLowProbabilityThreshold * kProbQDomain);
+
+Histogram::Histogram()
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(),
+      hist_bin_index_(),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(0),
+      len_high_activity_(0) {
+  COMPILE_ASSERT(kHistSize == sizeof(kHistBinCenters) /
+      sizeof(kHistBinCenters[0]), histogram_bin_centers_incorrect_size);
+}
+
+Histogram::Histogram(int window_size)
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(new int[window_size]),
+      hist_bin_index_(new int[window_size]),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(window_size),
+      len_high_activity_(0) {}
+
+Histogram::~Histogram() {}
+
+void Histogram::Update(double rms, double activity_probability) {
+  // If circular histogram is activated then remove the oldest entry.
+  if (len_circular_buffer_ > 0)
+    RemoveOldestEntryAndUpdate();
+
+  // Find the corresponding bin.
+  int hist_index = GetBinIndex(rms);
+  // To Q10 domain.
+  int prob_q10 = static_cast<int>(floor(activity_probability *
+                                        kProbQDomain));
+  InsertNewestEntryAndUpdate(prob_q10, hist_index);
+}
+
+// Doing nothing if buffer is not full, yet.
+void Histogram::RemoveOldestEntryAndUpdate() {
+  assert(len_circular_buffer_ > 0);
+  // Do nothing if circular buffer is not full.
+  if (!buffer_is_full_)
+    return;
+
+  int oldest_prob = activity_probability_[buffer_index_];
+  int oldest_hist_index = hist_bin_index_[buffer_index_];
+  UpdateHist(-oldest_prob, oldest_hist_index);
+}
+
+void Histogram::RemoveTransient() {
+  // Don't expect to be here if high-activity region is longer than
+  // |kTransientWidthThreshold| or there has not been any transient.
+  assert(len_high_activity_ <= kTransientWidthThreshold);
+  int index = (buffer_index_ > 0) ? (buffer_index_ - 1) :
+      len_circular_buffer_ - 1;
+  while (len_high_activity_ > 0) {
+    UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
+    activity_probability_[index] = 0;
+    index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
+    len_high_activity_--;
+  }
+}
+
+void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
+                                           int hist_index) {
+  // Update the circular buffer if it is enabled.
+  if (len_circular_buffer_ > 0) {
+    // Removing transient.
+    if (activity_prob_q10 <= kLowProbThresholdQ10) {
+      // Lower than threshold probability, set it to zero.
+      activity_prob_q10 = 0;
+      // Check if this has been a transient.
+      if (len_high_activity_ <= kTransientWidthThreshold)
+        RemoveTransient();  // Remove this transient.
+      len_high_activity_ = 0;
+    } else if (len_high_activity_ <= kTransientWidthThreshold) {
+      len_high_activity_++;
+    }
+    // Updating the circular buffer.
+    activity_probability_[buffer_index_] = activity_prob_q10;
+    hist_bin_index_[buffer_index_] = hist_index;
+    // Increment the buffer index and check for wrap-around.
+    buffer_index_++;
+    if (buffer_index_ >= len_circular_buffer_) {
+      buffer_index_ = 0;
+      buffer_is_full_ = true;
+    }
+  }
+
+  num_updates_++;
+  if (num_updates_ < 0)
+    num_updates_--;
+
+  UpdateHist(activity_prob_q10, hist_index);
+}
+
+void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
+  bin_count_q10_[hist_index] += activity_prob_q10;
+  audio_content_q10_ += activity_prob_q10;
+}
+
+double Histogram::AudioContent() const {
+  return audio_content_q10_ / kProbQDomain;
+}
+
+Histogram* Histogram::Create() {
+  return new Histogram;
+}
+
+Histogram* Histogram::Create(int window_size) {
+  if (window_size < 0)
+    return NULL;
+  return new Histogram(window_size);
+}
+
+void Histogram::Reset() {
+  // Reset the histogram, audio-content and number of updates.
+  memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
+  audio_content_q10_ = 0;
+  num_updates_ = 0;
+  // Empty the circular buffer.
+  buffer_index_ = 0;
+  buffer_is_full_ = false;
+  len_high_activity_ = 0;
+}
+
+int Histogram::GetBinIndex(double rms) {
+  // First exclude overload cases.
+  if (!(rms > kHistBinCenters[0])) {  // Also true for NaN, which maps to bin 0.
+    return 0;
+  } else if (rms >= kHistBinCenters[kHistSize - 1]) {
+    return kHistSize - 1;
+  } else {
+    // The quantizer is uniform in log domain. Alternatively we could do binary
+    // search in linear domain.
+    double rms_log = log(rms);
+
+    int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) *
+                                       kLogDomainStepSizeInverse));
+    // The final decision is in linear domain.
+    double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
+    if (rms > b) {
+      return index + 1;
+    }
+    return index;
+  }
+}
+
+double Histogram::CurrentRms() const {
+  double p;
+  double mean_val = 0;
+  if (audio_content_q10_ > 0) {
+    double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
+    for (int n = 0; n < kHistSize; n++) {
+      p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
+      mean_val += p * kHistBinCenters[n];
+    }
+  } else {
+    mean_val = kHistBinCenters[0];
+  }
+  return mean_val;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/histogram.h b/webrtc/modules/audio_processing/agc/histogram.h
new file mode 100644
index 0000000000..8f5c518e8a
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/histogram.h
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
+
+#include <string.h>
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// This class implements the histogram of loudness with circular buffers so that
+// the histogram tracks the last T seconds of the loudness.
+class Histogram {
+ public:
+  // Create a non-sliding Histogram.
+  static Histogram* Create();
+
+  // Create a sliding Histogram, i.e. the histogram represents the last
+  // |window_size| samples.
+  static Histogram* Create(int window_size);
+  ~Histogram();
+
+  // Insert RMS and the corresponding activity probability.
+  void Update(double rms, double activity_probability);
+
+  // Reset the histogram, forget the past.
+  void Reset();
+
+  // Current loudness, which is actually the mean of histogram in loudness
+  // domain.
+  double CurrentRms() const;
+
+  // Sum of the histogram content.
+  double AudioContent() const;
+
+  // Number of times the histogram has been updated.
+  int num_updates() const { return num_updates_; }
+
+ private:
+  Histogram();
+  explicit Histogram(int window);
+
+  // Find the histogram bin associated with the given |rms|.
+  int GetBinIndex(double rms);
+
+  void RemoveOldestEntryAndUpdate();
+  void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
+  void UpdateHist(int activity_prob_q10, int hist_index);
+  void RemoveTransient();
+
+  // Number of histogram bins.
+  static const int kHistSize = 77;
+
+  // Number of times the histogram is updated
+  int num_updates_;
+  // Audio content, this should be equal to the sum of the components of
+  // |bin_count_q10_|.
+  int64_t audio_content_q10_;
+
+  // Histogram of input RMS in Q10 with |kHistSize| bins. In each 'Update(),'
+  // we increment the associated histogram-bin with the given probability. The
+  // increment is implemented in Q10 to avoid rounding errors.
+  int64_t bin_count_q10_[kHistSize];
+
+  // Circular buffer for probabilities
+  scoped_ptr<int[]> activity_probability_;
+  // Circular buffer for histogram-indices of probabilities.
+  scoped_ptr<int[]> hist_bin_index_;
+  // Current index of circular buffer, where the newest data will be written to,
+  // therefore, pointing to the oldest data if buffer is full.
+  int buffer_index_;
+  // Indicating if buffer is full and we had a wrap around.
+  bool buffer_is_full_;
+  // Size of circular buffer.
+  int len_circular_buffer_;
+  int len_high_activity_;  // Current run length of high-activity updates.
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
diff --git a/webrtc/modules/audio_processing/agc/histogram_unittest.cc b/webrtc/modules/audio_processing/agc/histogram_unittest.cc
new file mode 100644
index 0000000000..0ae7591d4f
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/histogram_unittest.cc
@@ -0,0 +1,104 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Use CreateHistUnittestFile.m to generate the input file.
+
+#include "webrtc/modules/audio_processing/agc/histogram.h"
+
+#include <stdio.h>
+#include <cmath>
+
+#include "gtest/gtest.h"
+#include "webrtc/test/testsupport/fileutils.h"
+#include "webrtc/modules/audio_processing/agc/utility.h"
+
+namespace webrtc {
+
+struct InputOutput {
+  double rms;
+  double activity_probability;
+  double audio_content;
+  double loudness;
+};
+
+const double kRelativeErrTol = 1e-10;
+
+class HistogramTest : public ::testing::Test {
+ protected:
+  void RunTest(bool enable_circular_buff,
+               const char* filename);
+
+ private:
+  void TestClean();
+  scoped_ptr<Histogram> hist_;
+};
+
+void HistogramTest::TestClean() {
+  EXPECT_EQ(hist_->CurrentRms(), 7.59621091765857e-02);
+  EXPECT_EQ(hist_->AudioContent(), 0);
+  EXPECT_EQ(hist_->num_updates(), 0);
+}
+
+void HistogramTest::RunTest(bool enable_circular_buff, const char* filename) {
+  FILE* in_file = fopen(filename, "rb");
+  ASSERT_TRUE(in_file != NULL);
+  if (enable_circular_buff) {
+    int buffer_size = 0;  // Stays defined even if the read below fails.
+    EXPECT_EQ(fread(&buffer_size, sizeof(buffer_size), 1, in_file), 1u);
+    hist_.reset(Histogram::Create(buffer_size));
+  } else {
+    hist_.reset(Histogram::Create());
+  }
+  TestClean();
+
+  InputOutput io;
+  int num_updates = 0;
+  int num_reset = 0;
+  while (fread(&io, sizeof(InputOutput), 1, in_file) == 1) {
+    if (io.rms < 0) {
+      // We have to reset.
+      hist_->Reset();
+      TestClean();
+      num_updates = 0;
+      num_reset++;
+      // Read the next chunk of input.
+      if (fread(&io, sizeof(InputOutput), 1, in_file) != 1)
+        break;
+    }
+    hist_->Update(io.rms, io.activity_probability);
+    num_updates++;
+    EXPECT_EQ(hist_->num_updates(), num_updates);
+    double audio_content = hist_->AudioContent();
+
+    double abs_err = std::min(audio_content, io.audio_content) *
+        kRelativeErrTol;
+
+    ASSERT_NEAR(audio_content, io.audio_content, abs_err);
+    double current_loudness = Linear2Loudness(hist_->CurrentRms());
+    abs_err = std::min(fabs(current_loudness), fabs(io.loudness)) *
+        kRelativeErrTol;
+    ASSERT_NEAR(current_loudness, io.loudness, abs_err);
+  }
+  fclose(in_file);
+}
+
+TEST_F(HistogramTest, ActiveCircularBuffer) {
+  RunTest(true,
+          test::ResourcePath("audio_processing/agc/agc_with_circular_buffer",
+                             "dat").c_str());
+}
+
+TEST_F(HistogramTest, InactiveCircularBuffer) {
+  RunTest(false,
+          test::ResourcePath("audio_processing/agc/agc_no_circular_buffer",
+                             "dat").c_str());
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/mock_agc.h b/webrtc/modules/audio_processing/agc/mock_agc.h
new file mode 100644
index 0000000000..1c36a055ec
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/mock_agc.h
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+
+#include "gmock/gmock.h"
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+
+class MockAgc : public Agc {  // gmock-generated test double deriving from Agc.
+ public:
+  MOCK_METHOD2(AnalyzePreproc, float(const int16_t* audio, int length));
+  MOCK_METHOD3(Process, int(const int16_t* audio, int length,
+                            int sample_rate_hz));
+  MOCK_METHOD1(GetRmsErrorDb, bool(int* error));
+  MOCK_METHOD0(Reset, void());
+  MOCK_METHOD1(set_target_level_dbfs, int(int level));
+  MOCK_CONST_METHOD0(target_level_dbfs, int());
+  MOCK_METHOD1(EnableStandaloneVad, void(bool enable));
+  MOCK_CONST_METHOD0(standalone_vad_enabled, bool());
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
diff --git a/webrtc/modules/audio_processing/agc/noise_gmm_tables.h b/webrtc/modules/audio_processing/agc/noise_gmm_tables.h
new file mode 100644
index 0000000000..779fd8c368
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/noise_gmm_tables.h
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for inactive segments. Generated by MakeGmmTables.m.
+ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_ + +static const int kNoiseGmmNumMixtures = 12; +static const int kNoiseGmmDim = 3; + +static const double kNoiseGmmCovarInverse[kNoiseGmmNumMixtures] + [kNoiseGmmDim][kNoiseGmmDim] = { + {{ 7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02}, + { 4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04}, + { 1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}}, + {{ 8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03}, + {-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04}, + { 5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}}, + {{ 4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03}, + {-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05}, + {-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}}, + {{ 9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03}, + {-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07}, + {-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}}, + {{ 7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02}, + {-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06}, + { 2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}}, + {{ 8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02}, + {-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06}, + {-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}}, + {{ 9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03}, + { 5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07}, + {-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}}, + {{ 8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03}, + { 5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07}, + { 
6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}}, + {{ 6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03}, + {-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05}, + { 5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}}, + {{ 6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03}, + { 4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08}, + {-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}}, + {{ 1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02}, + {-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07}, + {-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}}, + {{ 4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03}, + {-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07}, + { 5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}}; + +static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = { + {-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01}, + {-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02}, + {-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02}, + {-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02}, + {-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01}, + {-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02}, + {-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02}, + {-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02}, + {-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02}, + {-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02}, + {-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02}, + {-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}}; + +static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = { + -1.09422832086193e+01, -1.10847897513425e+01, 
-1.36767587732187e+01, + -1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01, + -1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01, + -1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01}; +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_ diff --git a/webrtc/modules/audio_processing/agc/pitch_based_vad.cc b/webrtc/modules/audio_processing/agc/pitch_based_vad.cc new file mode 100644 index 0000000000..675a1c8cb1 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/pitch_based_vad.cc @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h" + +#include +#include +#include + +#include "webrtc/modules/audio_processing/agc/circular_buffer.h" +#include "webrtc/modules/audio_processing/agc/common.h" +#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h" +#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/compile_assert.h" + +namespace webrtc { + +COMPILE_ASSERT(kNoiseGmmDim == kVoiceGmmDim, + noise_and_voice_gmm_dimension_not_equal); + +// These values should match MATLAB counterparts for unit-tests to pass. +static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames. 
+static const double kInitialPriorProbability = 0.3;
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static double LimitProbability(double p) {
+  const double kLimHigh = 0.99;
+  const double kLimLow = 0.01;
+
+  if (p > kLimHigh)
+    p = kLimHigh;
+  else if (!(p >= kLimLow))  // Also catches NaN (e.g. a 0/0 posterior).
+    p = kLimLow;
+  return p;
+}
+
+PitchBasedVad::PitchBasedVad()
+    : p_prior_(kInitialPriorProbability),
+      circular_buffer_(AgcCircularBuffer::Create(kPosteriorHistorySize)) {
+  // Setup noise GMM.
+  noise_gmm_.dimension = kNoiseGmmDim;
+  noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
+  noise_gmm_.weight = kNoiseGmmWeights;
+  noise_gmm_.mean = &kNoiseGmmMean[0][0];
+  noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+  // Setup voice GMM.
+  voice_gmm_.dimension = kVoiceGmmDim;
+  voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
+  voice_gmm_.weight = kVoiceGmmWeights;
+  voice_gmm_.mean = &kVoiceGmmMean[0][0];
+  voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+}
+
+PitchBasedVad::~PitchBasedVad() {}
+
+int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
+                                      double* p_combined) {
+  double p;
+  double gmm_features[3];
+  double pdf_features_given_voice;
+  double pdf_features_given_noise;
+  // These limits are the same in matlab implementation 'VoicingProbGMM().'
+  const double kLimLowLogPitchGain = -2.0;
+  const double kLimHighLogPitchGain = -0.9;
+  const double kLimLowSpectralPeak = 200;
+  const double kLimHighSpectralPeak = 2000;
+  const double kEps = 1e-12;
+  for (int n = 0; n < features.num_frames; n++) {
+    gmm_features[0] = features.log_pitch_gain[n];
+    gmm_features[1] = features.spectral_peak[n];
+    gmm_features[2] = features.pitch_lag_hz[n];
+
+    pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+    pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+    if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+        features.spectral_peak[n] > kLimHighSpectralPeak ||
+        features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+      pdf_features_given_voice = kEps * pdf_features_given_noise;
+    } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+      pdf_features_given_noise = kEps * pdf_features_given_voice;
+    }
+
+    p = p_prior_ * pdf_features_given_voice / (pdf_features_given_voice *
+        p_prior_ + pdf_features_given_noise * (1 - p_prior_));
+
+    p = LimitProbability(p);
+
+    // Combine pitch-based probability with standalone probability, before
+    // updating prior probabilities.
+    double prod_active = p * p_combined[n];
+    double prod_inactive = (1 - p) * (1 - p_combined[n]);
+    p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+    if (UpdatePrior(p_combined[n]) < 0)
+      return -1;
+    // Limit prior probability. With a zero prior probability the posterior
+    // probability is always zero.
+    p_prior_ = LimitProbability(p_prior_);
+  }
+  return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+  circular_buffer_->Insert(p);
+  if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+                                        kLowProbabilityThreshold) < 0)
+    return -1;
+  p_prior_ = circular_buffer_->Mean();
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/pitch_based_vad.h b/webrtc/modules/audio_processing/agc/pitch_based_vad.h
new file mode 100644
index 0000000000..41183a5dec
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_based_vad.h
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
+
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/modules/audio_processing/agc/gmm.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class AgcCircularBuffer;
+
+// Computes the probability of the input audio frame to be active given
+// the corresponding pitch-gain and lag of the frame.
+class PitchBasedVad {
+ public:
+  PitchBasedVad();
+  ~PitchBasedVad();
+
+  // Compute pitch-based voicing probability, given the features.
+  //   features: a structure containing features required for computing voicing
+  //             probabilities.
+  //
+  //   p_combined: an array which contains the combined activity probabilities
+  //               computed prior to the call of this function. The method,
+  //               then, computes the voicing probabilities and combines them
+  //               with the given values. Results are written to |p_combined|.
+  int VoicingProbability(const AudioFeatures& features, double* p_combined);
+ private:
+  int UpdatePrior(double p);
+
+  // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
+  // all the code recognize it as "no-error."
+  static const int kNoError = 0;
+
+  GmmParameters noise_gmm_;
+  GmmParameters voice_gmm_;
+
+  double p_prior_;
+
+  scoped_ptr<AgcCircularBuffer> circular_buffer_;
+};
+
+}  // namespace webrtc
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
diff --git a/webrtc/modules/audio_processing/agc/pitch_based_vad_unittest.cc b/webrtc/modules/audio_processing/agc/pitch_based_vad_unittest.cc
new file mode 100644
index 0000000000..3ec0baac95
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_based_vad_unittest.cc
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(PitchBasedVadTest, VoicingProbabilityTest) {
+  std::string spectral_peak_file_name = test::ResourcePath(
+      "audio_processing/agc/agc_spectral_peak", "dat");
+  FILE* spectral_peak_file = fopen(spectral_peak_file_name.c_str(), "rb");
+  ASSERT_TRUE(spectral_peak_file != NULL);
+
+  std::string pitch_gain_file_name =
+      test::ResourcePath("audio_processing/agc/agc_pitch_gain", "dat");
+  FILE* pitch_gain_file = fopen(pitch_gain_file_name.c_str(), "rb");
+  ASSERT_TRUE(pitch_gain_file != NULL);
+
+  std::string pitch_lag_file_name =
+      test::ResourcePath("audio_processing/agc/agc_pitch_lag", "dat");
+  FILE* pitch_lag_file = fopen(pitch_lag_file_name.c_str(), "rb");
+  ASSERT_TRUE(pitch_lag_file != NULL);
+
+  std::string voicing_prob_file_name =
+      test::ResourcePath("audio_processing/agc/agc_voicing_prob", "dat");
+  FILE* voicing_prob_file = fopen(voicing_prob_file_name.c_str(), "rb");
+  ASSERT_TRUE(voicing_prob_file != NULL);
+
+  PitchBasedVad vad_;
+
+  double reference_activity_probability;
+
+  AudioFeatures audio_features;
+  memset(&audio_features, 0, sizeof(audio_features));
+  audio_features.num_frames = 1;
+  while (fread(audio_features.spectral_peak,
+               sizeof(audio_features.spectral_peak[0]), 1,
+               spectral_peak_file) == 1u) {
+    ASSERT_EQ(1u, fread(audio_features.log_pitch_gain, sizeof(
+        audio_features.log_pitch_gain[0]), 1, pitch_gain_file));
+    ASSERT_EQ(1u, fread(audio_features.pitch_lag_hz, sizeof(
+        audio_features.pitch_lag_hz[0]), 1, pitch_lag_file));
+    ASSERT_EQ(1u, fread(&reference_activity_probability, sizeof(
+        reference_activity_probability), 1, voicing_prob_file));
+
+    double p = 0.5;  // Neutral value for combining probabilities.
+    EXPECT_EQ(0, vad_.VoicingProbability(audio_features, &p));
+    EXPECT_NEAR(p, reference_activity_probability, 0.01);
+  }
+
+  fclose(spectral_peak_file);
+  fclose(pitch_gain_file);
+  fclose(pitch_lag_file);
+  fclose(voicing_prob_file);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/pitch_internal.cc b/webrtc/modules/audio_processing/agc/pitch_internal.cc
new file mode 100644
index 0000000000..b394074bd3
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_internal.cc
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
+
+#include <cmath>
+
+// A 4-to-3 linear interpolation.
+// The interpolation constants are derived as following:
+// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
+// we are interested in pitch parameters of 0-5 ms, 10-15ms and 20-25ms. This is
+// like interpolating 4-to-6 and keep the odd samples.
+// The reason behind this is that LPC coefficients are computed for the first
+// half of each 10ms interval.
+static void PitchInterpolation(double old_val, const double* in, double* out) {
+  out[0] = 1. / 6. * old_val + 5. / 6. * in[0];
+  out[1] = 5. / 6. * in[1] + 1. / 6. * in[2];
+  out[2] = 0.5 * in[2] + 0.5 * in[3];
+}
+
+
+void GetSubframesPitchParameters(int sampling_rate_hz,
+                                 double* gains,
+                                 double* lags,
+                                 int num_in_frames,
+                                 int num_out_frames,
+                                 double* log_old_gain,
+                                 double* old_lag,
+                                 double* log_pitch_gain,
+                                 double* pitch_lag_hz) {
+  // Gain interpolation is in log-domain, also returned in log-domain.
+  for (int n = 0; n < num_in_frames; n++)
+    gains[n] = log(gains[n] + 1e-12);
+
+  // Interpolate lags and gains.
+  PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
+  *log_old_gain = gains[num_in_frames - 1];
+  PitchInterpolation(*old_lag, lags, pitch_lag_hz);
+  *old_lag = lags[num_in_frames - 1];
+
+  // Convert pitch-lags to Hertz.
+  for (int n = 0; n < num_out_frames; n++) {
+    pitch_lag_hz[n] = (sampling_rate_hz) / (pitch_lag_hz[n]);
+  }
+}
diff --git a/webrtc/modules/audio_processing/agc/pitch_internal.h b/webrtc/modules/audio_processing/agc/pitch_internal.h
new file mode 100644
index 0000000000..ed73760e3a
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_internal.h
@@ -0,0 +1,26 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
+
+// TODO(turajs): Write a description of this function. Also be consistent with
+// usage of |sampling_rate_hz| vs |kSamplingFreqHz|.
+void GetSubframesPitchParameters(int sampling_rate_hz,
+                                 double* gains,
+                                 double* lags,
+                                 int num_in_frames,
+                                 int num_out_frames,
+                                 double* log_old_gain,
+                                 double* old_lag,
+                                 double* log_pitch_gain,
+                                 double* pitch_lag_hz);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
diff --git a/webrtc/modules/audio_processing/agc/pitch_internal_unittest.cc b/webrtc/modules/audio_processing/agc/pitch_internal_unittest.cc
new file mode 100644
index 0000000000..8998f9014b
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_internal_unittest.cc
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
+
+#include <math.h>
+
+#include "gtest/gtest.h"
+
+TEST(PitchInternalTest, test) {
+  const int kSamplingRateHz = 8000;
+  const int kNumInputParameters = 4;
+  const int kNumOutputParameters = 3;
+  // Inputs
+  double log_old_gain = log(0.5);
+  double gains[] = {0.6, 0.2, 0.5, 0.4};
+
+  double old_lag = 70;
+  double lags[] = {90, 111, 122, 50};
+
+  // Expected outputs
+  double expected_log_pitch_gain[] = {-0.541212549898316, -1.45672279045507,
+                                      -0.80471895621705};
+  double expected_log_old_gain = log(gains[kNumInputParameters - 1]);
+
+  double expected_pitch_lag_hz[] = {92.3076923076923, 70.9010339734121,
+                                    93.0232558139535};
+  double expected_old_lag = lags[kNumInputParameters - 1];
+
+  double log_pitch_gain[kNumOutputParameters];
+  double pitch_lag_hz[kNumOutputParameters];
+
+  GetSubframesPitchParameters(kSamplingRateHz, gains, lags, kNumInputParameters,
+                              kNumOutputParameters, &log_old_gain, &old_lag,
+                              log_pitch_gain, pitch_lag_hz);
+
+  for (int n = 0; n < kNumOutputParameters; n++) {
+    EXPECT_NEAR(pitch_lag_hz[n], expected_pitch_lag_hz[n], 1e-6);
+    EXPECT_NEAR(log_pitch_gain[n], expected_log_pitch_gain[n], 1e-8);
+  }
+  EXPECT_NEAR(old_lag, expected_old_lag, 1e-6);
+  EXPECT_NEAR(log_old_gain, expected_log_old_gain, 1e-8);
+}
diff --git a/webrtc/modules/audio_processing/agc/pole_zero_filter.cc b/webrtc/modules/audio_processing/agc/pole_zero_filter.cc
new file mode 100644
index 0000000000..3c41e33dd6
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pole_zero_filter.cc
@@ -0,0 +1,129 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+namespace webrtc {
+
+// Returns a new filter, or NULL if either coefficient array is NULL, an order
+// is outside [0, kMaxFilterOrder] or the leading denominator coefficient is 0.
+// The returned object is allocated with new.
+PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
+                                       int order_numerator,
+                                       const float* denominator_coefficients,
+                                       int order_denominator) {
+  // The pointers are validated before |denominator_coefficients| is read.
+  if (numerator_coefficients == NULL ||
+      denominator_coefficients == NULL ||
+      order_numerator < 0 ||
+      order_denominator < 0 ||
+      order_numerator > kMaxFilterOrder ||
+      order_denominator > kMaxFilterOrder ||
+      denominator_coefficients[0] == 0)
+    return NULL;
+  return new PoleZeroFilter(numerator_coefficients, order_numerator,
+                            denominator_coefficients, order_denominator);
+}
+
+PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
+                               int order_numerator,
+                               const float* denominator_coefficients,
+                               int order_denominator)
+    : past_input_(),
+      past_output_(),
+      numerator_coefficients_(),
+      denominator_coefficients_(),
+      order_numerator_(order_numerator),
+      order_denominator_(order_denominator),
+      highest_order_(std::max(order_denominator, order_numerator)) {
+  memcpy(numerator_coefficients_, numerator_coefficients,
+         sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
+  memcpy(denominator_coefficients_, denominator_coefficients,
+         sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));
+
+  // Normalize so that the leading denominator coefficient is 1. The divisor is
+  // copied first; dividing by denominator_coefficients_[0] in place would turn
+  // it into 1 on the first iteration and leave the rest unscaled.
+  const float leading_coefficient = denominator_coefficients_[0];
+  if (leading_coefficient != 1) {
+    for (int n = 0; n <= order_numerator_; n++)
+      numerator_coefficients_[n] /= leading_coefficient;
+    for (int n = 0; n <= order_denominator_; n++)
+      denominator_coefficients_[n] /= leading_coefficient;
+  }
+}
+
+// Returns the sum over k = 1..order of coefficients[k] * past[order - k], so
+// |past| has to point at the oldest of the |order| samples that precede the
+// current one.
+template <typename T>
+static float FilterArPast(const T* past, int order,
+                          const float* coefficients) {
+  float sum = 0.0f;
+  int past_index = order - 1;
+  for (int k = 1; k <= order; k++, past_index--)
+    sum += coefficients[k] * past[past_index];
+  return sum;
+}
+
+// Filters |num_input_samples| samples of |in| into |output|, continuing from
+// the state left by the previous call. Returns 0 on success and -1 if a
+// pointer is NULL or |num_input_samples| is negative.
+int PoleZeroFilter::Filter(const int16_t* in,
+                           int num_input_samples,
+                           float* output) {
+  if (in == NULL || num_input_samples < 0 || output == NULL)
+    return -1;
+  // The first |highest_order_| samples take their history from |past_input_|
+  // and |past_output_|, to which the new samples are appended as we go.
+  const int k = std::min(num_input_samples, highest_order_);
+  int n;
+  for (n = 0; n < k; n++) {
+    output[n] = in[n] * numerator_coefficients_[0];
+    output[n] += FilterArPast(&past_input_[n], order_numerator_,
+                              numerator_coefficients_);
+    output[n] -= FilterArPast(&past_output_[n], order_denominator_,
+                              denominator_coefficients_);
+
+    past_input_[n + order_numerator_] = in[n];
+    past_output_[n + order_denominator_] = output[n];
+  }
+  if (highest_order_ < num_input_samples) {
+    // The remaining samples find their whole history in |in| and |output|.
+    // Each window starts its own order before |n|, which matters when the
+    // numerator and denominator orders differ.
+    for (; n < num_input_samples; n++) {
+      output[n] = in[n] * numerator_coefficients_[0];
+      output[n] += FilterArPast(&in[n - order_numerator_], order_numerator_,
+                                numerator_coefficients_);
+      output[n] -= FilterArPast(&output[n - order_denominator_],
+                                order_denominator_, denominator_coefficients_);
+    }
+    // Record into the past signal.
+    memcpy(past_input_, &in[num_input_samples - order_numerator_],
+           sizeof(in[0]) * order_numerator_);
+    memcpy(past_output_, &output[num_input_samples - order_denominator_],
+           sizeof(output[0]) * order_denominator_);
+  } else {
+    // Odd case that the length of the input is shorter than the filter order.
+    memmove(past_input_, &past_input_[num_input_samples], order_numerator_ *
+            sizeof(past_input_[0]));
+    memmove(past_output_, &past_output_[num_input_samples], order_denominator_ *
+            sizeof(past_output_[0]));
+  }
+  return 0;
+}
+
+}  // namespace webrtc
+
diff --git a/webrtc/modules/audio_processing/agc/pole_zero_filter.h b/webrtc/modules/audio_processing/agc/pole_zero_filter.h
new file mode 100644
index 0000000000..c9d96fdd42
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pole_zero_filter.h
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class PoleZeroFilter {
+ public:
+  ~PoleZeroFilter() {}
+
+  static PoleZeroFilter* Create(const float* numerator_coefficients,
+                                int order_numerator,
+                                const float* denominator_coefficients,
+                                int order_denominator);
+
+  int Filter(const int16_t* in, int num_input_samples, float* output);
+
+ private:
+  PoleZeroFilter(const float* numerator_coefficients,
+                 int order_numerator,
+                 const float* denominator_coefficients,
+                 int order_denominator);
+
+  static const int kMaxFilterOrder = 24;
+
+  int16_t past_input_[kMaxFilterOrder * 2];
+  float past_output_[kMaxFilterOrder * 2];
+
+  float numerator_coefficients_[kMaxFilterOrder + 1];
+  float denominator_coefficients_[kMaxFilterOrder + 1];
+
+  int order_numerator_;
+  int order_denominator_;
+  int highest_order_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
diff --git a/webrtc/modules/audio_processing/agc/pole_zero_filter_unittest.cc b/webrtc/modules/audio_processing/agc/pole_zero_filter_unittest.cc
new file mode 100644
index 0000000000..e487858ef3
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pole_zero_filter_unittest.cc
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+#include "webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h"
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+static const int kInputSamples = 50;
+
+static const int16_t kInput[kInputSamples] = {-2136, -7116, 10715, 2464, 3164,
+    8139, 11393, 24013, -32117, -5544, -27740, 10181, 14190, -24055, -15912,
+    17393, 6359, -9950, -13894, 32432, -23944, 3437, -8381, 19768, 3087, -19795,
+    -5920, 13310, 1407, 3876, 4059, 3524, -23130, 19121, -27900, -24840, 4089,
+    21422, -3625, 3015, -11236, 28856, 13424, 6571, -19761, -6361, 15821, -9469,
+    29727, 32229};
+
+static const float kReferenceOutput[kInputSamples] = {-2082.230472f,
+    -6878.572941f, 10697.090871f, 2358.373952f, 2973.936512f, 7738.580650f,
+    10690.803213f, 22687.091576f, -32676.684717f, -5879.621684f, -27359.297432f,
+    10368.735888f, 13994.584604f, -23676.126249f, -15078.250390f, 17818.253338f,
+    6577.743123f, -9498.369315f, -13073.651079f, 32460.026588f, -23391.849347f,
+    3953.805667f, -7667.761363f, 19995.153447f, 3185.575477f, -19207.365160f,
+    -5143.103201f, 13756.317237f, 1779.654794f, 4142.269755f, 4209.475034f,
+    3572.991789f, -22509.089546f, 19307.878964f, -27060.439759f, -23319.042810f,
+    5547.685267f, 22312.718676f, -2707.309027f, 3852.358490f, -10135.510093f,
+    29241.509970f, 13394.397233f, 6340.721417f, -19510.207905f, -5908.442086f,
+    15882.301634f, -9211.335255f, 29253.056735f, 30874.443046f};
+
+class PoleZeroFilterTest : public ::testing::Test {
+ protected:
+  PoleZeroFilterTest()
+      : my_filter_(PoleZeroFilter::Create(
+            kCoeffNumerator, kFilterOrder, kCoeffDenominator, kFilterOrder)) {}
+
+  ~PoleZeroFilterTest() {}
+
+  void FilterSubframes(int num_subframes);
+
+ private:
+  void TestClean();
+  scoped_ptr<PoleZeroFilter> my_filter_;
+};
+
+// Filters kInput in |num_subframes| equally sized pieces through one filter
+// instance and compares the concatenated output with kReferenceOutput.
+void PoleZeroFilterTest::FilterSubframes(int num_subframes) {
+  ASSERT_TRUE(my_filter_.get() != NULL);
+  float output[kInputSamples];
+  const int num_subframe_samples = kInputSamples / num_subframes;
+  EXPECT_EQ(num_subframe_samples * num_subframes, kInputSamples);
+
+  for (int n = 0; n < num_subframes; n++) {
+    EXPECT_EQ(0, my_filter_->Filter(&kInput[n * num_subframe_samples],
+                                    num_subframe_samples,
+                                    &output[n * num_subframe_samples]));
+  }
+  for (int n = 0; n < kInputSamples; n++) {
+    EXPECT_NEAR(output[n], kReferenceOutput[n], 1);
+  }
+}
+
+TEST_F(PoleZeroFilterTest, OneSubframe) {
+  FilterSubframes(1);
+}
+
+TEST_F(PoleZeroFilterTest, TwoSubframes) {
+  FilterSubframes(2);
+}
+
+TEST_F(PoleZeroFilterTest, FiveSubframes) {
+  FilterSubframes(5);
+}
+
+TEST_F(PoleZeroFilterTest, TenSubframes) {
+  FilterSubframes(10);
+}
+
+TEST_F(PoleZeroFilterTest, TwentyFiveSubframes) {
+  FilterSubframes(25);
+}
+
+TEST_F(PoleZeroFilterTest, FiftySubframes) {
+  FilterSubframes(50);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/standalone_vad.cc b/webrtc/modules/audio_processing/agc/standalone_vad.cc
new file mode 100644
index 0000000000..afd9d7b6dd
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/standalone_vad.cc
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
+
+#include <assert.h>
+
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/modules/utility/interface/audio_frame_operations.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+static const int kDefaultStandaloneVadMode = 3;
+
+StandaloneVad::StandaloneVad(VadInst* vad)
+    : vad_(vad),
+      buffer_(),
+      index_(0),
+      mode_(kDefaultStandaloneVadMode) {}
+
+StandaloneVad::~StandaloneVad() {
+  WebRtcVad_Free(vad_);
+}
+
+StandaloneVad* StandaloneVad::Create() {
+  VadInst* vad = NULL;
+  if (WebRtcVad_Create(&vad) < 0)
+    return NULL;
+
+  int err = WebRtcVad_Init(vad);
+  err |= WebRtcVad_set_mode(vad, kDefaultStandaloneVadMode);
+  if (err != 0) {
+    WebRtcVad_Free(vad);
+    return NULL;
+  }
+  return new StandaloneVad(vad);
+}
+
+int StandaloneVad::AddAudio(const int16_t* data, int length) {
+  if (length != kLength10Ms)
+    return -1;
+
+  if (index_ + length > kLength10Ms * kMaxNum10msFrames)
+    // Reset the buffer if it's full.
+    // TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
+    // can forgo the buffering.
+    index_ = 0;
+
+  memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
+  index_ += length;
+  return 0;
+}
+
+int StandaloneVad::GetActivity(double* p, int length_p) {
+  if (index_ == 0)
+    return -1;
+
+  const int num_frames = index_ / kLength10Ms;
+  if (num_frames > length_p)
+    return -1;
+  assert(WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_) == 0);
+
+  int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
+  if (activity < 0)
+    return -1;
+  else if (activity == 0)
+    p[0] = 0.01;  // Arbitrary but small and non-zero.
+  else
+    p[0] = 0.5;  // 0.5 is a neutral value when combined with other probabilities.
+  for (int n = 1; n < num_frames; n++)
+    p[n] = p[0];
+  // Reset the buffer to start from the beginning.
+  index_ = 0;
+  return activity;
+}
+
+int StandaloneVad::set_mode(int mode) {
+  if (mode < 0 || mode > 3)
+    return -1;
+  if (WebRtcVad_set_mode(vad_, mode) != 0)
+    return -1;
+
+  mode_ = mode;
+  return 0;
+}
+
+}  // namespace webrtc
+
diff --git a/webrtc/modules/audio_processing/agc/standalone_vad.h b/webrtc/modules/audio_processing/agc/standalone_vad.h
new file mode 100644
index 0000000000..6f268381a0
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/standalone_vad.h
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
+
+#include "webrtc/common_audio/vad/include/webrtc_vad.h"
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+
+class StandaloneVad {
+ public:
+  static StandaloneVad* Create();
+  ~StandaloneVad();
+
+  // Outputs
+  //   p: a buffer where probabilities are written to.
+  //   length_p: number of elements of |p|.
+  //
+  // return value:
+  //    -1: if no audio is stored, |p| is too short or VAD returns error.
+  //   >=0: the VAD decision, 0 for passive and non-zero for active audio.
+  // In case of error the content of |p| is unchanged.
+  //
+  // Note that due to a high false-positive (VAD decision is active while the
+  // processed audio is just background noise) rate, stand-alone VAD is used as
+  // a one-sided indicator. The activity probability is 0.5 if the frame is
+  // classified as active, and the probability is 0.01 if the audio is
+  // classified as passive. In this way, when probabilities are combined, the
+  // effect of the stand-alone VAD is neutral if the input is classified as
+  // active.
+  int GetActivity(double* p, int length_p);
+
+  // Expecting 10 ms of 16 kHz audio to be pushed in.
+  // Returns -1 if |length| is not kLength10Ms, otherwise 0. When the internal
+  // buffer cannot take another frame it is restarted from the beginning.
+  int AddAudio(const int16_t* data, int length);
+
+  // Set aggressiveness of VAD, 0 is the least aggressive and 3 is the most
+  // aggressive mode. Returns -1 if the input is less than 0 or larger than 3,
+  // otherwise 0 is returned.
+  int set_mode(int mode);
+  // Get the aggressiveness of the current VAD.
+  int mode() const { return mode_; }
+
+ private:
+  explicit StandaloneVad(VadInst* vad);
+
+  static const int kMaxNum10msFrames = 3;
+
+  // TODO(turajs): Is there a way to use scoped-pointer here?
+  VadInst* vad_;
+  int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
+  int index_;
+  int mode_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
diff --git a/webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc b/webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc
new file mode 100644
index 0000000000..43d09a79b0
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc
@@ -0,0 +1,103 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
+
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(StandaloneVadTest, Api) {
+  scoped_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  int16_t data[kLength10Ms] = { 0 };
+
+  // Valid frame length (for 32 kHz rate), but not what the VAD is expecting.
+  EXPECT_EQ(-1, vad->AddAudio(data, 320));
+
+  const int kMaxNumFrames = 3;
+  double p[kMaxNumFrames];
+  for (int n = 0; n < kMaxNumFrames; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+
+  // Pretend |p| is shorter than it should be.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames - 1));
+
+  EXPECT_EQ(0, vad->GetActivity(p, kMaxNumFrames));
+
+  // Ask for activity when buffer is empty.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames));
+
+  // Should reset and result in one buffer.
+  for (int n = 0; n < kMaxNumFrames + 1; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+  EXPECT_EQ(0, vad->GetActivity(p, 1));
+
+  // Wrong modes
+  EXPECT_EQ(-1, vad->set_mode(-1));
+  EXPECT_EQ(-1, vad->set_mode(4));
+
+  // Valid mode.
+  const int kMode = 2;
+  EXPECT_EQ(0, vad->set_mode(kMode));
+  EXPECT_EQ(kMode, vad->mode());
+}
+
+TEST(StandaloneVadTest, ActivityDetection) {
+  scoped_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  const size_t kDataLength = kLength10Ms;
+  int16_t data[kDataLength] = { 0 };
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/agc/agc_audio", "pcm").c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != NULL);
+
+  FILE* reference_file = fopen(
+      test::ResourcePath("audio_processing/agc/agc_vad", "dat").c_str(), "rb");
+  ASSERT_TRUE(reference_file != NULL);
+
+  // Reference activities are prepared with 0 aggressiveness.
+  ASSERT_EQ(0, vad->set_mode(0));
+
+  // Stand-alone VAD can operate on 1, 2 or 3 frames of length 10 ms. The
+  // reference file is created for 30 ms frame.
+  const int kNumVadFramesToProcess = 3;
+  int num_frames = 0;
+  while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
+    vad->AddAudio(data, kDataLength);
+    num_frames++;
+    if (num_frames == kNumVadFramesToProcess) {
+      num_frames = 0;
+      int referece_activity;
+      double p[kNumVadFramesToProcess];
+      EXPECT_EQ(1u, fread(&referece_activity, sizeof(referece_activity), 1,
+                          reference_file));
+      int activity = vad->GetActivity(p, kNumVadFramesToProcess);
+      EXPECT_EQ(referece_activity, activity);
+      if (activity != 0) {
+        // When active, probabilities are set to 0.5.
+        for (int n = 0; n < kNumVadFramesToProcess; n++)
+          EXPECT_EQ(0.5, p[n]);
+      } else {
+        // When inactive, probabilities are set to 0.01.
+        for (int n = 0; n < kNumVadFramesToProcess; n++)
+          EXPECT_EQ(0.01, p[n]);
+      }
+    }
+  }
+  fclose(reference_file);
+  fclose(pcm_file);
+}
+}
diff --git a/webrtc/modules/audio_processing/agc/test/fake_agc.h b/webrtc/modules/audio_processing/agc/test/fake_agc.h
new file mode 100644
index 0000000000..e2aabd81f8
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/fake_agc.h
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+
+namespace webrtc {
+
+class FakeAgc : public Agc {
+ public:
+  FakeAgc()
+      : counter_(0),
+        volume_(kMaxVolume / 2) {
+  }
+
+  // Steps the fake volume by one (modulo kMaxVolume) on every
+  // kUpdateIntervalFrames-th call. |audio_frame| is not inspected.
+  virtual int Process(const AudioFrame& audio_frame) {
+    const int kUpdateIntervalFrames = 10;
+    if (counter_ % kUpdateIntervalFrames == 0) {
+      volume_ = (volume_ + 1) % kMaxVolume;
+    }
+    counter_++;
+    return 0;
+  }
+
+  virtual int MicVolume() {
+    return volume_;
+  }
+
+ private:
+  // Declared at class scope so that the constructor can use it as well.
+  static const int kMaxVolume = 255;
+
+  int counter_;
+  int volume_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
diff --git a/webrtc/modules/audio_processing/agc/test/test_utils.cc b/webrtc/modules/audio_processing/agc/test/test_utils.cc
new file mode 100644
index 0000000000..e7c884baf4
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/test_utils.cc
@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/test/test_utils.h"
+
+#include <math.h>
+
+#include <algorithm>
+
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+
+float MicLevel2Gain(int gain_range_db, int level) {
+  return (level - 127.0f) / 128.0f * gain_range_db / 2;
+}
+
+float Db2Linear(float db) {
+  return powf(10.0f, db / 20.0f);
+}
+
+void ApplyGainLinear(float gain, float last_gain, AudioFrame* frame) {
+  const int frame_length = frame->samples_per_channel_ * frame->num_channels_;
+  // Smooth the transition between gain levels across the frame.
+  float smoothed_gain = last_gain;
+  float gain_step = (gain - last_gain) / (frame_length - 1);
+  for (int i = 0; i < frame_length; ++i) {
+    smoothed_gain += gain_step;
+    float sample = std::floor(frame->data_[i] * smoothed_gain + 0.5);
+    sample = std::max(std::min(32767.0f, sample), -32768.0f);
+    frame->data_[i] = static_cast<int16_t>(sample);
+  }
+}
+
+void ApplyGain(float gain_db, float last_gain_db, AudioFrame* frame) {
+  ApplyGainLinear(Db2Linear(gain_db), Db2Linear(last_gain_db), frame);
+}
+
+void SimulateMic(int gain_range_db, int mic_level, int last_mic_level,
+                 AudioFrame* frame) {
+  assert(mic_level >= 0 && mic_level <= 255);
+  assert(last_mic_level >= 0 && last_mic_level <= 255);
+  ApplyGain(MicLevel2Gain(gain_range_db, mic_level),
+            MicLevel2Gain(gain_range_db, last_mic_level),
+            frame);
+}
+
+void SimulateMic(int gain_map[256], int mic_level, int last_mic_level,
+                 AudioFrame* frame) {
+  assert(mic_level >= 0 && mic_level <= 255);
+  assert(last_mic_level >= 0 && last_mic_level <= 255);
+  ApplyGain(gain_map[mic_level], gain_map[last_mic_level], frame);
+}
+
+}  // namespace webrtc
+
diff --git a/webrtc/modules/audio_processing/agc/test/test_utils.h b/webrtc/modules/audio_processing/agc/test/test_utils.h
new file mode 100644
index 0000000000..25dc496e40
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/test_utils.h
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_
+namespace webrtc {
+
+class AudioFrame;
+
+// Maps a mic level linearly onto a gain in dB; level 127 gives 0 dB.
+float MicLevel2Gain(int gain_range_db, int level);
+// Converts a gain in dB to a linear amplitude factor, 10^(db / 20).
+float Db2Linear(float db);
+// Scales the samples of |frame|, moving the gain from |last_gain| towards
+// |gain| in equal steps and saturating the result to the int16_t range.
+void ApplyGainLinear(float gain, float last_gain, AudioFrame* frame);
+// Same as ApplyGainLinear() but with both gains given in dB.
+void ApplyGain(float gain_db, float last_gain_db, AudioFrame* frame);
+// Applies the gains that MicLevel2Gain() yields for the two mic levels.
+void SimulateMic(int gain_range_db, int mic_level, int last_mic_level,
+                 AudioFrame* frame);
+// Applies the gains (in dB) that |gain_map| holds for the two mic levels; both
+// levels may be as large as 255, so the map needs 256 entries.
+void SimulateMic(int gain_map[256], int mic_level, int last_mic_level,
+                 AudioFrame* frame);
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_
diff --git a/webrtc/modules/audio_processing/agc/utility.cc b/webrtc/modules/audio_processing/agc/utility.cc
new file mode 100644
index 0000000000..48458adfb5
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/utility.cc
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/utility.h"
+
+#include <math.h>
+
+static const double kLog10 = 2.30258509299;
+static const double kLinear2DbScale = 20.0 / kLog10;
+static const double kLinear2LoudnessScale = 13.4 / kLog10;
+
+double Loudness2Db(double loudness) {
+  return loudness * kLinear2DbScale / kLinear2LoudnessScale;
+}
+
+double Linear2Loudness(double rms) {
+  if (rms == 0)
+    return -15;
+  return kLinear2LoudnessScale * log(rms);
+}
+
+double Db2Loudness(double db) {
+  return db * kLinear2LoudnessScale / kLinear2DbScale;
+}
+
+double Dbfs2Loudness(double dbfs) {
+  return Db2Loudness(90 + dbfs);
+}
diff --git a/webrtc/modules/audio_processing/agc/utility.h b/webrtc/modules/audio_processing/agc/utility.h
new file mode 100644
index 0000000000..df85c2e129
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/utility.h
@@ -0,0 +1,29 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
+
+// Conversions between a linear RMS value, decibels and the loudness scale
+// used here, where loudness = 13.4 * log10(rms).
+
+// Returns |loudness| * 20 / 13.4.
+double Loudness2Db(double loudness);
+
+// Returns 13.4 * log10(|rms|), or -15 when |rms| is exactly 0.
+double Linear2Loudness(double rms);
+
+// Returns |db| * 13.4 / 20.
+double Db2Loudness(double db);
+
+// Returns Db2Loudness(90 + |dbfs|).
+double Dbfs2Loudness(double dbfs);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
diff --git a/webrtc/modules/audio_processing/agc/voice_gmm_tables.h b/webrtc/modules/audio_processing/agc/voice_gmm_tables.h
new file mode 100644
index 0000000000..9a490a47e0
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/voice_gmm_tables.h
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// GMM tables for active segments. Generated by MakeGmmTables.m. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_ + +static const int kVoiceGmmNumMixtures = 12; +static const int kVoiceGmmDim = 3; + +static const double kVoiceGmmCovarInverse[kVoiceGmmNumMixtures] + [kVoiceGmmDim][kVoiceGmmDim] = { + {{ 1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03}, + {-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04}, + { 4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}}, + {{ 6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03}, + {-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05}, + {-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}}, + {{ 9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03}, + {-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05}, + {-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}}, + {{ 3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02}, + {-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05}, + {-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}}, + {{ 1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02}, + {-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05}, + {-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}}, + {{ 1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02}, + {-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06}, + {-1.88029692674851e-02, 
6.45579292702802e-06, 4.32330478391416e-04}}, + {{ 8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02}, + {-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06}, + {-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}}, + {{ 2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04}, + {-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06}, + { 7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}}, + {{ 3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02}, + { 1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05}, + {-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}}, + {{ 6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04}, + {-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06}, + {-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}}, + {{ 2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03}, + {-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05}, + {-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}}, + {{ 1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02}, + {-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05}, + {-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}}; + +static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = { + {-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02}, + {-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02}, + {-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02}, + {-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02}, + {-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02}, + {-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02}, + {-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02}, + {-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02}, + 
{-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02}, + {-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02}, + {-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02}, + {-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}}; + +static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = { + -1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01, + -1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01, + -1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01, + -1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00}; +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_ diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index d36fe70c10..74398f22de 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -9,6 +9,7 @@ { 'variables': { 'audio_processing_dependencies': [ + '<(DEPTH)/webrtc/modules/modules.gyp:iSAC', '<(webrtc_root)/base/base.gyp:rtc_base_approved', '<(webrtc_root)/common_audio/common_audio.gyp:common_audio', '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers', @@ -33,25 +34,52 @@ '<@(audio_processing_dependencies)', ], 'sources': [ - 'aec/include/echo_cancellation.h', + 'aec/aec_core.c', + 'aec/aec_core.h', + 'aec/aec_core_internal.h', + 'aec/aec_rdft.c', + 'aec/aec_rdft.h', + 'aec/aec_resampler.c', + 'aec/aec_resampler.h', 'aec/echo_cancellation.c', 'aec/echo_cancellation_internal.h', - 'aec/aec_core.h', - 'aec/aec_core.c', - 'aec/aec_core_internal.h', - 'aec/aec_rdft.h', - 'aec/aec_rdft.c', - 'aec/aec_resampler.h', - 'aec/aec_resampler.c', - 'aecm/include/echo_control_mobile.h', - 'aecm/echo_control_mobile.c', + 'aec/include/echo_cancellation.h', 'aecm/aecm_core.c', 'aecm/aecm_core.h', - 'agc/include/gain_control.h', + 'aecm/echo_control_mobile.c', + 
'aecm/include/echo_control_mobile.h', + 'agc/agc.cc', + 'agc/agc.h', + 'agc/agc_audio_proc.cc', + 'agc/agc_audio_proc.h', + 'agc/agc_audio_proc_internal.h', + 'agc/agc_manager_direct.cc', + 'agc/agc_manager_direct.h', 'agc/analog_agc.c', 'agc/analog_agc.h', + 'agc/circular_buffer.cc', + 'agc/circular_buffer.h', + 'agc/common.h', 'agc/digital_agc.c', 'agc/digital_agc.h', + 'agc/gain_map_internal.h', + 'agc/gmm.cc', + 'agc/gmm.h', + 'agc/histogram.cc', + 'agc/histogram.h', + 'agc/include/gain_control.h', + 'agc/noise_gmm_tables.h', + 'agc/pitch_based_vad.cc', + 'agc/pitch_based_vad.h', + 'agc/pitch_internal.cc', + 'agc/pitch_internal.h', + 'agc/pole_zero_filter.cc', + 'agc/pole_zero_filter.h', + 'agc/standalone_vad.cc', + 'agc/standalone_vad.h', + 'agc/utility.cc', + 'agc/utility.h', + 'agc/voice_gmm_tables.h', 'audio_buffer.cc', 'audio_buffer.h', 'audio_processing_impl.cc', @@ -74,10 +102,23 @@ 'noise_suppression_impl.h', 'processing_component.cc', 'processing_component.h', - 'splitting_filter.cc', - 'splitting_filter.h', 'rms_level.cc', 'rms_level.h', + 'splitting_filter.cc', + 'splitting_filter.h', + 'transient/common.h', + 'transient/daubechies_8_wavelet_coeffs.h', + 'transient/dyadic_decimator.h', + 'transient/moving_moments.cc', + 'transient/moving_moments.h', + 'transient/transient_detector.cc', + 'transient/transient_detector.h', + 'transient/transient_suppressor.cc', + 'transient/transient_suppressor.h', + 'transient/wpd_node.cc', + 'transient/wpd_node.h', + 'transient/wpd_tree.cc', + 'transient/wpd_tree.h', 'typing_detection.cc', 'typing_detection.h', 'utility/delay_estimator.c', diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index ba22f33536..3ce84fb0b4 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -15,6 +15,8 @@ #include "webrtc/base/platform_file.h" #include 
"webrtc/common_audio/include/audio_util.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h" +#include "webrtc/modules/audio_processing/transient/transient_suppressor.h" #include "webrtc/modules/audio_processing/audio_buffer.h" #include "webrtc/modules/audio_processing/channel_buffer.h" #include "webrtc/modules/audio_processing/common.h" @@ -54,6 +56,85 @@ namespace webrtc { // Throughout webrtc, it's assumed that success is represented by zero. COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero); +// This class has two main functionalities: +// +// 1) It is returned instead of the real GainControl after the new AGC has been +// enabled in order to prevent an outside user from overriding compression +// settings. It doesn't do anything in its implementation, except for +// delegating the const methods and Enable calls to the real GainControl, so +// AGC can still be disabled. +// +// 2) It is injected into AgcManagerDirect and implements volume callbacks for +// getting and setting the volume level. It just caches this value to be used +// in VoiceEngine later. +class GainControlForNewAgc : public GainControl, public VolumeCallbacks { + public: + explicit GainControlForNewAgc(GainControlImpl* gain_control) + : real_gain_control_(gain_control), + volume_(0) { + } + + // GainControl implementation. 
+ virtual int Enable(bool enable) OVERRIDE { + return real_gain_control_->Enable(enable); + } + virtual bool is_enabled() const OVERRIDE { + return real_gain_control_->is_enabled(); + } + virtual int set_stream_analog_level(int level) OVERRIDE { + volume_ = level; + return AudioProcessing::kNoError; + } + virtual int stream_analog_level() OVERRIDE { + return volume_; + } + virtual int set_mode(Mode mode) OVERRIDE { return AudioProcessing::kNoError; } + virtual Mode mode() const OVERRIDE { return GainControl::kAdaptiveAnalog; } + virtual int set_target_level_dbfs(int level) OVERRIDE { + return AudioProcessing::kNoError; + } + virtual int target_level_dbfs() const OVERRIDE { + return real_gain_control_->target_level_dbfs(); + } + virtual int set_compression_gain_db(int gain) OVERRIDE { + return AudioProcessing::kNoError; + } + virtual int compression_gain_db() const OVERRIDE { + return real_gain_control_->compression_gain_db(); + } + virtual int enable_limiter(bool enable) OVERRIDE { + return AudioProcessing::kNoError; + } + virtual bool is_limiter_enabled() const OVERRIDE { + return real_gain_control_->is_limiter_enabled(); + } + virtual int set_analog_level_limits(int minimum, + int maximum) OVERRIDE { + return AudioProcessing::kNoError; + } + virtual int analog_level_minimum() const OVERRIDE { + return real_gain_control_->analog_level_minimum(); + } + virtual int analog_level_maximum() const OVERRIDE { + return real_gain_control_->analog_level_maximum(); + } + virtual bool stream_is_saturated() const OVERRIDE { + return real_gain_control_->stream_is_saturated(); + } + + // VolumeCallbacks implementation. 
+ virtual void SetMicVolume(int volume) OVERRIDE { + volume_ = volume; + } + virtual int GetMicVolume() OVERRIDE { + return volume_; + } + + private: + GainControl* real_gain_control_; + int volume_; +}; + AudioProcessing* AudioProcessing::Create(int id) { return Create(); } @@ -96,7 +177,13 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config) delay_offset_ms_(0), was_stream_delay_set_(false), output_will_be_muted_(false), - key_pressed_(false) { + key_pressed_(false), +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) + use_new_agc_(false), +#else + use_new_agc_(config.Get().enabled), +#endif + transient_suppressor_enabled_(config.Get().enabled) { echo_cancellation_ = new EchoCancellationImpl(this, crit_); component_list_.push_back(echo_cancellation_); @@ -118,12 +205,18 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config) voice_detection_ = new VoiceDetectionImpl(this, crit_); component_list_.push_back(voice_detection_); + gain_control_for_new_agc_.reset(new GainControlForNewAgc(gain_control_)); + SetExtraOptions(config); } AudioProcessingImpl::~AudioProcessingImpl() { { CriticalSectionScoped crit_scoped(crit_); + // Depends on gain_control_ and gain_control_for_new_agc_. + agc_manager_.reset(); + // Depends on gain_control_. 
+ gain_control_for_new_agc_.reset(); while (!component_list_.empty()) { ProcessingComponent* component = component_list_.front(); component->Destroy(); @@ -192,6 +285,16 @@ int AudioProcessingImpl::InitializeLocked() { } } + int err = InitializeExperimentalAgc(); + if (err != kNoError) { + return err; + } + + err = InitializeTransient(); + if (err != kNoError) { + return err; + } + #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { int err = WriteInitMessage(); @@ -303,6 +406,11 @@ void AudioProcessingImpl::SetExtraOptions(const Config& config) { std::list::iterator it; for (it = component_list_.begin(); it != component_list_.end(); ++it) (*it)->SetExtraOptions(config); + + if (transient_suppressor_enabled_ != config.Get().enabled) { + transient_suppressor_enabled_ = config.Get().enabled; + InitializeTransient(); + } } int AudioProcessingImpl::input_sample_rate_hz() const { @@ -337,6 +445,10 @@ int AudioProcessingImpl::num_output_channels() const { void AudioProcessingImpl::set_output_will_be_muted(bool muted) { output_will_be_muted_ = muted; + CriticalSectionScoped lock(crit_); + if (agc_manager_.get()) { + agc_manager_->SetCaptureMuted(output_will_be_muted_); + } } bool AudioProcessingImpl::output_will_be_muted() const { @@ -470,6 +582,12 @@ int AudioProcessingImpl::ProcessStreamLocked() { #endif AudioBuffer* ca = capture_audio_.get(); // For brevity. 
+ if (use_new_agc_ && gain_control_->is_enabled()) { + agc_manager_->AnalyzePreProcess(ca->data(0), + ca->num_channels(), + fwd_proc_format_.samples_per_channel()); + } + bool data_processed = is_data_processed(); if (analysis_needed(data_processed)) { ca->SplitIntoFrequencyBands(); @@ -486,12 +604,35 @@ int AudioProcessingImpl::ProcessStreamLocked() { RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca)); RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca)); RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca)); + + if (use_new_agc_ && gain_control_->is_enabled()) { + agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz], + ca->samples_per_split_channel(), + split_rate_); + } RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca)); if (synthesis_needed(data_processed)) { ca->MergeFrequencyBands(); } + // TODO(aluebs): Investigate if the transient suppression placement should be + // before or after the AGC. + if (transient_suppressor_enabled_) { + float voice_probability = + agc_manager_.get() ? agc_manager_->voice_probability() : 1.f; + + transient_suppressor_->Suppress(ca->data_f(0), + ca->samples_per_channel(), + ca->num_channels(), + ca->split_bands_const_f(0)[kBand0To8kHz], + ca->samples_per_split_channel(), + ca->keyboard_data(), + ca->samples_per_keyboard_channel(), + voice_probability, + key_pressed_); + } + // The level estimator operates on the recombined data. 
RETURN_ON_ERR(level_estimator_->ProcessStream(ca)); @@ -586,7 +727,9 @@ int AudioProcessingImpl::AnalyzeReverseStreamLocked() { RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); - RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); + if (!use_new_agc_) { + RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); + } return kNoError; } @@ -728,6 +871,9 @@ EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const { } GainControl* AudioProcessingImpl::gain_control() const { + if (use_new_agc_) { + return gain_control_for_new_agc_.get(); + } return gain_control_; } @@ -775,7 +921,7 @@ bool AudioProcessingImpl::is_data_processed() const { bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const { // Check if we've upmixed or downmixed the audio. return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) || - is_data_processed); + is_data_processed || transient_suppressor_enabled_); } bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const { @@ -784,7 +930,8 @@ bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const { } bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { - if (!is_data_processed && !voice_detection_->is_enabled()) { + if (!is_data_processed && !voice_detection_->is_enabled() && + !transient_suppressor_enabled_) { // Only level_estimator_ is enabled. 
return false; } else if (fwd_proc_format_.rate() == kSampleRate32kHz || @@ -795,6 +942,30 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { return false; } +int AudioProcessingImpl::InitializeExperimentalAgc() { + if (use_new_agc_) { + if (!agc_manager_.get()) { + agc_manager_.reset( + new AgcManagerDirect(gain_control_, gain_control_for_new_agc_.get())); + } + agc_manager_->Initialize(); + agc_manager_->SetCaptureMuted(output_will_be_muted_); + } + return kNoError; +} + +int AudioProcessingImpl::InitializeTransient() { + if (transient_suppressor_enabled_) { + if (!transient_suppressor_.get()) { + transient_suppressor_.reset(new TransientSuppressor()); + } + transient_suppressor_->Initialize(fwd_proc_format_.rate(), + split_rate_, + fwd_out_format_.num_channels()); + } + return kNoError; +} + #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP int AudioProcessingImpl::WriteMessageToDebugFile() { int32_t size = event_msg_->ByteSize(); diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index caab37962a..be70273e65 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -8,28 +8,32 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_ #include "webrtc/modules/audio_processing/include/audio_processing.h" #include #include +#include "webrtc/base/thread_annotations.h" #include "webrtc/system_wrappers/interface/scoped_ptr.h" namespace webrtc { +class AgcManagerDirect; class AudioBuffer; class CriticalSectionWrapper; class EchoCancellationImpl; class EchoControlMobileImpl; class FileWrapper; class GainControlImpl; +class GainControlForNewAgc; class HighPassFilterImpl; class LevelEstimatorImpl; class NoiseSuppressionImpl; class ProcessingComponent; +class TransientSuppressor; class VoiceDetectionImpl; #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP @@ -138,7 +142,7 @@ class AudioProcessingImpl : public AudioProcessing { protected: // Overridden in a mock. 
- virtual int InitializeLocked(); + virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); private: int InitializeLocked(int input_sample_rate_hz, @@ -146,20 +150,24 @@ class AudioProcessingImpl : public AudioProcessing { int reverse_sample_rate_hz, int num_input_channels, int num_output_channels, - int num_reverse_channels); + int num_reverse_channels) + EXCLUSIVE_LOCKS_REQUIRED(crit_); int MaybeInitializeLocked(int input_sample_rate_hz, int output_sample_rate_hz, int reverse_sample_rate_hz, int num_input_channels, int num_output_channels, - int num_reverse_channels); - int ProcessStreamLocked(); - int AnalyzeReverseStreamLocked(); + int num_reverse_channels) + EXCLUSIVE_LOCKS_REQUIRED(crit_); + int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); + int AnalyzeReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); bool is_data_processed() const; bool output_copy_needed(bool is_data_processed) const; bool synthesis_needed(bool is_data_processed) const; bool analysis_needed(bool is_data_processed) const; + int InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_); + int InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_); EchoCancellationImpl* echo_cancellation_; EchoControlMobileImpl* echo_control_mobile_; @@ -168,6 +176,7 @@ class AudioProcessingImpl : public AudioProcessing { LevelEstimatorImpl* level_estimator_; NoiseSuppressionImpl* noise_suppression_; VoiceDetectionImpl* voice_detection_; + scoped_ptr gain_control_for_new_agc_; std::list component_list_; CriticalSectionWrapper* crit_; @@ -199,8 +208,15 @@ class AudioProcessingImpl : public AudioProcessing { bool output_will_be_muted_; bool key_pressed_; + + // Only set through the constructor's Config parameter. 
+ const bool use_new_agc_; + scoped_ptr agc_manager_ GUARDED_BY(crit_); + + bool transient_suppressor_enabled_; + scoped_ptr transient_suppressor_; }; } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_ diff --git a/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc b/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc index 0957617575..f4c36d0009 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc @@ -27,7 +27,9 @@ class MockInitialize : public AudioProcessingImpl { } MOCK_METHOD0(InitializeLocked, int()); - int RealInitializeLocked() { return AudioProcessingImpl::InitializeLocked(); } + int RealInitializeLocked() NO_THREAD_SAFETY_ANALYSIS { + return AudioProcessingImpl::InitializeLocked(); + } }; TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) { diff --git a/webrtc/modules/audio_processing/audio_processing_tests.gypi b/webrtc/modules/audio_processing/audio_processing_tests.gypi index 99b80f291b..627e669452 100644 --- a/webrtc/modules/audio_processing/audio_processing_tests.gypi +++ b/webrtc/modules/audio_processing/audio_processing_tests.gypi @@ -46,6 +46,33 @@ ], 'sources': [ 'test/unpack.cc', ], }, + { + 'target_name': 'transient_suppression_test', + 'type': 'executable', + 'dependencies': [ + '<(DEPTH)/testing/gtest.gyp:gtest', + '<(DEPTH)/third_party/gflags/gflags.gyp:gflags', + '<(webrtc_root)/test/test.gyp:test_support', + '<(webrtc_root)/modules/modules.gyp:audio_processing', + ], + 'sources': [ + 'transient/transient_suppression_test.cc', + 'transient/file_utils.cc', + 'transient/file_utils.h', + ], + }, # transient_suppression_test + { + 'target_name': 'click_annotate', + 'type': 'executable', + 'dependencies': [ + '<(webrtc_root)/modules/modules.gyp:audio_processing', + ], + 'sources': [ + 
'transient/click_annotate.cc', + 'transient/file_utils.cc', + 'transient/file_utils.h', + ], + }, # click_annotate ], }], ], diff --git a/webrtc/modules/audio_processing/transient/click_annotate.cc b/webrtc/modules/audio_processing/transient/click_annotate.cc new file mode 100644 index 0000000000..f525366d6e --- /dev/null +++ b/webrtc/modules/audio_processing/transient/click_annotate.cc @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include +#include + +#include "webrtc/modules/audio_processing/transient/transient_detector.h" +#include "webrtc/modules/audio_processing/transient/file_utils.h" +#include "webrtc/system_wrappers/interface/file_wrapper.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +using webrtc::FileWrapper; +using webrtc::TransientDetector; +using webrtc::scoped_ptr; + +// Application to generate a RTP timing file. +// Opens the PCM file and divides the signal in frames. +// Creates a send times array, one for each step. +// Each block that contains a transient, has an infinite send time. +// The resultant array is written to a DAT file +// Returns -1 on error or |lost_packets| otherwise. +int main(int argc, char* argv[]) { + if (argc != 5) { + printf("\n%s - Application to generate a RTP timing file.\n\n", argv[0]); + printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]); + printf("Opens the PCMfile with sampleRate in Hertz.\n"); + printf("Creates a send times array, one for each chunkSize "); + printf("milliseconds step.\n"); + printf("Each block that contains a transient, has an infinite send time. 
"); + printf("The resultant array is written to a DATfile.\n\n"); + return 0; + } + + scoped_ptr pcm_file(FileWrapper::Create()); + pcm_file->OpenFile(argv[1], true, false, false); + if (!pcm_file->Open()) { + printf("\nThe %s could not be opened.\n\n", argv[1]); + return -1; + } + + scoped_ptr dat_file(FileWrapper::Create()); + dat_file->OpenFile(argv[2], false, false, false); + if (!dat_file->Open()) { + printf("\nThe %s could not be opened.\n\n", argv[2]); + return -1; + } + + int chunk_size_ms = atoi(argv[3]); + if (chunk_size_ms <= 0) { + printf("\nThe chunkSize must be a positive integer\n\n"); + return -1; + } + + int sample_rate_hz = atoi(argv[4]); + if (sample_rate_hz <= 0) { + printf("\nThe sampleRate must be a positive integer\n\n"); + return -1; + } + + TransientDetector detector(sample_rate_hz); + int lost_packets = 0; + size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000; + scoped_ptr audio_buffer(new float[audio_buffer_length]); + std::vector send_times; + + // Read first buffer from the PCM test file. + size_t file_samples_read = ReadInt16FromFileToFloatBuffer( + pcm_file.get(), + audio_buffer_length, + audio_buffer.get()); + for (int time = 0; file_samples_read > 0; time += chunk_size_ms) { + // Pad the rest of the buffer with zeros. + for (size_t i = file_samples_read; i < audio_buffer_length; ++i) { + audio_buffer[i] = 0.0; + } + float value = + detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0); + if (value < 0.5f) { + value = time; + } else { + value = FLT_MAX; + ++lost_packets; + } + send_times.push_back(value); + + // Read next buffer from the PCM test file. 
+ file_samples_read = ReadInt16FromFileToFloatBuffer(pcm_file.get(), + audio_buffer_length, + audio_buffer.get()); + } + + size_t floats_written = WriteFloatBufferToFile(dat_file.get(), + send_times.size(), + &send_times[0]); + + if (floats_written == 0) { + printf("\nThe send times could not be written to DAT file\n\n"); + return -1; + } + + pcm_file->CloseFile(); + dat_file->CloseFile(); + + return lost_packets; +} diff --git a/webrtc/modules/audio_processing/transient/common.h b/webrtc/modules/audio_processing/transient/common.h new file mode 100644 index 0000000000..92194e97e6 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/common.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ +namespace webrtc { +namespace ts { + +static const float kPi = 3.14159265358979323846f; +static const int kChunkSizeMs = 10; +enum { + kSampleRate8kHz = 8000, + kSampleRate16kHz = 16000, + kSampleRate32kHz = 32000, + kSampleRate48kHz = 48000 +}; + +} // namespace ts +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ diff --git a/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h b/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h new file mode 100644 index 0000000000..b1236ac732 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file defines the high-pass and low-pass decomposition filter +// coefficients of the Daubechies 8 wavelet. +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_ + +// Decomposition coefficients Daubechies 8. + +namespace webrtc { + +const int kDaubechies8CoefficientsLength = 16; + +const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength] + = { + -5.44158422430816093862e-02f, + 3.12871590914465924627e-01f, + -6.75630736298012846142e-01f, + 5.85354683654869090148e-01f, + 1.58291052560238926228e-02f, + -2.84015542962428091389e-01f, + -4.72484573997972536787e-04f, + 1.28747426620186011803e-01f, + 1.73693010020221083600e-02f, + -4.40882539310647192377e-02f, + -1.39810279170155156436e-02f, + 8.74609404701565465445e-03f, + 4.87035299301066034600e-03f, + -3.91740372995977108837e-04f, + -6.75449405998556772109e-04f, + -1.17476784002281916305e-04f +}; + +const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = { + -1.17476784002281916305e-04f, + 6.75449405998556772109e-04f, + -3.91740372995977108837e-04f, + -4.87035299301066034600e-03f, + 8.74609404701565465445e-03f, + 1.39810279170155156436e-02f, + -4.40882539310647192377e-02f, + -1.73693010020221083600e-02f, + 1.28747426620186011803e-01f, + 4.72484573997972536787e-04f, + -2.84015542962428091389e-01f, + -1.58291052560238926228e-02f, + 5.85354683654869090148e-01f, + 6.75630736298012846142e-01f, + 3.12871590914465924627e-01f, + 5.44158422430816093862e-02f +}; + +} // namespace webrtc + +#endif //
WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_ diff --git a/webrtc/modules/audio_processing/transient/dyadic_decimator.h b/webrtc/modules/audio_processing/transient/dyadic_decimator.h new file mode 100644 index 0000000000..c1046f2293 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/dyadic_decimator.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_ + +#include + +#include "webrtc/typedefs.h" + +// Provides a set of static methods to perform dyadic decimations. + +namespace webrtc { + +// Returns the proper length of the output buffer that you should use for the +// given |in_length| and decimation |odd_sequence|. +// Never fails: every |in_length| yields a valid length (0 for empty input). +inline size_t GetOutLengthToDyadicDecimate(size_t in_length, + bool odd_sequence) { + size_t out_length = in_length / 2; + + if (in_length % 2 == 1 && !odd_sequence) { + ++out_length; + } + + return out_length; +} + +// Performs a dyadic decimation: removes every odd/even member of a sequence +// halving its overall length. +// Arguments: +// in: array of |in_length|. +// odd_sequence: If false, the odd members will be removed (1, 3, 5, ...); +// if true, the even members will be removed (0, 2, 4, ...). +// out: array of |out_length|. |out_length| must be large enough to +// hold the decimated output. The necessary length can be provided by +// GetOutLengthToDyadicDecimate(). +// Must be previously allocated. +// Returns the number of output samples, or 0 on error.
+template +static size_t DyadicDecimate(const T* in, + size_t in_length, + bool odd_sequence, + T* out, + size_t out_length) { + size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence); + + if (!in || !out || in_length <= 0 || out_length < half_length) { + return 0; + } + + size_t output_samples = 0; + size_t index_adjustment = odd_sequence ? 1 : 0; + for (output_samples = 0; output_samples < half_length; ++output_samples) { + out[output_samples] = in[output_samples * 2 + index_adjustment]; + } + + return output_samples; +} + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_ diff --git a/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc b/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc new file mode 100644 index 0000000000..f5c9f49dba --- /dev/null +++ b/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h" + +#include "testing/gtest/include/gtest/gtest.h" + +namespace webrtc { + +static const size_t kEvenBufferLength = 6; +static const size_t kOddBufferLength = 5; +static const size_t kOutBufferLength = 3; + +int16_t const test_buffer_even_len[] = {0, 1, 2, 3, 4, 5}; +int16_t const test_buffer_odd_len[] = {0, 1, 2, 3, 4}; +int16_t test_buffer_out[kOutBufferLength]; + +TEST(DyadicDecimatorTest, GetOutLengthToDyadicDecimate) { + EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, false)); + EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, true)); + EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(5, false)); + EXPECT_EQ(2u, GetOutLengthToDyadicDecimate(5, true)); +} + + +TEST(DyadicDecimatorTest, DyadicDecimateErrorValues) { + size_t out_samples = 0; + + out_samples = DyadicDecimate(static_cast(NULL), + kEvenBufferLength, + false, // Even sequence. + test_buffer_out, + kOutBufferLength); + EXPECT_EQ(0u, out_samples); + + out_samples = DyadicDecimate(test_buffer_even_len, + kEvenBufferLength, + false, // Even sequence. + static_cast(NULL), + kOutBufferLength); + EXPECT_EQ(0u, out_samples); + + // Less than required |out_length|. + out_samples = DyadicDecimate(test_buffer_even_len, + kEvenBufferLength, + false, // Even sequence. + test_buffer_out, + 2); + EXPECT_EQ(0u, out_samples); +} + +TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthEvenSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kEvenBufferLength, false); + + size_t out_samples = DyadicDecimate(test_buffer_even_len, + kEvenBufferLength, + false, // Even sequence. 
+ test_buffer_out, + kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(0, test_buffer_out[0]); + EXPECT_EQ(2, test_buffer_out[1]); + EXPECT_EQ(4, test_buffer_out[2]); +} + +TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthOddSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kEvenBufferLength, true); + + size_t out_samples = DyadicDecimate(test_buffer_even_len, + kEvenBufferLength, + true, // Odd sequence. + test_buffer_out, + kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(1, test_buffer_out[0]); + EXPECT_EQ(3, test_buffer_out[1]); + EXPECT_EQ(5, test_buffer_out[2]); +} + +TEST(DyadicDecimatorTest, DyadicDecimateOddLengthEvenSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kOddBufferLength, false); + + size_t out_samples = DyadicDecimate(test_buffer_odd_len, + kOddBufferLength, + false, // Even sequence. + test_buffer_out, + kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(0, test_buffer_out[0]); + EXPECT_EQ(2, test_buffer_out[1]); + EXPECT_EQ(4, test_buffer_out[2]); +} + +TEST(DyadicDecimatorTest, DyadicDecimateOddLengthOddSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kOddBufferLength, true); + + size_t out_samples = DyadicDecimate(test_buffer_odd_len, + kOddBufferLength, + true, // Odd sequence. + test_buffer_out, + kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(1, test_buffer_out[0]); + EXPECT_EQ(3, test_buffer_out[1]); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/file_utils.cc b/webrtc/modules/audio_processing/transient/file_utils.cc new file mode 100644 index 0000000000..c7415bdea8 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/file_utils.cc @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/transient/file_utils.h" + +#include "webrtc/system_wrappers/interface/file_wrapper.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) { + if (!bytes || !out) { + return -1; + } + + uint32_t binary_value = 0; + for (int i = 3; i >= 0; --i) { + binary_value <<= 8; + binary_value += bytes[i]; + } + + *out = bit_cast(binary_value); + + return 0; +} + +int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) { + if (!bytes || !out) { + return -1; + } + + uint64_t binary_value = 0; + for (int i = 7; i >= 0; --i) { + binary_value <<= 8; + binary_value += bytes[i]; + } + + *out = bit_cast(binary_value); + + return 0; +} + +int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) { + if (!out_bytes) { + return -1; + } + + uint32_t binary_value = bit_cast(value); + for (size_t i = 0; i < 4; ++i) { + out_bytes[i] = binary_value; + binary_value >>= 8; + } + + return 0; +} + +int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) { + if (!out_bytes) { + return -1; + } + + uint64_t binary_value = bit_cast(value); + for (size_t i = 0; i < 8; ++i) { + out_bytes[i] = binary_value; + binary_value >>= 8; + } + + return 0; +} + +size_t ReadInt16BufferFromFile(FileWrapper* file, + size_t length, + int16_t* buffer) { + if (!file || !file->Open() || !buffer || length <= 0) { + return 0; + } + + scoped_ptr byte_array(new uint8_t[2]); + + size_t int16s_read = 0; + + while (int16s_read < length) { + size_t bytes_read = file->Read(byte_array.get(), 2); + if 
(bytes_read < 2) { + break; + } + int16_t value = byte_array[1]; + value <<= 8; + value += byte_array[0]; + buffer[int16s_read] = value; + ++int16s_read; + } + + return int16s_read; +} + +size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file, + size_t length, + float* buffer) { + if (!file || !file->Open() || !buffer || length <= 0) { + return 0; + } + + scoped_ptr buffer16(new int16_t[length]); + + size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get()); + + for (size_t i = 0; i < int16s_read; ++i) { + buffer[i] = buffer16[i]; + } + + return int16s_read; +} + +size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file, + size_t length, + double* buffer) { + if (!file || !file->Open() || !buffer || length <= 0) { + return 0; + } + + scoped_ptr buffer16(new int16_t[length]); + + size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get()); + + for (size_t i = 0; i < int16s_read; ++i) { + buffer[i] = buffer16[i]; + } + + return int16s_read; +} + +size_t ReadFloatBufferFromFile(FileWrapper* file, + size_t length, + float* buffer) { + if (!file || !file->Open() || !buffer || length <= 0) { + return 0; + } + + scoped_ptr byte_array(new uint8_t[4]); + + size_t floats_read = 0; + + while (floats_read < length) { + size_t bytes_read = file->Read(byte_array.get(), 4); + if (bytes_read < 4) { + break; + } + ConvertByteArrayToFloat(byte_array.get(), &buffer[floats_read]); + ++floats_read; + } + + return floats_read; +} + +size_t ReadDoubleBufferFromFile(FileWrapper* file, + size_t length, + double* buffer) { + if (!file || !file->Open() || !buffer || length <= 0) { + return 0; + } + + scoped_ptr byte_array(new uint8_t[8]); + + size_t doubles_read = 0; + + while (doubles_read < length) { + size_t bytes_read = file->Read(byte_array.get(), 8); + if (bytes_read < 8) { + break; + } + ConvertByteArrayToDouble(byte_array.get(), &buffer[doubles_read]); + ++doubles_read; + } + + return doubles_read; +} + +size_t WriteInt16BufferToFile(FileWrapper* 
file, + size_t length, + const int16_t* buffer) { + if (!file || !file->Open() || !buffer || length <= 0) { + return 0; + } + + scoped_ptr byte_array(new uint8_t[2]); + + size_t int16s_written = 0; + + for (int16s_written = 0; int16s_written < length; ++int16s_written) { + // Get byte representation. + byte_array[0] = buffer[int16s_written] & 0xFF; + byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF; + + file->Write(byte_array.get(), 2); + } + + file->Flush(); + + return int16s_written; +} + +size_t WriteFloatBufferToFile(FileWrapper* file, + size_t length, + const float* buffer) { + if (!file || !file->Open() || !buffer || length <= 0) { + return 0; + } + + scoped_ptr byte_array(new uint8_t[4]); + + size_t floats_written = 0; + + for (floats_written = 0; floats_written < length; ++floats_written) { + // Get byte representation. + ConvertFloatToByteArray(buffer[floats_written], byte_array.get()); + + file->Write(byte_array.get(), 4); + } + + file->Flush(); + + return floats_written; +} + +size_t WriteDoubleBufferToFile(FileWrapper* file, + size_t length, + const double* buffer) { + if (!file || !file->Open() || !buffer || length <= 0) { + return 0; + } + + scoped_ptr byte_array(new uint8_t[8]); + + size_t doubles_written = 0; + + for (doubles_written = 0; doubles_written < length; ++doubles_written) { + // Get byte representation. + ConvertDoubleToByteArray(buffer[doubles_written], byte_array.get()); + + file->Write(byte_array.get(), 8); + } + + file->Flush(); + + return doubles_written; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/file_utils.h b/webrtc/modules/audio_processing/transient/file_utils.h new file mode 100644 index 0000000000..8dc477de5e --- /dev/null +++ b/webrtc/modules/audio_processing/transient/file_utils.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_ + +#include + +#include "webrtc/base/compile_assert.h" +#include "webrtc/system_wrappers/interface/file_wrapper.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// This is a copy of the cast included in the Chromium codebase here: +// http://cs.chromium.org/src/third_party/cld/base/casts.h +template +inline Dest bit_cast(const Source& source) { + // A compile error here means your Dest and Source have different sizes. + COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), + dest_and_source_have_different_sizes); + + Dest dest; + memcpy(&dest, &source, sizeof(dest)); + return dest; +} + +// Converts the byte array with binary float representation to float. +// Bytes must be in little-endian order. +// Returns 0 if correct, -1 on error. +int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out); + +// Converts the byte array with binary double representation to double. +// Bytes must be in little-endian order. +// Returns 0 if correct, -1 on error. +int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out); + +// Converts a float to a byte array with binary float representation. +// Bytes will be in little-endian order. +// Returns 0 if correct, -1 on error. +int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]); + +// Converts a double to a byte array with binary double representation. +// Bytes will be in little-endian order. +// Returns 0 if correct, -1 on error. 
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]); + +// Reads |length| 16-bit integers from |file| to |buffer|. +// |file| must be previously opened. +// Returns the number of 16-bit integers read or -1 on error. +size_t ReadInt16BufferFromFile(FileWrapper* file, + size_t length, + int16_t* buffer); + +// Reads |length| 16-bit integers from |file| and stores those values +// (converting them) in |buffer|. +// |file| must be previously opened. +// Returns the number of 16-bit integers read or -1 on error. +size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file, + size_t length, + float* buffer); + +// Reads |length| 16-bit integers from |file| and stores those values +// (converting them) in |buffer|. +// |file| must be previously opened. +// Returns the number of 16-bit integers read or -1 on error. +size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file, + size_t length, + double* buffer); + +// Reads |length| floats in binary representation (4 bytes) from |file| to +// |buffer|. +// |file| must be previously opened. +// Returns the number of floats read or -1 on error. +size_t ReadFloatBufferFromFile(FileWrapper* file, size_t length, float* buffer); + +// Reads |length| doubles in binary representation (8 bytes) from |file| to +// |buffer|. +// |file| must be previously opened. +// Returns the number of doubles read or -1 on error. +size_t ReadDoubleBufferFromFile(FileWrapper* file, + size_t length, + double* buffer); + +// Writes |length| 16-bit integers from |buffer| in binary representation (2 +// bytes) to |file|. It flushes |file|, so after this call there are no +// writings pending. +// |file| must be previously opened. +// Returns the number of doubles written or -1 on error. +size_t WriteInt16BufferToFile(FileWrapper* file, + size_t length, + const int16_t* buffer); + +// Writes |length| floats from |buffer| in binary representation (4 bytes) to +// |file|. It flushes |file|, so after this call there are no writtings pending. 
+// |file| must be previously opened. +// Returns the number of doubles written or -1 on error. +size_t WriteFloatBufferToFile(FileWrapper* file, + size_t length, + const float* buffer); + +// Writes |length| doubles from |buffer| in binary representation (8 bytes) to +// |file|. It flushes |file|, so after this call there are no writings pending. +// |file| must be previously opened. +// Returns the number of doubles written or -1 on error. +size_t WriteDoubleBufferToFile(FileWrapper* file, + size_t length, + const double* buffer); + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_ diff --git a/webrtc/modules/audio_processing/transient/file_utils_unittest.cc b/webrtc/modules/audio_processing/transient/file_utils_unittest.cc new file mode 100644 index 0000000000..af2f9b39b0 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/file_utils_unittest.cc @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/transient/file_utils.h" + +#include +#include + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/system_wrappers/interface/file_wrapper.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/test/testsupport/fileutils.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +static const uint8_t kPiBytesf[4] = {0xDB, 0x0F, 0x49, 0x40}; +static const uint8_t kEBytesf[4] = {0x54, 0xF8, 0x2D, 0x40}; +static const uint8_t kAvogadroBytesf[4] = {0x2F, 0x0C, 0xFF, 0x66}; + +static const uint8_t kPiBytes[8] = + {0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40}; +static const uint8_t kEBytes[8] = + {0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40}; +static const uint8_t kAvogadroBytes[8] = + {0xF4, 0xBC, 0xA8, 0xDF, 0x85, 0xE1, 0xDF, 0x44}; + +static const double kPi = 3.14159265358979323846; +static const double kE = 2.71828182845904523536; +static const double kAvogadro = 602214100000000000000000.0; + +class TransientFileUtilsTest: public ::testing::Test { + protected: + TransientFileUtilsTest() + : kTestFileName( + test::ResourcePath("audio_processing/transient/double-utils", + "dat")), + kTestFileNamef( + test::ResourcePath("audio_processing/transient/float-utils", + "dat")) {} + // This file (used in some tests) contains binary data. The data correspond to + // the double representation of the constants: Pi, E, and the Avogadro's + // Number; + // appended in that order. + const std::string kTestFileName; + + // This file (used in some tests) contains binary data. The data correspond to + // the float representation of the constants: Pi, E, and the Avogadro's + // Number; + // appended in that order. 
+ const std::string kTestFileNamef; +}; + +TEST_F(TransientFileUtilsTest, ConvertByteArrayToFloat) { + float value = 0.0; + + EXPECT_EQ(0, ConvertByteArrayToFloat(kPiBytesf, &value)); + EXPECT_FLOAT_EQ(kPi, value); + + EXPECT_EQ(0, ConvertByteArrayToFloat(kEBytesf, &value)); + EXPECT_FLOAT_EQ(kE, value); + + EXPECT_EQ(0, ConvertByteArrayToFloat(kAvogadroBytesf, &value)); + EXPECT_FLOAT_EQ(kAvogadro, value); +} + +TEST_F(TransientFileUtilsTest, ConvertByteArrayToDouble) { + double value = 0.0; + + EXPECT_EQ(0, ConvertByteArrayToDouble(kPiBytes, &value)); + EXPECT_DOUBLE_EQ(kPi, value); + + EXPECT_EQ(0, ConvertByteArrayToDouble(kEBytes, &value)); + EXPECT_DOUBLE_EQ(kE, value); + + EXPECT_EQ(0, ConvertByteArrayToDouble(kAvogadroBytes, &value)); + EXPECT_DOUBLE_EQ(kAvogadro, value); +} + +TEST_F(TransientFileUtilsTest, ConvertFloatToByteArray) { + scoped_ptr bytes(new uint8_t[4]); + + EXPECT_EQ(0, ConvertFloatToByteArray(kPi, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kPiBytesf, 4)); + + EXPECT_EQ(0, ConvertFloatToByteArray(kE, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kEBytesf, 4)); + + EXPECT_EQ(0, ConvertFloatToByteArray(kAvogadro, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytesf, 4)); +} + +TEST_F(TransientFileUtilsTest, ConvertDoubleToByteArray) { + scoped_ptr bytes(new uint8_t[8]); + + EXPECT_EQ(0, ConvertDoubleToByteArray(kPi, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kPiBytes, 8)); + + EXPECT_EQ(0, ConvertDoubleToByteArray(kE, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kEBytes, 8)); + + EXPECT_EQ(0, ConvertDoubleToByteArray(kAvogadro, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytes, 8)); +} + +TEST_F(TransientFileUtilsTest, ReadInt16BufferFromFile) { + std::string test_filename = kTestFileName; + + scoped_ptr file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), + true, // Read only. + true, // Loop. + false); // No text. 
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + const size_t kBufferLength = 12; + scoped_ptr buffer(new int16_t[kBufferLength]); + + EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(), + kBufferLength, + buffer.get())); + EXPECT_EQ(22377, buffer[4]); + EXPECT_EQ(16389, buffer[7]); + EXPECT_EQ(17631, buffer[kBufferLength - 1]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // int16s read. + const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new int16_t[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_EQ(11544, buffer[0]); + EXPECT_EQ(22377, buffer[4]); + EXPECT_EQ(16389, buffer[7]); + EXPECT_EQ(17631, buffer[kBufferLength - 1]); +} + +TEST_F(TransientFileUtilsTest, ReadInt16FromFileToFloatBuffer) { + std::string test_filename = kTestFileName; + + scoped_ptr file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), + true, // Read only. + true, // Loop. + false); // No text. + ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + const size_t kBufferLength = 12; + scoped_ptr buffer(new float[kBufferLength]); + + EXPECT_EQ(kBufferLength, ReadInt16FromFileToFloatBuffer(file.get(), + kBufferLength, + buffer.get())); + + EXPECT_DOUBLE_EQ(11544, buffer[0]); + EXPECT_DOUBLE_EQ(22377, buffer[4]); + EXPECT_DOUBLE_EQ(16389, buffer[7]); + EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // int16s read. 
+ const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new float[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, + ReadInt16FromFileToFloatBuffer(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_DOUBLE_EQ(11544, buffer[0]); + EXPECT_DOUBLE_EQ(22377, buffer[4]); + EXPECT_DOUBLE_EQ(16389, buffer[7]); + EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); +} + +TEST_F(TransientFileUtilsTest, ReadInt16FromFileToDoubleBuffer) { + std::string test_filename = kTestFileName; + + scoped_ptr file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), + true, // Read only. + true, // Loop. + false); // No text. + ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + const size_t kBufferLength = 12; + scoped_ptr buffer(new double[kBufferLength]); + + EXPECT_EQ(kBufferLength, ReadInt16FromFileToDoubleBuffer(file.get(), + kBufferLength, + buffer.get())); + EXPECT_DOUBLE_EQ(11544, buffer[0]); + EXPECT_DOUBLE_EQ(22377, buffer[4]); + EXPECT_DOUBLE_EQ(16389, buffer[7]); + EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // int16s read. + const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new double[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, + ReadInt16FromFileToDoubleBuffer(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_DOUBLE_EQ(11544, buffer[0]); + EXPECT_DOUBLE_EQ(22377, buffer[4]); + EXPECT_DOUBLE_EQ(16389, buffer[7]); + EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); +} + +TEST_F(TransientFileUtilsTest, ReadFloatBufferFromFile) { + std::string test_filename = kTestFileNamef; + + scoped_ptr file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), + true, // Read only. + true, // Loop. + false); // No text. 
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kTestFileNamef.c_str(); + + const size_t kBufferLength = 3; + scoped_ptr buffer(new float[kBufferLength]); + + + EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(), + kBufferLength, + buffer.get())); + EXPECT_FLOAT_EQ(kPi, buffer[0]); + EXPECT_FLOAT_EQ(kE, buffer[1]); + EXPECT_FLOAT_EQ(kAvogadro, buffer[2]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // doubles read. + const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new float[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_FLOAT_EQ(kPi, buffer[0]); + EXPECT_FLOAT_EQ(kE, buffer[1]); + EXPECT_FLOAT_EQ(kAvogadro, buffer[2]); +} + +TEST_F(TransientFileUtilsTest, ReadDoubleBufferFromFile) { + std::string test_filename = kTestFileName; + + scoped_ptr file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), + true, // Read only. + true, // Loop. + false); // No text. + ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + const size_t kBufferLength = 3; + scoped_ptr buffer(new double[kBufferLength]); + + + EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(), + kBufferLength, + buffer.get())); + EXPECT_DOUBLE_EQ(kPi, buffer[0]); + EXPECT_DOUBLE_EQ(kE, buffer[1]); + EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // doubles read. 
+ const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new double[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_DOUBLE_EQ(kPi, buffer[0]); + EXPECT_DOUBLE_EQ(kE, buffer[1]); + EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]); +} + +TEST_F(TransientFileUtilsTest, WriteInt16BufferToFile) { + scoped_ptr file(FileWrapper::Create()); + + std::string kOutFileName = test::OutputPath() + "utils_test.out"; + + file->OpenFile(kOutFileName.c_str(), + false, // Write mode. + false, // No loop. + false); // No text. + ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + const size_t kBufferLength = 3; + scoped_ptr written_buffer(new int16_t[kBufferLength]); + scoped_ptr read_buffer(new int16_t[kBufferLength]); + + written_buffer[0] = 1; + written_buffer[1] = 2; + written_buffer[2] = 3; + + EXPECT_EQ(kBufferLength, WriteInt16BufferToFile(file.get(), + kBufferLength, + written_buffer.get())); + + file->CloseFile(); + + file->OpenFile(kOutFileName.c_str(), + true, // Read only. + false, // No loop. + false); // No text. + ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(), + kBufferLength, + read_buffer.get())); + EXPECT_EQ(0, memcmp(written_buffer.get(), + read_buffer.get(), + kBufferLength * sizeof(written_buffer[0]))); +} + +TEST_F(TransientFileUtilsTest, WriteFloatBufferToFile) { + scoped_ptr file(FileWrapper::Create()); + + std::string kOutFileName = test::OutputPath() + "utils_test.out"; + + file->OpenFile(kOutFileName.c_str(), + false, // Write mode. + false, // No loop. + false); // No text. 
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + const size_t kBufferLength = 3; + scoped_ptr written_buffer(new float[kBufferLength]); + scoped_ptr read_buffer(new float[kBufferLength]); + + written_buffer[0] = kPi; + written_buffer[1] = kE; + written_buffer[2] = kAvogadro; + + EXPECT_EQ(kBufferLength, WriteFloatBufferToFile(file.get(), + kBufferLength, + written_buffer.get())); + + file->CloseFile(); + + file->OpenFile(kOutFileName.c_str(), + true, // Read only. + false, // No loop. + false); // No text. + ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(), + kBufferLength, + read_buffer.get())); + EXPECT_EQ(0, memcmp(written_buffer.get(), + read_buffer.get(), + kBufferLength * sizeof(written_buffer[0]))); +} + +TEST_F(TransientFileUtilsTest, WriteDoubleBufferToFile) { + scoped_ptr file(FileWrapper::Create()); + + std::string kOutFileName = test::OutputPath() + "utils_test.out"; + + file->OpenFile(kOutFileName.c_str(), + false, // Write mode. + false, // No loop. + false); // No text. + ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + const size_t kBufferLength = 3; + scoped_ptr written_buffer(new double[kBufferLength]); + scoped_ptr read_buffer(new double[kBufferLength]); + + written_buffer[0] = kPi; + written_buffer[1] = kE; + written_buffer[2] = kAvogadro; + + EXPECT_EQ(kBufferLength, WriteDoubleBufferToFile(file.get(), + kBufferLength, + written_buffer.get())); + + file->CloseFile(); + + file->OpenFile(kOutFileName.c_str(), + true, // Read only. + false, // No loop. + false); // No text. 
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(), + kBufferLength, + read_buffer.get())); + EXPECT_EQ(0, memcmp(written_buffer.get(), + read_buffer.get(), + kBufferLength * sizeof(written_buffer[0]))); +} + +TEST_F(TransientFileUtilsTest, ExpectedErrorReturnValues) { + std::string test_filename = kTestFileName; + + double value; + scoped_ptr int16_buffer(new int16_t[1]); + scoped_ptr double_buffer(new double[1]); + scoped_ptr file(FileWrapper::Create()); + + EXPECT_EQ(-1, ConvertByteArrayToDouble(NULL, &value)); + EXPECT_EQ(-1, ConvertByteArrayToDouble(kPiBytes, NULL)); + + EXPECT_EQ(-1, ConvertDoubleToByteArray(kPi, NULL)); + + // Tests with file not opened. + EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, int16_buffer.get())); + EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), + 1, + double_buffer.get())); + EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, double_buffer.get())); + EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, int16_buffer.get())); + EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, double_buffer.get())); + + file->OpenFile(test_filename.c_str(), + true, // Read only. + true, // Loop. + false); // No text. 
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + EXPECT_EQ(0u, ReadInt16BufferFromFile(NULL, 1, int16_buffer.get())); + EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, NULL)); + EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 0, int16_buffer.get())); + + EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(NULL, 1, double_buffer.get())); + EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), 1, NULL)); + EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), + 0, + double_buffer.get())); + + EXPECT_EQ(0u, ReadDoubleBufferFromFile(NULL, 1, double_buffer.get())); + EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, NULL)); + EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 0, double_buffer.get())); + + EXPECT_EQ(0u, WriteInt16BufferToFile(NULL, 1, int16_buffer.get())); + EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, NULL)); + EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 0, int16_buffer.get())); + + EXPECT_EQ(0u, WriteDoubleBufferToFile(NULL, 1, double_buffer.get())); + EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, NULL)); + EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 0, double_buffer.get())); +} + +} // namespace webrtc + diff --git a/webrtc/modules/audio_processing/transient/moving_moments.cc b/webrtc/modules/audio_processing/transient/moving_moments.cc new file mode 100644 index 0000000000..e116832124 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/moving_moments.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/transient/moving_moments.h" + +#include +#include + +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +MovingMoments::MovingMoments(size_t length) + : length_(length), + queue_(), + sum_(0.0), + sum_of_squares_(0.0) { + assert(length > 0); + for (size_t i = 0; i < length; ++i) { + queue_.push(0.0); + } +} + +MovingMoments::~MovingMoments() {} + +void MovingMoments::CalculateMoments(const float* in, size_t in_length, + float* first, float* second) { + assert(in && in_length > 0 && first && second); + + for (size_t i = 0; i < in_length; ++i) { + const float old_value = queue_.front(); + queue_.pop(); + queue_.push(in[i]); + + sum_ += in[i] - old_value; + sum_of_squares_ += in[i] * in[i] - old_value * old_value; + first[i] = sum_ / length_; + second[i] = sum_of_squares_ / length_; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/moving_moments.h b/webrtc/modules/audio_processing/transient/moving_moments.h new file mode 100644 index 0000000000..f063e7ce3d --- /dev/null +++ b/webrtc/modules/audio_processing/transient/moving_moments.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_ + +#include + +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +// Calculates the first and second moments for each value of a buffer taking +// into account a given number of previous values. 
+// It preserves its state, so it can be called multiple times. +// TODO(chadan): Implement a function that takes a buffer of first moments and a +// buffer of second moments; and calculates the variances. When needed. +// TODO(chadan): Add functionality to update with a buffer but output only +// the last values of the moments. When needed. +class MovingMoments { + public: + // Creates a Moving Moments object, that uses the last |length| values + // (including the new value introduced in every new calculation). + explicit MovingMoments(size_t length); + ~MovingMoments(); + + // Calculates the new values using |in|. Results will be in the out buffers. + // |first| and |second| must be allocated with at least |in_length|. + void CalculateMoments(const float* in, size_t in_length, + float* first, float* second); + + private: + size_t length_; + // A queue holding the |length_| latest input values. + std::queue<float> queue_; + // Sum of the values of the queue. + float sum_; + // Sum of the squares of the values of the queue. + float sum_of_squares_; +}; + +} // namespace webrtc + + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_ diff --git a/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc b/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc new file mode 100644 index 0000000000..14cc5a2ef6 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "webrtc/modules/audio_processing/transient/moving_moments.h" + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +static const float kTolerance = 0.0001f; + +class MovingMomentsTest : public ::testing::Test { + protected: + static const size_t kMovingMomentsBufferLength = 5; + static const size_t kMaxOutputLength = 20; // Valid for these tests only. + + virtual void SetUp(); + // Calls CalculateMoments and verifies that it produces the expected + // outputs. + void CalculateMomentsAndVerify(const float* input, size_t input_length, + const float* expected_mean, + const float* expected_mean_squares); + + scoped_ptr<MovingMoments> moving_moments_; + float output_mean_[kMaxOutputLength]; + float output_mean_squares_[kMaxOutputLength]; +}; + +const size_t MovingMomentsTest::kMaxOutputLength; + +void MovingMomentsTest::SetUp() { + moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength)); +} + +void MovingMomentsTest::CalculateMomentsAndVerify( + const float* input, size_t input_length, + const float* expected_mean, + const float* expected_mean_squares) { + ASSERT_LE(input_length, kMaxOutputLength); + + moving_moments_->CalculateMoments(input, + input_length, + output_mean_, + output_mean_squares_); + + for (size_t i = 0; i < input_length; ++i) { // Verify every output sample. + EXPECT_NEAR(expected_mean[i], output_mean_[i], kTolerance); + EXPECT_NEAR(expected_mean_squares[i], output_mean_squares_[i], kTolerance); + } +} + +TEST_F(MovingMomentsTest, CorrectMomentsOfAnAllZerosBuffer) { + const float kInput[] = {0.f, 0.f, 0.f, 0.f, 0.f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + const float expected_mean[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f}; + const float expected_mean_squares[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, CorrectMomentsOfAConstantBuffer) { +
const float kInput[] = {5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + const float expected_mean[kInputLength] = + {1.f, 2.f, 3.f, 4.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f}; + const float expected_mean_squares[kInputLength] = + {5.f, 10.f, 15.f, 20.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, CorrectMomentsOfAnIncreasingBuffer) { + const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + const float expected_mean[kInputLength] = + {0.2f, 0.6f, 1.2f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; + const float expected_mean_squares[kInputLength] = + {0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, CorrectMomentsOfADecreasingBuffer) { + const float kInput[] = + {-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f, -9.f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + const float expected_mean[kInputLength] = + {-0.2f, -0.6f, -1.2f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f}; + const float expected_mean_squares[kInputLength] = + {0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, CorrectMomentsOfAZeroMeanSequence) { + const size_t kMovingMomentsBufferLength = 4; + moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength)); + const float kInput[] = + {1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + const float expected_mean[kInputLength] = + {0.25f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; + const float expected_mean_squares[kInputLength] = + {0.25f, 0.5f, 0.75f, 
1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, CorrectMomentsOfAnArbitraryBuffer) { + const float kInput[] = + {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + const float expected_mean[kInputLength] = + {0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f}; + const float expected_mean_squares[kInputLength] = + {0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f, + 0.0294f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, MutipleCalculateMomentsCalls) { + const float kInputFirstCall[] = + {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f}; + const size_t kInputFirstCallLength = sizeof(kInputFirstCall) / + sizeof(kInputFirstCall[0]); + const float kInputSecondCall[] = {0.29f, 0.31f}; + const size_t kInputSecondCallLength = sizeof(kInputSecondCall) / + sizeof(kInputSecondCall[0]); + const float kInputThirdCall[] = {0.37f, 0.41f, 0.43f, 0.47f}; + const size_t kInputThirdCallLength = sizeof(kInputThirdCall) / + sizeof(kInputThirdCall[0]); + + const float expected_mean_first_call[kInputFirstCallLength] = + {0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f}; + const float expected_mean_squares_first_call[kInputFirstCallLength] = + {0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f, + 0.0294f}; + + const float expected_mean_second_call[kInputSecondCallLength] = + {0.202f, 0.238f}; + const float expected_mean_squares_second_call[kInputSecondCallLength] = + {0.0438f, 0.0596f}; + + const float expected_mean_third_call[kInputThirdCallLength] = + {0.278f, 0.322f, 0.362f, 0.398f}; + const float expected_mean_squares_third_call[kInputThirdCallLength] = + {0.0812f, 0.1076f, 0.134f, 0.1614f}; + + CalculateMomentsAndVerify(kInputFirstCall, 
kInputFirstCallLength, + expected_mean_first_call, expected_mean_squares_first_call); + + CalculateMomentsAndVerify(kInputSecondCall, kInputSecondCallLength, + expected_mean_second_call, expected_mean_squares_second_call); + + CalculateMomentsAndVerify(kInputThirdCall, kInputThirdCallLength, + expected_mean_third_call, expected_mean_squares_third_call); +} + +TEST_F(MovingMomentsTest, + VerifySampleBasedVsBlockBasedCalculation) { + const float kInput[] = + {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + float output_mean_block_based[kInputLength]; + float output_mean_squares_block_based[kInputLength]; + + float output_mean_sample_based; + float output_mean_squares_sample_based; + + moving_moments_->CalculateMoments( + kInput, kInputLength, output_mean_block_based, + output_mean_squares_block_based); + moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength)); + for (size_t i = 0; i < kInputLength; ++i) { + moving_moments_->CalculateMoments( + &kInput[i], 1, &output_mean_sample_based, + &output_mean_squares_sample_based); + EXPECT_FLOAT_EQ(output_mean_block_based[i], output_mean_sample_based); + EXPECT_FLOAT_EQ(output_mean_squares_block_based[i], + output_mean_squares_sample_based); + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/test/plotDetection.m b/webrtc/modules/audio_processing/transient/test/plotDetection.m new file mode 100644 index 0000000000..f81ad50482 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/test/plotDetection.m @@ -0,0 +1,12 @@ +function [] = plotDetection(PCMfile, DATfile, fs, chunkSize) +%[] = plotDetection(PCMfile, DATfile, fs, chunkSize) +% +%Plots the signal alongside the detection values. +% +%PCMfile: The file of the input signal in PCM format. +%DATfile: The file containing the detection values in binary float format. +%fs: The sample rate of the signal in Hertz. 
+%chunkSize: The chunk size used to compute the detection values in seconds. +[x, tx] = readPCM(PCMfile, fs); +[d, td] = readDetection(DATfile, fs, chunkSize); +plot(tx, x, td, d); diff --git a/webrtc/modules/audio_processing/transient/test/readDetection.m b/webrtc/modules/audio_processing/transient/test/readDetection.m new file mode 100644 index 0000000000..1a9e8a7e64 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/test/readDetection.m @@ -0,0 +1,16 @@ +function [d, t] = readDetection(file, fs, chunkSize) +%[d, t] = readDetection(file, fs, chunkSize) +% +%Reads a detection signal from a DAT file. +% +%d: The detection signal. +%t: The respective time vector. +% +%file: The DAT file where the detection signal is stored in float format. +%fs: The signal sample rate in Hertz. +%chunkSize: The chunk size used for the detection in seconds. +fid = fopen(file); +d = fread(fid, inf, 'float'); +fclose(fid); +t = 0:(1 / fs):(length(d) * chunkSize - 1 / fs); +d = d(floor(t / chunkSize) + 1); diff --git a/webrtc/modules/audio_processing/transient/test/readPCM.m b/webrtc/modules/audio_processing/transient/test/readPCM.m new file mode 100644 index 0000000000..47ccac33e7 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/test/readPCM.m @@ -0,0 +1,16 @@ +function [x, t] = readPCM(file, fs) +%[x, t] = readPCM(file, fs) +% +%Reads a signal from a PCM file. +% +%x: The read signal after normalization. +%t: The respective time vector. +% +%file: The PCM file where the signal is stored in int16 format. +%fs: The signal sample rate in Hertz. 
+fid = fopen(file); +x = fread(fid, inf, 'int16'); +fclose(fid); +x = x - mean(x); +x = x / max(abs(x)); +t = 0:(1 / fs):((length(x) - 1) / fs); diff --git a/webrtc/modules/audio_processing/transient/transient_detector.cc b/webrtc/modules/audio_processing/transient/transient_detector.cc new file mode 100644 index 0000000000..7f021ac7b3 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/transient_detector.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/transient/transient_detector.h" + +#include +#include +#include +#include + +#include "webrtc/modules/audio_processing/transient/common.h" +#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h" +#include "webrtc/modules/audio_processing/transient/moving_moments.h" +#include "webrtc/modules/audio_processing/transient/wpd_tree.h" + +namespace webrtc { + +static const int kTransientLengthMs = 30; +static const int kChunksAtStartupLeftToDelete = + kTransientLengthMs / ts::kChunkSizeMs; +static const float kDetectThreshold = 16.f; + +TransientDetector::TransientDetector(int sample_rate_hz) + : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000), + last_first_moment_(), + last_second_moment_(), + chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete), + reference_energy_(1.f), + using_reference_(false) { + assert(sample_rate_hz == ts::kSampleRate8kHz || + sample_rate_hz == ts::kSampleRate16kHz || + sample_rate_hz == ts::kSampleRate32kHz || + sample_rate_hz == ts::kSampleRate48kHz); + int samples_per_transient = sample_rate_hz * 
kTransientLengthMs / 1000; + // Adjustment to avoid data loss while downsampling, making + // |samples_per_chunk_| and |samples_per_transient| always divisible by + // |kLeaves|. + samples_per_chunk_ -= samples_per_chunk_ % kLeaves; + samples_per_transient -= samples_per_transient % kLeaves; + + tree_leaves_data_length_ = samples_per_chunk_ / kLeaves; + wpd_tree_.reset(new WPDTree(samples_per_chunk_, + kDaubechies8HighPassCoefficients, + kDaubechies8LowPassCoefficients, + kDaubechies8CoefficientsLength, + kLevels)); + for (size_t i = 0; i < kLeaves; ++i) { + moving_moments_[i].reset( + new MovingMoments(samples_per_transient / kLeaves)); + } + + first_moments_.reset(new float[tree_leaves_data_length_]); + second_moments_.reset(new float[tree_leaves_data_length_]); + + for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) { + previous_results_.push_back(0.f); + } +} + +TransientDetector::~TransientDetector() {} + +float TransientDetector::Detect(const float* data, + size_t data_length, + const float* reference_data, + size_t reference_length) { + assert(data && data_length == samples_per_chunk_); + + // TODO(aluebs): Check if these errors can logically happen and if not assert + // on them. + if (wpd_tree_->Update(data, samples_per_chunk_) != 0) { + return -1.f; + } + + float result = 0.f; + + for (size_t i = 0; i < kLeaves; ++i) { + WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i); + + moving_moments_[i]->CalculateMoments(leaf->data(), + tree_leaves_data_length_, + first_moments_.get(), + second_moments_.get()); + + // Add value delayed (Use the last moments from the last call to Detect). + float unbiased_data = leaf->data()[0] - last_first_moment_[i]; + result += + unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN); + + // Add new values. 
+ for (size_t j = 1; j < tree_leaves_data_length_; ++j) { + unbiased_data = leaf->data()[j] - first_moments_[j - 1]; + result += + unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN); + } + + last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1]; + last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1]; + } + + result /= tree_leaves_data_length_; + + result *= ReferenceDetectionValue(reference_data, reference_length); + + if (chunks_at_startup_left_to_delete_ > 0) { + chunks_at_startup_left_to_delete_--; + result = 0.f; + } + + if (result >= kDetectThreshold) { + result = 1.f; + } else { + // Get proportional value. + // Proportion achieved with a squared raised cosine function with domain + // [0, kDetectThreshold) and image [0, 1), it's always increasing. + const float horizontal_scaling = ts::kPi / kDetectThreshold; + const float kHorizontalShift = ts::kPi; + const float kVerticalScaling = 0.5f; + const float kVerticalShift = 1.f; + + result = (cos(result * horizontal_scaling + kHorizontalShift) + + kVerticalShift) * kVerticalScaling; + result *= result; + } + + previous_results_.pop_front(); + previous_results_.push_back(result); + + // In the current implementation we return the max of the current result and + // the previous results, so the high results have a width equals to + // |transient_length|. + return *std::max_element(previous_results_.begin(), previous_results_.end()); +} + +// Looks for the highest slope and compares it with the previous ones. +// An exponential transformation takes this to the [0, 1] range. This value is +// multiplied by the detection result to avoid false positives. 
+/* Returns the factor that Detect() multiplies into its result. Yields 1.f and clears |using_reference_| when |data| is NULL or the summed energy is zero; otherwise sets |using_reference_| and returns a logistic function of the ratio between this call's energy and the running |reference_energy_|. */ +float TransientDetector::ReferenceDetectionValue(const float* data, + size_t length) { + if (data == NULL) { + using_reference_ = false; + return 1.f; + } + static const float kEnergyRatioThreshold = 0.2f; + static const float kReferenceNonLinearity = 20.f; + static const float kMemory = 0.99f; + float reference_energy = 0.f; + /* NOTE(review): the sum starts at index 1, so data[0] never contributes to the energy; confirm that skipping the first sample is intended. */ + for (size_t i = 1; i < length; ++i) { + reference_energy += data[i] * data[i]; + } + if (reference_energy == 0.f) { + using_reference_ = false; + return 1.f; + } + assert(reference_energy_ != 0); + /* The ratio below uses the running energy as it was before this call updates it. */ + float result = 1.f / (1.f + exp(kReferenceNonLinearity * + (kEnergyRatioThreshold - + reference_energy / reference_energy_))); + /* Update the running energy as an exponential moving average weighted by kMemory. */ + reference_energy_ = + kMemory * reference_energy_ + (1.f - kMemory) * reference_energy; + + using_reference_ = true; + + return result; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/transient_detector.h b/webrtc/modules/audio_processing/transient/transient_detector.h new file mode 100644 index 0000000000..04691d56ce --- /dev/null +++ b/webrtc/modules/audio_processing/transient/transient_detector.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_ + +#include + +#include "webrtc/modules/audio_processing/transient/moving_moments.h" +#include "webrtc/modules/audio_processing/transient/wpd_tree.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +// This is an implementation of the transient detector described in "Causal +// Wavelet based transient detector". +// Calculates the log-likelihood of a transient to happen on a signal at any +// given time based on the previous samples; it uses a WPD tree to analyze the +// signal. It preserves its state, so it can be multiple-called. +class TransientDetector { + public: + // TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree + // of 3 levels. Make an overloaded constructor to allow different wavelets and + // depths of the tree. When needed. + + // Creates a wavelet based transient detector. + TransientDetector(int sample_rate_hz); + + ~TransientDetector(); + + // Calculates the log-likelihood of the existence of a transient in |data|. + // |data_length| has to be equal to |samples_per_chunk_|. + // Returns a value between 0 and 1, as a non linear representation of this + // likelihood. + // Returns a negative value on error. + float Detect(const float* data, + size_t data_length, + const float* reference_data, + size_t reference_length); + + bool using_reference() { return using_reference_; } + + private: + float ReferenceDetectionValue(const float* data, size_t length); + + static const size_t kLevels = 3; + static const size_t kLeaves = 1 << kLevels; + + size_t samples_per_chunk_; + + scoped_ptr wpd_tree_; + size_t tree_leaves_data_length_; + + // A MovingMoments object is needed for each leaf in the WPD tree. 
+ scoped_ptr moving_moments_[kLeaves]; + + scoped_ptr first_moments_; + scoped_ptr second_moments_; + + // Stores the last calculated moments from the previous detection. + float last_first_moment_[kLeaves]; + float last_second_moment_[kLeaves]; + + // We keep track of the previous results from the previous chunks, so it can + // be used to effectively give results according to the |transient_length|. + std::deque previous_results_; + + // Number of chunks that are going to return only zeros at the beginning of + // the detection. It helps to avoid infs and nans due to the lack of + // information. + int chunks_at_startup_left_to_delete_; + + float reference_energy_; + + bool using_reference_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_ diff --git a/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc b/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc new file mode 100644 index 0000000000..ee8619f310 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/transient/transient_detector.h" + +#include <sstream> +#include <string> + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/modules/audio_processing/transient/common.h" +#include "webrtc/modules/audio_processing/transient/file_utils.h" +#include "webrtc/system_wrappers/interface/file_wrapper.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/test/testsupport/fileutils.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +static const int kSampleRatesHz[] = {ts::kSampleRate8kHz, + ts::kSampleRate16kHz, + ts::kSampleRate32kHz, + ts::kSampleRate48kHz}; +static const size_t kNumberOfSampleRates = + sizeof(kSampleRatesHz) / sizeof(*kSampleRatesHz); + +// This test is for the correctness of the transient detector. +// Checks the results comparing them with the ones stored in the detect files in +// the directory: resources/audio_processing/transient/ +// The files contain all the results in double precision (Little endian). +// The audio files used with different sample rates are stored in the same +// directory. +TEST(TransientDetectorTest, CorrectnessBasedOnFiles) { + for (size_t i = 0; i < kNumberOfSampleRates; ++i) { + int sample_rate_hz = kSampleRatesHz[i]; + + // Prepare detect file. + std::stringstream detect_file_name; + detect_file_name << "audio_processing/transient/detect" + << (sample_rate_hz / 1000) << "kHz"; + + scoped_ptr<FileWrapper> detect_file(FileWrapper::Create()); + + detect_file->OpenFile( + test::ResourcePath(detect_file_name.str(), "dat").c_str(), + true, // Read only. + false, // No loop. + false); // No text. + + bool file_opened = detect_file->Open(); + ASSERT_TRUE(file_opened) << "File could not be opened.\n" + << detect_file_name.str().c_str(); + + // Prepare audio file. 
+ std::stringstream audio_file_name; + audio_file_name << "audio_processing/transient/audio" + << (sample_rate_hz / 1000) << "kHz"; + + scoped_ptr<FileWrapper> audio_file(FileWrapper::Create()); + + audio_file->OpenFile( + test::ResourcePath(audio_file_name.str(), "pcm").c_str(), + true, // Read only. + false, // No loop. + false); // No text. + + // Without this check a missing audio file makes the read loop below run + // zero times, so the test would pass without comparing anything. + ASSERT_TRUE(audio_file->Open()) << "File could not be opened.\n" + << audio_file_name.str().c_str(); + + // Create detector. + TransientDetector detector(sample_rate_hz); + + const size_t buffer_length = sample_rate_hz * ts::kChunkSizeMs / 1000; + scoped_ptr<float[]> buffer(new float[buffer_length]); + + const float kTolerance = 0.01f; + + size_t frames_read = 0; + + while (ReadInt16FromFileToFloatBuffer(audio_file.get(), + buffer_length, + buffer.get()) == buffer_length) { + ++frames_read; + + float detector_value = + detector.Detect(buffer.get(), buffer_length, NULL, 0); + double file_value; + ASSERT_EQ(1u, ReadDoubleBufferFromFile(detect_file.get(), 1, &file_value)) + << "Detect test file is malformed.\n"; + + // Compare results with data from the matlab test file. + EXPECT_NEAR(file_value, detector_value, kTolerance) << "Frame: " + << frames_read; + } + + detect_file->CloseFile(); + audio_file->CloseFile(); + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/transient_suppression_test.cc b/webrtc/modules/audio_processing/transient/transient_suppression_test.cc new file mode 100644 index 0000000000..a4c2ef1a05 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/transient_suppression_test.cc @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/transient/transient_suppressor.h" + +#include +#include +#include + +#include "gflags/gflags.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/common_audio/include/audio_util.h" +#include "webrtc/modules/audio_processing/agc/agc.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/test/testsupport/fileutils.h" +#include "webrtc/typedefs.h" + +DEFINE_string(in_file_name, "", "PCM file that contains the signal."); +DEFINE_string(detection_file_name, + "", + "PCM file that contains the detection signal."); +DEFINE_string(reference_file_name, + "", + "PCM file that contains the reference signal."); + +static bool ValidatePositiveInt(const char* flagname, int32_t value) { + if (value <= 0) { + printf("%s must be a positive integer.\n", flagname); + return false; + } + return true; +} +DEFINE_int32(chunk_size_ms, + 10, + "Time between each chunk of samples in milliseconds."); +static const bool chunk_size_ms_dummy = + google::RegisterFlagValidator(&FLAGS_chunk_size_ms, &ValidatePositiveInt); + +DEFINE_int32(sample_rate_hz, + 16000, + "Sampling frequency of the signal in Hertz."); +static const bool sample_rate_hz_dummy = + google::RegisterFlagValidator(&FLAGS_sample_rate_hz, &ValidatePositiveInt); +DEFINE_int32(detection_rate_hz, + 0, + "Sampling frequency of the detection signal in Hertz."); + +DEFINE_int32(num_channels, 1, "Number of channels."); +static const bool num_channels_dummy = + google::RegisterFlagValidator(&FLAGS_num_channels, &ValidatePositiveInt); + +namespace webrtc { + +const char kUsage[] = + "\nDetects and suppresses transients from file.\n\n" + "This application loads the signal from the in_file_name with a specific\n" + "num_channels and sample_rate_hz, the detection signal from the\n" + "detection_file_name with a specific detection_rate_hz, and the reference\n" + "signal from the 
reference_file_name with sample_rate_hz, divides them\n" + "into chunk_size_ms blocks, computes its voice value and depending on the\n" + "voice_threshold does the respective restoration. You can always get the\n" + "all-voiced or all-unvoiced cases by setting the voice_threshold to 0 or\n" + "1 respectively.\n\n"; + +// Read next buffers from the test files (signed 16-bit host-endian PCM +// format). audio_buffer has int16 samples, detection_buffer has float samples +// with range [-32768,32767], and reference_buffer has float samples with range +// [-1,1]. Return true iff all the buffers were filled completely. +bool ReadBuffers(FILE* in_file, + size_t audio_buffer_size, + int num_channels, + int16_t* audio_buffer, + FILE* detection_file, + size_t detection_buffer_size, + float* detection_buffer, + FILE* reference_file, + float* reference_buffer) { + scoped_ptr tmpbuf; + int16_t* read_ptr = audio_buffer; + if (num_channels > 1) { + tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]); + read_ptr = tmpbuf.get(); + } + if (fread(read_ptr, + sizeof(*read_ptr), + num_channels * audio_buffer_size, + in_file) != num_channels * audio_buffer_size) { + return false; + } + // De-interleave. 
+ if (num_channels > 1) { + for (int i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < audio_buffer_size; ++j) { + audio_buffer[i * audio_buffer_size + j] = + read_ptr[i + j * num_channels]; + } + } + } + if (detection_file) { + scoped_ptr ibuf(new int16_t[detection_buffer_size]); + if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size, + detection_file) != detection_buffer_size) + return false; + for (size_t i = 0; i < detection_buffer_size; ++i) + detection_buffer[i] = ibuf[i]; + } + if (reference_file) { + scoped_ptr ibuf(new int16_t[audio_buffer_size]); + if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file) + != audio_buffer_size) + return false; + S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer); + } + return true; +} + +// Write a number of samples to an open signed 16-bit host-endian PCM file. +static void WritePCM(FILE* f, + size_t num_samples, + int num_channels, + const float* buffer) { + scoped_ptr ibuf(new int16_t[num_channels * num_samples]); + // Interleave. + for (int i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < num_samples; ++j) { + ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]); + } + } + fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f); +} + +// This application tests the transient suppression by providing a processed +// PCM file, which has to be listened to in order to evaluate the +// performance. +// It gets an audio file, and its voice gain information, and the suppressor +// process it giving the output file "suppressed_keystrokes.pcm". +void void_main() { + // TODO(aluebs): Remove all FileWrappers. + // Prepare the input file. + FILE* in_file = fopen(FLAGS_in_file_name.c_str(), "rb"); + ASSERT_TRUE(in_file != NULL); + + // Prepare the detection file. + FILE* detection_file = NULL; + if (FLAGS_detection_file_name != "") { + detection_file = fopen(FLAGS_detection_file_name.c_str(), "rb"); + } + + // Prepare the reference file. 
+ FILE* reference_file = NULL; + if (FLAGS_reference_file_name != "") { + reference_file = fopen(FLAGS_reference_file_name.c_str(), "rb"); + } + + // Prepare the output file. + std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm"; + FILE* out_file = fopen(out_file_name.c_str(), "wb"); + ASSERT_TRUE(out_file != NULL); + + int detection_rate_hz = FLAGS_detection_rate_hz; + if (detection_rate_hz == 0) { + detection_rate_hz = FLAGS_sample_rate_hz; + } + + Agc agc; + + TransientSuppressor suppressor; + suppressor.Initialize( + FLAGS_sample_rate_hz, detection_rate_hz, FLAGS_num_channels); + + const size_t audio_buffer_size = + FLAGS_chunk_size_ms * FLAGS_sample_rate_hz / 1000; + const size_t detection_buffer_size = + FLAGS_chunk_size_ms * detection_rate_hz / 1000; + + // int16 and float variants of the same data. + scoped_ptr audio_buffer_i( + new int16_t[FLAGS_num_channels * audio_buffer_size]); + scoped_ptr audio_buffer_f( + new float[FLAGS_num_channels * audio_buffer_size]); + + scoped_ptr detection_buffer, reference_buffer; + + if (detection_file) + detection_buffer.reset(new float[detection_buffer_size]); + if (reference_file) + reference_buffer.reset(new float[audio_buffer_size]); + + while (ReadBuffers(in_file, + audio_buffer_size, + FLAGS_num_channels, + audio_buffer_i.get(), + detection_file, + detection_buffer_size, + detection_buffer.get(), + reference_file, + reference_buffer.get())) { + ASSERT_EQ(0, + agc.Process(audio_buffer_i.get(), + static_cast(audio_buffer_size), + FLAGS_sample_rate_hz)) + << "The AGC could not process the frame"; + + for (size_t i = 0; i < FLAGS_num_channels * audio_buffer_size; ++i) { + audio_buffer_f[i] = audio_buffer_i[i]; + } + + ASSERT_EQ(0, + suppressor.Suppress(audio_buffer_f.get(), + audio_buffer_size, + FLAGS_num_channels, + detection_buffer.get(), + detection_buffer_size, + reference_buffer.get(), + audio_buffer_size, + agc.voice_probability(), + true)) + << "The transient suppressor could not suppress 
the frame"; + + // Write result to out file. + WritePCM( + out_file, audio_buffer_size, FLAGS_num_channels, audio_buffer_f.get()); + } + + fclose(in_file); + if (detection_file) { + fclose(detection_file); + } + if (reference_file) { + fclose(reference_file); + } + fclose(out_file); +} + +} // namespace webrtc + +int main(int argc, char* argv[]) { + google::SetUsageMessage(webrtc::kUsage); + google::ParseCommandLineFlags(&argc, &argv, true); + webrtc::void_main(); + return 0; +} diff --git a/webrtc/modules/audio_processing/transient/transient_suppressor.cc b/webrtc/modules/audio_processing/transient/transient_suppressor.cc new file mode 100644 index 0000000000..7eb302bdff --- /dev/null +++ b/webrtc/modules/audio_processing/transient/transient_suppressor.cc @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/transient/transient_suppressor.h" + +#include +#include +#include +#include +#include +#include + +#include "webrtc/common_audio/include/audio_util.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/transient/common.h" +#include "webrtc/modules/audio_processing/transient/transient_detector.h" +#include "webrtc/modules/audio_processing/ns/windows_private.h" +extern "C" { +#include "webrtc/modules/audio_processing/utility/fft4g.h" +} +#include "webrtc/system_wrappers/interface/logging.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +static const float kMeanIIRCoefficient = 0.5f; +static const float kVoiceThreshold = 0.02f; + +// TODO(aluebs): Check if these values work also for 48kHz. +static const size_t kMinVoiceBin = 3; +static const size_t kMaxVoiceBin = 60; + +namespace { +float ComplexMagnitude(float a, float b) { + return std::abs(a) + std::abs(b); +} +} + +TransientSuppressor::TransientSuppressor() + : data_length_(0), + detection_length_(0), + analysis_length_(0), + buffer_delay_(0), + complex_analysis_length_(0), + num_channels_(0), + window_(NULL), + detector_smoothed_(0.f), + keypress_counter_(0), + chunks_since_keypress_(0), + detection_enabled_(false), + suppression_enabled_(false), + use_hard_restoration_(false), + chunks_since_voice_change_(0), + seed_(182), + using_reference_(false) { +} + +TransientSuppressor::~TransientSuppressor() {} + +int TransientSuppressor::Initialize(int sample_rate_hz, + int detection_rate_hz, + int num_channels) { + switch (sample_rate_hz) { + case ts::kSampleRate8kHz: + analysis_length_ = 128u; + window_ = kBlocks80w128; + break; + case ts::kSampleRate16kHz: + analysis_length_ = 256u; + window_ = kBlocks160w256; + break; + case ts::kSampleRate32kHz: + analysis_length_ = 512u; + window_ = kBlocks320w512; + break; + case 
ts::kSampleRate48kHz: + analysis_length_ = 1024u; + window_ = kBlocks480w1024; + break; + default: + return -1; + } + if (detection_rate_hz != ts::kSampleRate8kHz && + detection_rate_hz != ts::kSampleRate16kHz && + detection_rate_hz != ts::kSampleRate32kHz && + detection_rate_hz != ts::kSampleRate48kHz) { + return -1; + } + if (num_channels <= 0) { + return -1; + } + + detector_.reset(new TransientDetector(detection_rate_hz)); + data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000; + if (data_length_ > analysis_length_) { + assert(false); + return -1; + } + buffer_delay_ = analysis_length_ - data_length_; + + complex_analysis_length_ = analysis_length_ / 2 + 1; + assert(complex_analysis_length_ >= kMaxVoiceBin); + num_channels_ = num_channels; + in_buffer_.reset(new float[analysis_length_ * num_channels_]); + memset(in_buffer_.get(), + 0, + analysis_length_ * num_channels_ * sizeof(in_buffer_[0])); + detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000; + detection_buffer_.reset(new float[detection_length_]); + memset(detection_buffer_.get(), + 0, + detection_length_ * sizeof(detection_buffer_[0])); + out_buffer_.reset(new float[analysis_length_ * num_channels_]); + memset(out_buffer_.get(), + 0, + analysis_length_ * num_channels_ * sizeof(out_buffer_[0])); + // ip[0] must be zero to trigger initialization using rdft(). 
+ size_t ip_length = 2 + sqrtf(analysis_length_); + ip_.reset(new int[ip_length]()); + memset(ip_.get(), 0, ip_length * sizeof(ip_[0])); + wfft_.reset(new float[complex_analysis_length_ - 1]); + memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0])); + spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]); + memset(spectral_mean_.get(), + 0, + complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0])); + fft_buffer_.reset(new float[analysis_length_ + 2]); + memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0])); + magnitudes_.reset(new float[complex_analysis_length_]); + memset(magnitudes_.get(), + 0, + complex_analysis_length_ * sizeof(magnitudes_[0])); + mean_factor_.reset(new float[complex_analysis_length_]); + + static const float kFactorHeight = 10.f; + static const float kLowSlope = 1.f; + static const float kHighSlope = 0.3f; + for (size_t i = 0; i < complex_analysis_length_; ++i) { + mean_factor_[i] = + kFactorHeight / + (1.f + exp(kLowSlope * static_cast(i - kMinVoiceBin))) + + kFactorHeight / + (1.f + exp(kHighSlope * static_cast(kMaxVoiceBin - i))); + } + detector_smoothed_ = 0.f; + keypress_counter_ = 0; + chunks_since_keypress_ = 0; + detection_enabled_ = false; + suppression_enabled_ = false; + use_hard_restoration_ = false; + chunks_since_voice_change_ = 0; + seed_ = 182; + using_reference_ = false; + return 0; +} + +int TransientSuppressor::Suppress(float* data, + size_t data_length, + int num_channels, + const float* detection_data, + size_t detection_length, + const float* reference_data, + size_t reference_length, + float voice_probability, + bool key_pressed) { + if (!data || data_length != data_length_ || num_channels != num_channels_ || + detection_length != detection_length_ || voice_probability < 0 || + voice_probability > 1) { + return -1; + } + + UpdateKeypress(key_pressed); + UpdateBuffers(data); + + int result = 0; + if (detection_enabled_) { + 
UpdateRestoration(voice_probability); + + if (!detection_data) { + // Use the input data of the first channel if special detection data is + // not supplied. + detection_data = &in_buffer_[buffer_delay_]; + } + + float detector_result = detector_->Detect( + detection_data, detection_length, reference_data, reference_length); + if (detector_result < 0) { + return -1; + } + + using_reference_ = detector_->using_reference(); + + // |detector_smoothed_| follows the |detector_result| when this last one is + // increasing, but has an exponential decaying tail to be able to suppress + // the ringing of keyclicks. + float smooth_factor = using_reference_ ? 0.6 : 0.1; + detector_smoothed_ = detector_result >= detector_smoothed_ + ? detector_result + : smooth_factor * detector_smoothed_ + + (1 - smooth_factor) * detector_result; + + for (int i = 0; i < num_channels_; ++i) { + Suppress(&in_buffer_[i * analysis_length_], + &spectral_mean_[i * complex_analysis_length_], + &out_buffer_[i * analysis_length_]); + } + } + + // If the suppression isn't enabled, we use the in buffer to delay the signal + // appropriately. This also gives time for the out buffer to be refreshed with + // new data between detection and suppression getting enabled. + for (int i = 0; i < num_channels_; ++i) { + memcpy(&data[i * data_length_], + suppression_enabled_ ? &out_buffer_[i * analysis_length_] + : &in_buffer_[i * analysis_length_], + data_length_ * sizeof(*data)); + } + return result; +} + +// This should only be called when detection is enabled. UpdateBuffers() must +// have been called. At return, |out_buffer_| will be filled with the +// processed output. +void TransientSuppressor::Suppress(float* in_ptr, + float* spectral_mean, + float* out_ptr) { + // Go to frequency domain. 
+ for (size_t i = 0; i < analysis_length_; ++i) { + // TODO(aluebs): Rename windows + fft_buffer_[i] = in_ptr[i] * window_[i]; + } + + WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get()); + + // Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end + // for convenience. + fft_buffer_[analysis_length_] = fft_buffer_[1]; + fft_buffer_[analysis_length_ + 1] = 0.f; + fft_buffer_[1] = 0.f; + + for (size_t i = 0; i < complex_analysis_length_; ++i) { + magnitudes_[i] = ComplexMagnitude(fft_buffer_[i * 2], + fft_buffer_[i * 2 + 1]); + } + // Restore audio if necessary. + if (suppression_enabled_) { + if (use_hard_restoration_) { + HardRestoration(spectral_mean); + } else { + SoftRestoration(spectral_mean); + } + } + + // Update the spectral mean. + for (size_t i = 0; i < complex_analysis_length_; ++i) { + spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] + + kMeanIIRCoefficient * magnitudes_[i]; + } + + // Back to time domain. + // Put R[n/2] back in fft_buffer_[1]. + fft_buffer_[1] = fft_buffer_[analysis_length_]; + + WebRtc_rdft(analysis_length_, + -1, + fft_buffer_.get(), + ip_.get(), + wfft_.get()); + const float fft_scaling = 2.f / analysis_length_; + + for (size_t i = 0; i < analysis_length_; ++i) { + out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling; + } +} + +void TransientSuppressor::UpdateKeypress(bool key_pressed) { + const int kKeypressPenalty = 1000 / ts::kChunkSizeMs; + const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs; + const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs; // 4 seconds. 
+ + if (key_pressed) { + keypress_counter_ += kKeypressPenalty; + chunks_since_keypress_ = 0; + detection_enabled_ = true; + } + keypress_counter_ = std::max(0, keypress_counter_ - 1); + + if (keypress_counter_ > kIsTypingThreshold) { + if (!suppression_enabled_) { + LOG(LS_INFO) << "[ts] Transient suppression is now enabled."; + } + suppression_enabled_ = true; + keypress_counter_ = 0; + } + + if (detection_enabled_ && + ++chunks_since_keypress_ > kChunksUntilNotTyping) { + if (suppression_enabled_) { + LOG(LS_INFO) << "[ts] Transient suppression is now disabled."; + } + detection_enabled_ = false; + suppression_enabled_ = false; + keypress_counter_ = 0; + } +} + +void TransientSuppressor::UpdateRestoration(float voice_probability) { + const int kHardRestorationOffsetDelay = 3; + const int kHardRestorationOnsetDelay = 80; + + bool not_voiced = voice_probability < kVoiceThreshold; + + if (not_voiced == use_hard_restoration_) { + chunks_since_voice_change_ = 0; + } else { + ++chunks_since_voice_change_; + + if ((use_hard_restoration_ && + chunks_since_voice_change_ > kHardRestorationOffsetDelay) || + (!use_hard_restoration_ && + chunks_since_voice_change_ > kHardRestorationOnsetDelay)) { + use_hard_restoration_ = not_voiced; + chunks_since_voice_change_ = 0; + } + } +} + +// Shift buffers to make way for new data. Must be called after +// |detection_enabled_| is updated by UpdateKeypress(). +void TransientSuppressor::UpdateBuffers(float* data) { + // TODO(aluebs): Change to ring buffer. + memmove(in_buffer_.get(), + &in_buffer_[data_length_], + (buffer_delay_ + (num_channels_ - 1) * analysis_length_) * + sizeof(in_buffer_[0])); + // Copy new chunk to buffer. + for (int i = 0; i < num_channels_; ++i) { + memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_], + &data[i * data_length_], + data_length_ * sizeof(*data)); + } + if (detection_enabled_) { + // Shift previous chunk in out buffer. 
+ memmove(out_buffer_.get(), + &out_buffer_[data_length_], + (buffer_delay_ + (num_channels_ - 1) * analysis_length_) * + sizeof(out_buffer_[0])); + // Initialize new chunk in out buffer. + for (int i = 0; i < num_channels_; ++i) { + memset(&out_buffer_[buffer_delay_ + i * analysis_length_], + 0, + data_length_ * sizeof(out_buffer_[0])); + } + } +} + +// Restores the unvoiced signal if a click is present. +// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds +// the spectral mean. The attenuation depends on |detector_smoothed_|. +// If a restoration takes place, the |magnitudes_| are updated to the new value. +void TransientSuppressor::HardRestoration(float* spectral_mean) { + const float detector_result = + 1.f - pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f); + // To restore, we get the peaks in the spectrum. If higher than the previous + // spectral mean we adjust them. + for (size_t i = 0; i < complex_analysis_length_; ++i) { + if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) { + // RandU() generates values on [0, int16::max()] + const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) / + std::numeric_limits::max(); + const float scaled_mean = detector_result * spectral_mean[i]; + + fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] + + scaled_mean * cosf(phase); + fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] + + scaled_mean * sinf(phase); + magnitudes_[i] = magnitudes_[i] - + detector_result * (magnitudes_[i] - spectral_mean[i]); + } + } +} + +// Restores the voiced signal if a click is present. +// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds +// the spectral mean and that is lower than some function of the current block +// frequency mean. The attenuation depends on |detector_smoothed_|. +// If a restoration takes place, the |magnitudes_| are updated to the new value. 
+void TransientSuppressor::SoftRestoration(float* spectral_mean) { + // Get the spectral magnitude mean of the current block. + float block_frequency_mean = 0; + for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) { + block_frequency_mean += magnitudes_[i]; + } + block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin); + + // To restore, we get the peaks in the spectrum. If higher than the + // previous spectral mean and lower than a factor of the block mean + // we adjust them. The factor is a double sigmoid that has a minimum in the + // voice frequency range (300Hz - 3kHz). + for (size_t i = 0; i < complex_analysis_length_; ++i) { + if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 && + (using_reference_ || + magnitudes_[i] < block_frequency_mean * mean_factor_[i])) { + const float new_magnitude = + magnitudes_[i] - + detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]); + const float magnitude_ratio = new_magnitude / magnitudes_[i]; + + fft_buffer_[i * 2] *= magnitude_ratio; + fft_buffer_[i * 2 + 1] *= magnitude_ratio; + magnitudes_[i] = new_magnitude; + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/transient_suppressor.h b/webrtc/modules/audio_processing/transient/transient_suppressor.h new file mode 100644 index 0000000000..3d7dba836f --- /dev/null +++ b/webrtc/modules/audio_processing/transient/transient_suppressor.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_ + +#include +#include + +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/test/testsupport/gtest_prod_util.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class TransientDetector; + +// Detects transients in an audio stream and suppress them using a simple +// restoration algorithm that attenuates unexpected spikes in the spectrum. +class TransientSuppressor { + public: + TransientSuppressor(); + ~TransientSuppressor(); + + int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels); + + // Processes a |data| chunk, and returns it with keystrokes suppressed from + // it. The float format is assumed to be int16 ranged. If there are more than + // one channel, the chunks are concatenated one after the other in |data|. + // |data_length| must be equal to |data_length_|. + // |num_channels| must be equal to |num_channels_|. + // A sub-band, ideally the higher, can be used as |detection_data|. If it is + // NULL, |data| is used for the detection too. The |detection_data| is always + // assumed mono. + // If a reference signal (e.g. keyboard microphone) is available, it can be + // passed in as |reference_data|. It is assumed mono and must have the same + // length as |data|. NULL is accepted if unavailable. + // This suppressor performs better if voice information is available. + // |voice_probability| is the probability of voice being present in this chunk + // of audio. If voice information is not available, |voice_probability| must + // always be set to 1. + // |key_pressed| determines if a key was pressed on this audio chunk. + // Returns 0 on success and -1 otherwise. 
+ int Suppress(float* data, + size_t data_length, + int num_channels, + const float* detection_data, + size_t detection_length, + const float* reference_data, + size_t reference_length, + float voice_probability, + bool key_pressed); + + private: + FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest, + TypingDetectionLogicWorksAsExpectedForMono); + void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr); + + void UpdateKeypress(bool key_pressed); + void UpdateRestoration(float voice_probability); + + void UpdateBuffers(float* data); + + void HardRestoration(float* spectral_mean); + void SoftRestoration(float* spectral_mean); + + scoped_ptr detector_; + + size_t data_length_; + size_t detection_length_; + size_t analysis_length_; + size_t buffer_delay_; + size_t complex_analysis_length_; + int num_channels_; + // Input buffer where the original samples are stored. + scoped_ptr in_buffer_; + scoped_ptr detection_buffer_; + // Output buffer where the restored samples are stored. + scoped_ptr out_buffer_; + + // Arrays for fft. + scoped_ptr ip_; + scoped_ptr wfft_; + + scoped_ptr spectral_mean_; + + // Stores the data for the fft. 
+ scoped_ptr fft_buffer_; + + scoped_ptr magnitudes_; + + const float* window_; + + scoped_ptr mean_factor_; + + float detector_smoothed_; + + int keypress_counter_; + int chunks_since_keypress_; + bool detection_enabled_; + bool suppression_enabled_; + + bool use_hard_restoration_; + int chunks_since_voice_change_; + + uint32_t seed_; + + bool using_reference_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_ diff --git a/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc b/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc new file mode 100644 index 0000000000..0c1010e090 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/transient/transient_suppressor.h" + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/modules/audio_processing/transient/common.h" + +namespace webrtc { + +TEST(TransientSuppressorTest, TypingDetectionLogicWorksAsExpectedForMono) { + static const int kNumChannels = 1; + + TransientSuppressor ts; + ts.Initialize(ts::kSampleRate16kHz, ts::kSampleRate16kHz, kNumChannels); + + // Each key-press enables detection. 
+ EXPECT_FALSE(ts.detection_enabled_); + ts.UpdateKeypress(true); + EXPECT_TRUE(ts.detection_enabled_); + + // It takes four seconds without any key-press to disable the detection + for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) { + ts.UpdateKeypress(false); + EXPECT_TRUE(ts.detection_enabled_); + } + ts.UpdateKeypress(false); + EXPECT_FALSE(ts.detection_enabled_); + + // Key-presses that are more than a second apart from each other don't enable + // suppression. + for (int i = 0; i < 100; ++i) { + EXPECT_FALSE(ts.suppression_enabled_); + ts.UpdateKeypress(true); + EXPECT_TRUE(ts.detection_enabled_); + EXPECT_FALSE(ts.suppression_enabled_); + for (int time_ms = 0; time_ms < 990; time_ms += ts::kChunkSizeMs) { + ts.UpdateKeypress(false); + EXPECT_TRUE(ts.detection_enabled_); + EXPECT_FALSE(ts.suppression_enabled_); + } + ts.UpdateKeypress(false); + } + + // Two consecutive key-presses is enough to enable the suppression. + ts.UpdateKeypress(true); + EXPECT_FALSE(ts.suppression_enabled_); + ts.UpdateKeypress(true); + EXPECT_TRUE(ts.suppression_enabled_); + + // Key-presses that are less than a second apart from each other don't disable + // detection nor suppression. + for (int i = 0; i < 100; ++i) { + for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) { + ts.UpdateKeypress(false); + EXPECT_TRUE(ts.detection_enabled_); + EXPECT_TRUE(ts.suppression_enabled_); + } + ts.UpdateKeypress(true); + EXPECT_TRUE(ts.detection_enabled_); + EXPECT_TRUE(ts.suppression_enabled_); + } + + // It takes four seconds without any key-press to disable the detection and + // suppression. 
+ for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) { + ts.UpdateKeypress(false); + EXPECT_TRUE(ts.detection_enabled_); + EXPECT_TRUE(ts.suppression_enabled_); + } + for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) { + ts.UpdateKeypress(false); + EXPECT_FALSE(ts.detection_enabled_); + EXPECT_FALSE(ts.suppression_enabled_); + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/wpd_node.cc b/webrtc/modules/audio_processing/transient/wpd_node.cc new file mode 100644 index 0000000000..8854516bca --- /dev/null +++ b/webrtc/modules/audio_processing/transient/wpd_node.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/transient/wpd_node.h" + +#include +#include +#include + +#include "webrtc/common_audio/fir_filter.h" +#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +WPDNode::WPDNode(size_t length, + const float* coefficients, + size_t coefficients_length) + : // The data buffer has parent data length to be able to contain and filter + // it. 
+ data_(new float[2 * length + 1]), + length_(length), + filter_(FIRFilter::Create(coefficients, + coefficients_length, + 2 * length + 1)) { + assert(length > 0 && coefficients && coefficients_length > 0); + memset(data_.get(), 0.f, (2 * length + 1) * sizeof(data_[0])); +} + +WPDNode::~WPDNode() {} + +int WPDNode::Update(const float* parent_data, size_t parent_data_length) { + if (!parent_data || (parent_data_length / 2) != length_) { + return -1; + } + + // Filter data. + filter_->Filter(parent_data, parent_data_length, data_.get()); + + // Decimate data. + const bool kOddSequence = true; + size_t output_samples = DyadicDecimate( + data_.get(), parent_data_length, kOddSequence, data_.get(), length_); + if (output_samples != length_) { + return -1; + } + + // Get abs to all values. + for (size_t i = 0; i < length_; ++i) { + data_[i] = fabs(data_[i]); + } + + return 0; +} + +int WPDNode::set_data(const float* new_data, size_t length) { + if (!new_data || length != length_) { + return -1; + } + memcpy(data_.get(), new_data, length * sizeof(data_[0])); + return 0; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/wpd_node.h b/webrtc/modules/audio_processing/transient/wpd_node.h new file mode 100644 index 0000000000..d7c2463bcb --- /dev/null +++ b/webrtc/modules/audio_processing/transient/wpd_node.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_ + +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class FIRFilter; + +// A single node of a Wavelet Packet Decomposition (WPD) tree. +class WPDNode { + public: + // Creates a WPDNode. The data vector will contain zeros. The filter will have + // the coefficients provided. + WPDNode(size_t length, const float* coefficients, size_t coefficients_length); + ~WPDNode(); + + // Updates the node data. |parent_data| / 2 must be equals to |length_|. + // Returns 0 if correct, and -1 otherwise. + int Update(const float* parent_data, size_t parent_data_length); + + const float* data() const { return data_.get(); } + // Returns 0 if correct, and -1 otherwise. + int set_data(const float* new_data, size_t length); + size_t length() const { return length_; } + + private: + scoped_ptr data_; + size_t length_; + scoped_ptr filter_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_ diff --git a/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc b/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc new file mode 100644 index 0000000000..631a6db0a2 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/transient/wpd_node.h" + +#include + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +static const size_t kDataLength = 5; +static const float kTolerance = 0.0001f; + +static const size_t kParentDataLength = kDataLength * 2; +static const float kParentData[kParentDataLength] = + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f}; + +static const float kCoefficients[] = {0.2f, -0.3f, 0.5f, -0.7f, 0.11f}; +static const size_t kCoefficientsLength = sizeof(kCoefficients) / + sizeof(kCoefficients[0]); + +TEST(WPDNodeTest, Accessors) { + WPDNode node(kDataLength, kCoefficients, kCoefficientsLength); + EXPECT_EQ(0, node.set_data(kParentData, kDataLength)); + EXPECT_EQ(0, memcmp(node.data(), + kParentData, + kDataLength * sizeof(node.data()[0]))); +} + +TEST(WPDNodeTest, UpdateThatOnlyDecimates) { + const float kIndentyCoefficient = 1.f; + WPDNode node(kDataLength, &kIndentyCoefficient, 1); + EXPECT_EQ(0, node.Update(kParentData, kParentDataLength)); + for (size_t i = 0; i < kDataLength; ++i) { + EXPECT_FLOAT_EQ(kParentData[i * 2 + 1], node.data()[i]); + } +} + +TEST(WPDNodeTest, UpdateWithArbitraryDataAndArbitraryFilter) { + WPDNode node(kDataLength, kCoefficients, kCoefficientsLength); + EXPECT_EQ(0, node.Update(kParentData, kParentDataLength)); + EXPECT_NEAR(0.1f, node.data()[0], kTolerance); + EXPECT_NEAR(0.2f, node.data()[1], kTolerance); + EXPECT_NEAR(0.18f, node.data()[2], kTolerance); + EXPECT_NEAR(0.56f, node.data()[3], kTolerance); + EXPECT_NEAR(0.94f, node.data()[4], kTolerance); +} + +TEST(WPDNodeTest, ExpectedErrorReturnValue) { + WPDNode node(kDataLength, kCoefficients, kCoefficientsLength); + EXPECT_EQ(-1, node.Update(kParentData, kParentDataLength - 1)); + EXPECT_EQ(-1, node.Update(NULL, kParentDataLength)); + EXPECT_EQ(-1, node.set_data(kParentData, kDataLength - 1)); + EXPECT_EQ(-1, node.set_data(NULL, kDataLength)); +} + +} 
// namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/wpd_tree.cc b/webrtc/modules/audio_processing/transient/wpd_tree.cc new file mode 100644 index 0000000000..a3c3ec009e --- /dev/null +++ b/webrtc/modules/audio_processing/transient/wpd_tree.cc @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/transient/wpd_tree.h" + +#include +#include +#include + +#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h" +#include "webrtc/modules/audio_processing/transient/wpd_node.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +WPDTree::WPDTree(size_t data_length, const float* high_pass_coefficients, + const float* low_pass_coefficients, size_t coefficients_length, + int levels) + : data_length_(data_length), + levels_(levels), + num_nodes_((1 << (levels + 1)) - 1) { + assert(data_length > (static_cast(1) << levels) && + high_pass_coefficients && + low_pass_coefficients && + levels > 0); + // Size is 1 more, so we can use the array as 1-based. nodes_[0] is never + // allocated. + nodes_.reset(new scoped_ptr[num_nodes_ + 1]); + + // Create the first node + const float kRootCoefficient = 1.f; // Identity Coefficient. + nodes_[1].reset(new WPDNode(data_length, &kRootCoefficient, 1)); + // Variables used to create the rest of the nodes. + size_t index = 1; + size_t index_left_child = 0; + size_t index_right_child = 0; + + int num_nodes_at_curr_level = 0; + + // Branching each node in each level to create its children. 
The last level is + // not branched (all the nodes of that level are leaves). + for (int current_level = 0; current_level < levels; ++current_level) { + num_nodes_at_curr_level = 1 << current_level; + for (int i = 0; i < num_nodes_at_curr_level; ++i) { + index = (1 << current_level) + i; + // Obtain the index of the current node children. + index_left_child = index * 2; + index_right_child = index_left_child + 1; + nodes_[index_left_child].reset(new WPDNode(nodes_[index]->length() / 2, + low_pass_coefficients, + coefficients_length)); + nodes_[index_right_child].reset(new WPDNode(nodes_[index]->length() / 2, + high_pass_coefficients, + coefficients_length)); + } + } +} + +WPDTree::~WPDTree() {} + +WPDNode* WPDTree::NodeAt(int level, int index) { + // |level| must be range-checked before it is used as a shift count. + if (level < 0 || level > levels_ || index < 0 || index >= (1 << level)) { + return NULL; + } + return nodes_[(1 << level) + index].get(); +} + +int WPDTree::Update(const float* data, size_t data_length) { + if (!data || data_length != data_length_) { + return -1; + } + + // Update the root node. + int update_result = nodes_[1]->set_data(data, data_length); + if (update_result != 0) { + return -1; + } + + // Variables used to update the rest of the nodes. + size_t index = 1; + size_t index_left_child = 0; + size_t index_right_child = 0; + + int num_nodes_at_curr_level = 0; + + for (int current_level = 0; current_level < levels_; ++current_level) { + num_nodes_at_curr_level = 1 << current_level; + for (int i = 0; i < num_nodes_at_curr_level; ++i) { + index = (1 << current_level) + i; + // Obtain the index of the current node children. 
+ index_left_child = index * 2; + index_right_child = index_left_child + 1; + + update_result = nodes_[index_left_child]->Update( + nodes_[index]->data(), nodes_[index]->length()); + if (update_result != 0) { + return -1; + } + + update_result = nodes_[index_right_child]->Update( + nodes_[index]->data(), nodes_[index]->length()); + if (update_result != 0) { + return -1; + } + } + } + + return 0; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/transient/wpd_tree.h b/webrtc/modules/audio_processing/transient/wpd_tree.h new file mode 100644 index 0000000000..e488c9d18d --- /dev/null +++ b/webrtc/modules/audio_processing/transient/wpd_tree.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_ + +#include "webrtc/modules/audio_processing/transient/wpd_node.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +// Tree of a Wavelet Packet Decomposition (WPD). +// +// The root node contains all the data provided; for each node in the tree, the +// left child contains the approximation coefficients extracted from the node, +// and the right child contains the detail coefficients. +// It preserves its state, so it can be multiple-called. +// +// The number of nodes in the tree will be 2 ^ (levels + 1) - 1. +// +// Implementation details: Since the tree always will be a complete binary tree, +// it is implemented using a single linear array instead of managing the +// relationships in each node. 
For convience is better to use a array that +// starts at 1 (instead of 0). Taking that into account, the following formulas +// apply: +// Root node index: 1. +// Node(Level, Index in that level): 2 ^ Level + (Index in that level). +// Left Child: Current node index * 2. +// Right Child: Current node index * 2 + 1. +// Parent: Current Node Index / 2 (Integer division). +class WPDTree { + public: + // Creates a WPD tree using the data length and coefficients provided. + WPDTree(size_t data_length, + const float* high_pass_coefficients, + const float* low_pass_coefficients, + size_t coefficients_length, + int levels); + ~WPDTree(); + + // Returns the number of nodes at any given level. + static int NumberOfNodesAtLevel(int level) { + return 1 << level; + } + + // Returns a pointer to the node at the given level and index (of that level). + // Level goes from 0 to levels(). + // Index goes from 0 to NumberOfNodesAtLevel(level) - 1. + // + // You can use the following formulas to get any node within the tree: + // Notation: (Level, Index of node in that level). + // Root node: (0, 0). + // Left Child: (Current node level + 1, Current node index * 2). + // Right Child: (Current node level + 1, Current node index * 2 + 1). + // Parent: (Current node level - 1, Current node index / 2) (Integer division) + // + // If level or index are out of bounds the function will return NULL. + WPDNode* NodeAt(int level, int index); + + // Updates all the nodes of the tree with the new data. |data_length| must be + // the same that was used for the creation of the tree. + // Returns 0 if correct, and -1 otherwise. + int Update(const float* data, size_t data_length); + + // Returns the total number of levels below the root. Root is considered level + // 0. + int levels() const { return levels_; } + + // Returns the total number of nodes. + int num_nodes() const { return num_nodes_; } + + // Returns the total number of leaves. 
+ int num_leaves() const { return 1 << levels_; } + + private: + size_t data_length_; + int levels_; + int num_nodes_; + scoped_ptr[]> nodes_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_ diff --git a/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc b/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc new file mode 100644 index 0000000000..eecdd95185 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/transient/wpd_tree.h" + +#include +#include + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h" +#include "webrtc/modules/audio_processing/transient/file_utils.h" +#include "webrtc/system_wrappers/interface/file_wrapper.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/test/testsupport/fileutils.h" + +namespace webrtc { + +TEST(WPDTreeTest, Construction) { + const size_t kTestBufferSize = 100; + const int kLevels = 5; + const int kExpectedNumberOfNodes = (1 << (kLevels + 1)) - 1; + + float test_buffer[kTestBufferSize]; + memset(test_buffer, 0.f, kTestBufferSize * sizeof(*test_buffer)); + float test_coefficients[] = {1.f, 2.f, 3.f, 4.f, 5.f}; + const size_t kTestCoefficientsLength = sizeof(test_coefficients) / + sizeof(test_coefficients[0]); + WPDTree tree(kTestBufferSize, + test_coefficients, + test_coefficients, + kTestCoefficientsLength, + kLevels); + 
ASSERT_EQ(kExpectedNumberOfNodes, tree.num_nodes()); + // Checks for NodeAt(level, index). + int nodes_at_level = 0; + for (int level = 0; level <= kLevels; ++level) { + nodes_at_level = 1 << level; + for (int i = 0; i < nodes_at_level; ++i) { + ASSERT_TRUE(NULL != tree.NodeAt(level, i)); + } + // Out of bounds. + EXPECT_EQ(NULL, tree.NodeAt(level, -1)); + EXPECT_EQ(NULL, tree.NodeAt(level, -12)); + EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level)); + EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level + 5)); + } + // Out of bounds. + EXPECT_EQ(NULL, tree.NodeAt(-1, 0)); + EXPECT_EQ(NULL, tree.NodeAt(-12, 0)); + EXPECT_EQ(NULL, tree.NodeAt(kLevels + 1, 0)); + EXPECT_EQ(NULL, tree.NodeAt(kLevels + 5, 0)); + // Checks for Update(). + EXPECT_EQ(0, tree.Update(test_buffer, kTestBufferSize)); + EXPECT_EQ(-1, tree.Update(NULL, kTestBufferSize)); + EXPECT_EQ(-1, tree.Update(test_buffer, kTestBufferSize - 1)); +} + +// This test is for the correctness of the tree. +// Checks the results from the Matlab equivalent; it is done by comparing the +// results that are stored in the output files from Matlab. +// It also writes the results in its own set of files in the out directory. +// Matlab and output files contain all the results in double precision (Little +// endian) appended. +TEST(WPDTreeTest, CorrectnessBasedOnMatlabFiles) { + // 10 ms at 16000 Hz. + const size_t kTestBufferSize = 160; + const int kLevels = 3; + const int kLeaves = 1 << kLevels; + const size_t kLeavesSamples = kTestBufferSize >> kLevels; + // Create tree with Daubechies 8 Wavelet Coefficients. + WPDTree tree(kTestBufferSize, + kDaubechies8HighPassCoefficients, + kDaubechies8LowPassCoefficients, + kDaubechies8CoefficientsLength, + kLevels); + // Allocate and open all matlab and out files. + scoped_ptr<FileWrapper> matlab_files_data[kLeaves]; + scoped_ptr<FileWrapper> out_files_data[kLeaves]; + + for (int i = 0; i < kLeaves; ++i) { + // Matlab files.
+ matlab_files_data[i].reset(FileWrapper::Create()); + + std::ostringstream matlab_stream; + matlab_stream << "audio_processing/transient/wpd" << i; + std::string matlab_string = test::ResourcePath(matlab_stream.str(), "dat"); + matlab_files_data[i]->OpenFile(matlab_string.c_str(), + true, // Read only. + false, // No loop. + false); // No text. + + bool file_opened = matlab_files_data[i]->Open(); + ASSERT_TRUE(file_opened) << "File could not be opened.\n" << matlab_string; + + // Out files. + out_files_data[i].reset(FileWrapper::Create()); + + std::ostringstream out_stream; + out_stream << test::OutputPath() << "wpd_" << i << ".out"; + std::string out_string = out_stream.str(); + + out_files_data[i]->OpenFile(out_string.c_str(), + false, // Write mode. + false, // No loop. + false); // No text. + + file_opened = out_files_data[i]->Open(); + ASSERT_TRUE(file_opened) << "File could not be opened.\n" << out_string; + } + + // Prepare the test file. + std::string test_file_name = test::ResourcePath( + "audio_processing/transient/ajm-macbook-1-spke16m", "pcm"); + + scoped_ptr<FileWrapper> test_file(FileWrapper::Create()); + + test_file->OpenFile(test_file_name.c_str(), + true, // Read only. + false, // No loop. + false); // No text. + + bool file_opened = test_file->Open(); + ASSERT_TRUE(file_opened) << "File could not be opened.\n" << test_file_name; + + float test_buffer[kTestBufferSize]; + + // Only the first frames of the audio file are tested. The matlab files also + // only contain information about the first frames. + const size_t kMaxFramesToTest = 100; + const float kTolerance = 0.03f; + + size_t frames_read = 0; + + // Read first buffer from the PCM test file. + size_t file_samples_read = ReadInt16FromFileToFloatBuffer(test_file.get(), + kTestBufferSize, + test_buffer); + while (file_samples_read > 0 && frames_read < kMaxFramesToTest) { + ++frames_read; + + if (file_samples_read < kTestBufferSize) { + // Pad the rest of the buffer with zeros.
+ for (size_t i = file_samples_read; i < kTestBufferSize; ++i) { + test_buffer[i] = 0.0; + } + } + tree.Update(test_buffer, kTestBufferSize); + double matlab_buffer[kTestBufferSize]; + + // Compare results with data from the matlab test files. + for (int i = 0; i < kLeaves; ++i) { + // Compare data values + size_t matlab_samples_read = + ReadDoubleBufferFromFile(matlab_files_data[i].get(), + kLeavesSamples, + matlab_buffer); + + ASSERT_EQ(kLeavesSamples, matlab_samples_read) + << "Matlab test files are malformed.\n" + << "File: 3_" << i; + // Get output data from the corresponding node + const float* node_data = tree.NodeAt(kLevels, i)->data(); + // Compare with matlab files. + for (size_t j = 0; j < kLeavesSamples; ++j) { + EXPECT_NEAR(matlab_buffer[j], node_data[j], kTolerance) + << "\nLeaf: " << i << "\nSample: " << j + << "\nFrame: " << frames_read - 1; + } + + // Write results to out files. + WriteFloatBufferToFile(out_files_data[i].get(), + kLeavesSamples, + node_data); + } + + // Read next buffer from the PCM test file. + file_samples_read = ReadInt16FromFileToFloatBuffer(test_file.get(), + kTestBufferSize, + test_buffer); + } + + // Close all matlab and out files. + for (int i = 0; i < kLeaves; ++i) { + matlab_files_data[i]->CloseFile(); + out_files_data[i]->CloseFile(); + } + + test_file->CloseFile(); +} + +} // namespace webrtc diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index f129d359f8..a7ae7f9c7b 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -159,10 +159,31 @@ 'audio_coding/neteq/mock/mock_payload_splitter.h', 'audio_coding/neteq/tools/input_audio_file_unittest.cc', 'audio_coding/neteq/tools/packet_unittest.cc', - 'audio_processing/aec/system_delay_unittest.cc', 'audio_processing/aec/echo_cancellation_unittest.cc', + 'audio_processing/aec/system_delay_unittest.cc', + # TODO(ajm): Fix to match new interface. 
+ # 'audio_processing/agc/agc_unittest.cc', + 'audio_processing/agc/agc_audio_proc_unittest.cc', + 'audio_processing/agc/circular_buffer_unittest.cc', + 'audio_processing/agc/gmm_unittest.cc', + 'audio_processing/agc/histogram_unittest.cc', + 'audio_processing/agc/include/mock_agc.h', + 'audio_processing/agc/pitch_based_vad_unittest.cc', + 'audio_processing/agc/pitch_internal_unittest.cc', + 'audio_processing/agc/pole_zero_filter_unittest.cc', + 'audio_processing/agc/standalone_vad_unittest.cc', + 'audio_processing/agc/test/test_utils.cc', 'audio_processing/echo_cancellation_impl_unittest.cc', 'audio_processing/splitting_filter_unittest.cc', + 'audio_processing/transient/dyadic_decimator_unittest.cc', + 'audio_processing/transient/file_utils.cc', + 'audio_processing/transient/file_utils.h', + 'audio_processing/transient/file_utils_unittest.cc', + 'audio_processing/transient/moving_moments_unittest.cc', + 'audio_processing/transient/transient_detector_unittest.cc', + 'audio_processing/transient/transient_suppressor_unittest.cc', + 'audio_processing/transient/wpd_node_unittest.cc', + 'audio_processing/transient/wpd_tree_unittest.cc', 'audio_processing/utility/delay_estimator_unittest.cc', 'audio_processing/utility/ring_buffer_unittest.cc', 'bitrate_controller/bitrate_controller_unittest.cc', @@ -326,11 +347,6 @@ 'target_name': 'modules_tests', 'type': '<(gtest_target_type)', 'dependencies': [ - 'audio_coding_module', - 'rtp_rtcp', - 'video_codecs_test_framework', - 'webrtc_utility', - 'webrtc_video_coding', '<(DEPTH)/testing/gtest.gyp:gtest', '<(webrtc_root)/common_video/common_video.gyp:common_video', '<(webrtc_root)/modules/video_coding/codecs/vp8/vp8.gyp:webrtc_vp8', @@ -339,6 +355,11 @@ '<(webrtc_root)/test/metrics.gyp:metrics', '<(webrtc_root)/test/test.gyp:test_support', '<(webrtc_root)/test/test.gyp:test_support_main', + 'audio_coding_module', + 'rtp_rtcp', + 'video_codecs_test_framework', + 'webrtc_utility', + 'webrtc_video_coding', ], 'defines': [ 
'<@(audio_coding_defines)', @@ -346,23 +367,23 @@ 'sources': [ 'audio_coding/main/test/APITest.cc', 'audio_coding/main/test/Channel.cc', - 'audio_coding/main/test/dual_stream_unittest.cc', 'audio_coding/main/test/EncodeDecodeTest.cc', - 'audio_coding/main/test/iSACTest.cc', - 'audio_coding/main/test/opus_test.cc', - 'audio_coding/main/test/PacketLossTest.cc', 'audio_coding/main/test/PCMFile.cc', + 'audio_coding/main/test/PacketLossTest.cc', 'audio_coding/main/test/RTPFile.cc', 'audio_coding/main/test/SpatialAudio.cc', 'audio_coding/main/test/TestAllCodecs.cc', - 'audio_coding/main/test/target_delay_unittest.cc', - 'audio_coding/main/test/Tester.cc', 'audio_coding/main/test/TestRedFec.cc', 'audio_coding/main/test/TestStereo.cc', 'audio_coding/main/test/TestVADDTX.cc', + 'audio_coding/main/test/Tester.cc', 'audio_coding/main/test/TimedTrace.cc', 'audio_coding/main/test/TwoWayCommunication.cc', + 'audio_coding/main/test/dual_stream_unittest.cc', + 'audio_coding/main/test/iSACTest.cc', 'audio_coding/main/test/initial_delay_unittest.cc', + 'audio_coding/main/test/opus_test.cc', + 'audio_coding/main/test/target_delay_unittest.cc', 'audio_coding/main/test/utility.cc', 'rtp_rtcp/test/testFec/test_fec.cc', 'video_coding/codecs/test/videoprocessor_integrationtest.cc', diff --git a/webrtc/modules/modules_unittests.isolate b/webrtc/modules/modules_unittests.isolate index c5a0a28b92..700c506000 100644 --- a/webrtc/modules/modules_unittests.isolate +++ b/webrtc/modules/modules_unittests.isolate @@ -34,6 +34,38 @@ '<(DEPTH)/resources/audio_coding/speech_mono_32_48kHz.pcm', '<(DEPTH)/resources/audio_coding/testfile32kHz.pcm', '<(DEPTH)/resources/audio_coding/teststereo32kHz.pcm', + '<(DEPTH)/resources/audio_processing/agc/agc_audio.pcm', + '<(DEPTH)/resources/audio_processing/agc/agc_no_circular_buffer.dat', + '<(DEPTH)/resources/audio_processing/agc/agc_pitch_gain.dat', + '<(DEPTH)/resources/audio_processing/agc/agc_pitch_lag.dat', + 
'<(DEPTH)/resources/audio_processing/agc/agc_spectral_peak.dat', + '<(DEPTH)/resources/audio_processing/agc/agc_vad.dat', + '<(DEPTH)/resources/audio_processing/agc/agc_voicing_prob.dat', + '<(DEPTH)/resources/audio_processing/agc/agc_with_circular_buffer.dat', + '<(DEPTH)/resources/audio_processing/transient/ajm-macbook-1-spke.gai', + '<(DEPTH)/resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm', + '<(DEPTH)/resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat', + '<(DEPTH)/resources/audio_processing/transient/audio16kHz.pcm', + '<(DEPTH)/resources/audio_processing/transient/audio32kHz.pcm', + '<(DEPTH)/resources/audio_processing/transient/audio48kHz.pcm', + '<(DEPTH)/resources/audio_processing/transient/audio8kHz.pcm', + '<(DEPTH)/resources/audio_processing/transient/detect16kHz.dat', + '<(DEPTH)/resources/audio_processing/transient/detect32kHz.dat', + '<(DEPTH)/resources/audio_processing/transient/detect48kHz.dat', + '<(DEPTH)/resources/audio_processing/transient/detect8kHz.dat', + '<(DEPTH)/resources/audio_processing/transient/double-utils.dat', + '<(DEPTH)/resources/audio_processing/transient/float-utils.dat', + '<(DEPTH)/resources/audio_processing/transient/suppressed16kHz.pcm', + '<(DEPTH)/resources/audio_processing/transient/suppressed32kHz.pcm', + '<(DEPTH)/resources/audio_processing/transient/suppressed8kHz.pcm', + '<(DEPTH)/resources/audio_processing/transient/wpd0.dat', + '<(DEPTH)/resources/audio_processing/transient/wpd1.dat', + '<(DEPTH)/resources/audio_processing/transient/wpd2.dat', + '<(DEPTH)/resources/audio_processing/transient/wpd3.dat', + '<(DEPTH)/resources/audio_processing/transient/wpd4.dat', + '<(DEPTH)/resources/audio_processing/transient/wpd5.dat', + '<(DEPTH)/resources/audio_processing/transient/wpd6.dat', + '<(DEPTH)/resources/audio_processing/transient/wpd7.dat', '<(DEPTH)/resources/deflicker_before_cif_short.yuv', '<(DEPTH)/resources/far16_stereo.pcm', 
'<(DEPTH)/resources/far32_stereo.pcm',