diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn index 3b49745dd6..22c904d52f 100644 --- a/webrtc/modules/audio_processing/BUILD.gn +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -108,6 +108,8 @@ source_set("audio_processing") { "transient/wpd_tree.h", "typing_detection.cc", "typing_detection.h", + "utility/block_mean_calculator.cc", + "utility/block_mean_calculator.h", "utility/delay_estimator.c", "utility/delay_estimator.h", "utility/delay_estimator_internal.h", diff --git a/webrtc/modules/audio_processing/aec/aec_core.cc b/webrtc/modules/audio_processing/aec/aec_core.cc index 4de7812a52..e23a79312b 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.cc +++ b/webrtc/modules/audio_processing/aec/aec_core.cc @@ -46,8 +46,8 @@ namespace webrtc { static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz. // Metrics -static const int subCountLen = 4; -static const int countLen = 50; +static const size_t kSubCountLen = 4; +static const size_t kCountLen = 50; static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. // Quantities to control H band scaling for SWB input @@ -150,6 +150,17 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } +PowerLevel::PowerLevel() +// TODO(minyue): Due to a legacy bug, |framelevel| and |averagelevel| use a +// window, of which the length is 1 unit longer than indicated. Remove "+1" +// when the code is refactored. +: framelevel(kSubCountLen + 1), + averagelevel(kCountLen + 1) { +} + +// TODO(minyue): Moving some initialization from WebRtcAec_CreateAec() to ctor. +AecCore::AecCore() = default; + static int CmpFloat(const void* a, const void* b) { const float* da = (const float*)a; const float* db = (const float*)b; @@ -523,14 +534,9 @@ static void ComfortNoise(AecCore* aec, static void InitLevel(PowerLevel* level) { const float kBigFloat = 1E17f; - - level->averagelevel = 0; - level->framelevel = 0; + level->averagelevel.Reset(); + level->framelevel.Reset(); level->minlevel = kBigFloat; - level->frsum = 0; - level->sfrsum = 0; - level->frcounter = 0; - level->sfrcounter = 0; } static void InitStats(Stats* stats) { @@ -569,27 +575,17 @@ static float CalculatePower(const float* in, size_t num_samples) { } static void UpdateLevel(PowerLevel* level, float power) { - level->sfrsum += power; - level->sfrcounter++; - - if (level->sfrcounter > subCountLen) { - level->framelevel = level->sfrsum / subCountLen; - level->sfrsum = 0; - level->sfrcounter = 0; - if (level->framelevel > 0) { - if (level->framelevel < level->minlevel) { - level->minlevel = level->framelevel; // New minimum. + level->framelevel.AddValue(power); + if (level->framelevel.EndOfBlock()) { + const float new_frame_level = level->framelevel.GetLatestMean(); + if (new_frame_level > 0) { + if (new_frame_level < level->minlevel) { + level->minlevel = new_frame_level; // New minimum. } else { level->minlevel *= (1 + 0.001f); // Small increase. } } - level->frcounter++; - level->frsum += level->framelevel; - if (level->frcounter > countLen) { - level->averagelevel = level->frsum / countLen; - level->frsum = 0; - level->frcounter = 0; - } + level->averagelevel.AddValue(new_frame_level); } } @@ -609,29 +605,31 @@ static void UpdateMetrics(AecCore* aec) { aec->stateCounter++; } - if (aec->farlevel.frcounter == 0) { + if (aec->farlevel.averagelevel.EndOfBlock()) { if (aec->farlevel.minlevel < noisyPower) { actThreshold = actThresholdClean; } else { actThreshold = actThresholdNoisy; } - if ((aec->stateCounter > (0.5f * countLen * subCountLen)) && - (aec->farlevel.sfrcounter == 0) + const float far_average_level = aec->farlevel.averagelevel.GetLatestMean(); + + // The last condition is to let estimation be made in active far-end + // segments only. + if ((aec->stateCounter > (0.5f * kCountLen * kSubCountLen)) && + (aec->farlevel.framelevel.EndOfBlock()) && + (far_average_level > (actThreshold * aec->farlevel.minlevel))) { + + const float near_average_level = + aec->nearlevel.averagelevel.GetLatestMean(); - // Estimate in active far-end segments only - && (aec->farlevel.averagelevel > - (actThreshold * aec->farlevel.minlevel))) { // Subtract noise power - echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; + echo = near_average_level - safety * aec->nearlevel.minlevel; // ERL - dtmp = 10 * static_cast(log10(aec->farlevel.averagelevel / - aec->nearlevel.averagelevel + - 1e-10f)); - dtmp2 = 10 * static_cast(log10(aec->farlevel.averagelevel / - echo + - 1e-10f)); + dtmp = 10 * static_cast(log10(far_average_level / + near_average_level + 1e-10f)); + dtmp2 = 10 * static_cast(log10(far_average_level / echo + 1e-10f)); aec->erl.instant = dtmp; if (dtmp > aec->erl.max) { @@ -654,13 +652,14 @@ static void UpdateMetrics(AecCore* aec) { } // A_NLP - dtmp = 10 * static_cast(log10(aec->nearlevel.averagelevel / - aec->linoutlevel.averagelevel + - 1e-10f)); + const float linout_average_level = + aec->linoutlevel.averagelevel.GetLatestMean(); + dtmp = 10 * static_cast(log10(near_average_level / + linout_average_level + 1e-10f)); // subtract noise power - suppressedEcho = aec->linoutlevel.averagelevel - - safety * aec->linoutlevel.minlevel; + suppressedEcho = + linout_average_level - safety * aec->linoutlevel.minlevel; dtmp2 = 10 * static_cast(log10(echo / suppressedEcho + 1e-10f)); @@ -685,13 +684,14 @@ static void UpdateMetrics(AecCore* aec) { } // ERLE - + const float nlpout_average_level = + aec->nlpoutlevel.averagelevel.GetLatestMean(); // subtract noise power - suppressedEcho = aec->nlpoutlevel.averagelevel - - safety * aec->nlpoutlevel.minlevel; + suppressedEcho = + nlpout_average_level - safety * aec->nlpoutlevel.minlevel; - dtmp = 10 * static_cast(log10(aec->nearlevel.averagelevel / - aec->nlpoutlevel.averagelevel + 1e-10f)); + dtmp = 10 * static_cast(log10(near_average_level / + nlpout_average_level + 1e-10f)); dtmp2 = 10 * static_cast(log10(echo / suppressedEcho + 1e-10f)); dtmp = dtmp2; @@ -1361,7 +1361,7 @@ static void ProcessBlock(AecCore* aec) { AecCore* WebRtcAec_CreateAec() { int i; - AecCore* aec = reinterpret_cast(malloc(sizeof(AecCore))); + AecCore* aec = new AecCore; if (!aec) { return NULL; } @@ -1496,7 +1496,7 @@ void WebRtcAec_FreeAec(AecCore* aec) { WebRtc_FreeDelayEstimator(aec->delay_estimator); WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); - free(aec); + delete aec; } int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index 810d517834..ea5889f503 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -17,6 +17,7 @@ extern "C" { #include "webrtc/common_audio/wav_file.h" #include "webrtc/modules/audio_processing/aec/aec_common.h" #include "webrtc/modules/audio_processing/aec/aec_core.h" +#include "webrtc/modules/audio_processing/utility/block_mean_calculator.h" #include "webrtc/typedefs.h" namespace webrtc { @@ -40,16 +41,16 @@ static const float kExtendedMu = 0.4f; static const float kExtendedErrorThreshold = 1.0e-6f; typedef struct PowerLevel { - float sfrsum; - int sfrcounter; - float framelevel; - float frsum; - int frcounter; + PowerLevel(); + + BlockMeanCalculator framelevel; + BlockMeanCalculator averagelevel; float minlevel; - float averagelevel; } PowerLevel; struct AecCore { + AecCore(); + int farBufWritePos, farBufReadPos; int knownDelay; diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index 77f0a14af1..264f3e5bef 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -118,6 +118,8 @@ 'transient/wpd_tree.h', 'typing_detection.cc', 'typing_detection.h', + 'utility/block_mean_calculator.cc', + 'utility/block_mean_calculator.h', 'utility/delay_estimator.c', 'utility/delay_estimator.h', 'utility/delay_estimator_internal.h', diff --git a/webrtc/modules/audio_processing/utility/block_mean_calculator.cc b/webrtc/modules/audio_processing/utility/block_mean_calculator.cc new file mode 100644 index 0000000000..7f4508ecc7 --- /dev/null +++ b/webrtc/modules/audio_processing/utility/block_mean_calculator.cc @@ -0,0 +1,53 @@ +/* + * Copyright 2016 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/utility/block_mean_calculator.h" + +#include "webrtc/base/checks.h" + +namespace webrtc { + +BlockMeanCalculator::BlockMeanCalculator(size_t block_length) + : block_length_(block_length), + count_(0), + sum_(0.0), + mean_(0.0) { + RTC_DCHECK(block_length_ != 0); +} + +void BlockMeanCalculator::Reset() { + Clear(); + mean_ = 0.0; +} + +void BlockMeanCalculator::AddValue(float value) { + sum_ += value; + ++count_; + if (count_ == block_length_) { + mean_ = sum_ / block_length_; + Clear(); + } +} + +bool BlockMeanCalculator::EndOfBlock() const { + return count_ == 0; +} + +float BlockMeanCalculator::GetLatestMean() const { + return mean_; +} + +// Flush all samples added. +void BlockMeanCalculator::Clear() { + count_ = 0; + sum_ = 0.0; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/utility/block_mean_calculator.h b/webrtc/modules/audio_processing/utility/block_mean_calculator.h new file mode 100644 index 0000000000..71e8b63ce8 --- /dev/null +++ b/webrtc/modules/audio_processing/utility/block_mean_calculator.h @@ -0,0 +1,52 @@ +/* + * Copyright 2016 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_ + +#include + +#include "webrtc/base/constructormagic.h" + +namespace webrtc { + +// BlockMeanCalculator calculates the mean of a block of values. Values are +// added one after another, and the mean is updated at the end of every block. +class BlockMeanCalculator { + public: + explicit BlockMeanCalculator(size_t block_length); + + // Reset. + void Reset(); + + // Add one value to the sequence. + void AddValue(float value); + + // Return whether the latest added value was at the end of a block. + bool EndOfBlock() const; + + // Return the latest mean. + float GetLatestMean() const; + + private: + // Clear all values added. + void Clear(); + + const size_t block_length_; + size_t count_; + float sum_; + float mean_; + + RTC_DISALLOW_COPY_AND_ASSIGN(BlockMeanCalculator); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_ diff --git a/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc b/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc new file mode 100644 index 0000000000..3f74ac9302 --- /dev/null +++ b/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "testing/gtest/include/gtest/gtest.h" + +#include "webrtc/modules/audio_processing/utility/block_mean_calculator.h" + +namespace webrtc { + +TEST(MeanCalculatorTest, Correctness) { + const size_t kBlockLength = 10; + BlockMeanCalculator mean_calculator(kBlockLength); + size_t i = 0; + float reference = 0.0; + + for (; i < kBlockLength - 1; ++i) { + mean_calculator.AddValue(static_cast(i)); + EXPECT_FALSE(mean_calculator.EndOfBlock()); + } + mean_calculator.AddValue(static_cast(i++)); + EXPECT_TRUE(mean_calculator.EndOfBlock()); + + for (; i < 3 * kBlockLength; ++i) { + const bool end_of_block = i % kBlockLength == 0; + if (end_of_block) { + // Sum of (i - kBlockLength) ... (i - 1) + reference = i - 0.5 * (1 + kBlockLength); + } + EXPECT_EQ(mean_calculator.EndOfBlock(), end_of_block); + EXPECT_EQ(reference, mean_calculator.GetLatestMean()); + mean_calculator.AddValue(static_cast(i)); + } +} + +TEST(MeanCalculatorTest, Reset) { + const size_t kBlockLength = 10; + BlockMeanCalculator mean_calculator(kBlockLength); + for (size_t i = 0; i < kBlockLength - 1; ++i) { + mean_calculator.AddValue(static_cast(i)); + } + mean_calculator.Reset(); + size_t i = 0; + for (; i < kBlockLength - 1; ++i) { + mean_calculator.AddValue(static_cast(i)); + EXPECT_FALSE(mean_calculator.EndOfBlock()); + } + mean_calculator.AddValue(static_cast(i)); + EXPECT_TRUE(mean_calculator.EndOfBlock()); + EXPECT_EQ(mean_calculator.GetLatestMean(), 0.5 * (kBlockLength - 1)); +} + +} // namespace webrtc diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index 44f97ab250..492b6184cc 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -258,6 +258,7 @@ 'audio_processing/transient/transient_suppressor_unittest.cc', 'audio_processing/transient/wpd_node_unittest.cc', 'audio_processing/transient/wpd_tree_unittest.cc', + 'audio_processing/utility/block_mean_calculator_unittest.cc', 'audio_processing/utility/delay_estimator_unittest.cc', 'audio_processing/vad/gmm_unittest.cc', 'audio_processing/vad/pitch_based_vad_unittest.cc',