Major updates to the echo removal functionality in AEC3

This CL adds fairly significant changes to the echo removal
functionality, the main ones being:
- More centralized control over the echo removal.
- Updated echo suppression gain behavior.
- Significantly increased usage of the linear adaptive filter.
- New echo removal functionality when the linear filter is not usable.

This CL is chained to the CL https://codereview.webrtc.org/2784023002/

BUG=webrtc:6018

Review-Url: https://codereview.webrtc.org/2782423003
Cr-Commit-Position: refs/heads/master@{#17575}
This commit is contained in:
peah 2017-04-06 15:45:32 -07:00 committed by Commit bot
parent f51517a64f
commit 86afe9d661
40 changed files with 724 additions and 1157 deletions

View File

@ -73,8 +73,6 @@ rtc_static_library("audio_processing") {
"aec3/matched_filter_lag_aggregator.h",
"aec3/output_selector.cc",
"aec3/output_selector.h",
"aec3/power_echo_model.cc",
"aec3/power_echo_model.h",
"aec3/render_buffer.cc",
"aec3/render_buffer.h",
"aec3/render_delay_buffer.cc",
@ -591,7 +589,6 @@ if (rtc_include_tests) {
"aec3/matched_filter_lag_aggregator_unittest.cc",
"aec3/matched_filter_unittest.cc",
"aec3/output_selector_unittest.cc",
"aec3/power_echo_model_unittest.cc",
"aec3/render_buffer_unittest.cc",
"aec3/render_delay_buffer_unittest.cc",
"aec3/render_delay_controller_metrics_unittest.cc",

View File

@ -59,42 +59,35 @@ void UpdateErlEstimator(
}
}
// Clears every partition of the filter H by calling Clear() on each entry.
void ResetFilter(rtc::ArrayView<FftData> H) {
  const size_t num_partitions = H.size();
  for (size_t partition = 0; partition < num_partitions; ++partition) {
    H[partition].Clear();
  }
}
} // namespace
namespace aec3 {
// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)).
void AdaptPartitions(const RenderBuffer& X_buffer,
void AdaptPartitions(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H) {
rtc::ArrayView<const FftData> X_buffer_data = X_buffer.Buffer();
size_t index = X_buffer.Position();
rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer();
size_t index = render_buffer.Position();
for (auto& H_j : H) {
const FftData& X = X_buffer_data[index];
const FftData& X = render_buffer_data[index];
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
H_j.re[k] += X.re[k] * G.re[k] + X.im[k] * G.im[k];
H_j.im[k] += X.re[k] * G.im[k] - X.im[k] * G.re[k];
}
index = index < (X_buffer_data.size() - 1) ? index + 1 : 0;
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
}
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Adapts the filter partitions. (SSE2 variant)
void AdaptPartitions_SSE2(const RenderBuffer& X_buffer,
void AdaptPartitions_SSE2(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H) {
rtc::ArrayView<const FftData> X_buffer_data = X_buffer.Buffer();
rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer();
const int lim1 =
std::min(X_buffer_data.size() - X_buffer.Position(), H.size());
std::min(render_buffer_data.size() - render_buffer.Position(), H.size());
const int lim2 = H.size();
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
FftData* H_j;
@ -106,7 +99,7 @@ void AdaptPartitions_SSE2(const RenderBuffer& X_buffer,
const __m128 G_im = _mm_loadu_ps(&G.im[k]);
H_j = &H[0];
X = &X_buffer_data[X_buffer.Position()];
X = &render_buffer_data[render_buffer.Position()];
limit = lim1;
j = 0;
do {
@ -127,13 +120,13 @@ void AdaptPartitions_SSE2(const RenderBuffer& X_buffer,
_mm_storeu_ps(&H_j->im[k], h);
}
X = &X_buffer_data[0];
X = &render_buffer_data[0];
limit = lim2;
} while (j < lim2);
}
H_j = &H[0];
X = &X_buffer_data[X_buffer.Position()];
X = &render_buffer_data[render_buffer.Position()];
limit = lim1;
j = 0;
do {
@ -144,46 +137,47 @@ void AdaptPartitions_SSE2(const RenderBuffer& X_buffer,
X->im[kFftLengthBy2] * G.re[kFftLengthBy2];
}
X = &X_buffer_data[0];
X = &render_buffer_data[0];
limit = lim2;
} while (j < lim2);
}
#endif
// Produces the filter output.
void ApplyFilter(const RenderBuffer& X_buffer,
void ApplyFilter(const RenderBuffer& render_buffer,
rtc::ArrayView<const FftData> H,
FftData* S) {
S->re.fill(0.f);
S->im.fill(0.f);
rtc::ArrayView<const FftData> X_buffer_data = X_buffer.Buffer();
size_t index = X_buffer.Position();
rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer();
size_t index = render_buffer.Position();
for (auto& H_j : H) {
const FftData& X = X_buffer_data[index];
const FftData& X = render_buffer_data[index];
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
S->re[k] += X.re[k] * H_j.re[k] - X.im[k] * H_j.im[k];
S->im[k] += X.re[k] * H_j.im[k] + X.im[k] * H_j.re[k];
}
index = index < (X_buffer_data.size() - 1) ? index + 1 : 0;
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
}
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Produces the filter output (SSE2 variant).
void ApplyFilter_SSE2(const RenderBuffer& X_buffer,
void ApplyFilter_SSE2(const RenderBuffer& render_buffer,
rtc::ArrayView<const FftData> H,
FftData* S) {
RTC_DCHECK_GE(H.size(), H.size() - 1);
S->re.fill(0.f);
S->im.fill(0.f);
rtc::ArrayView<const FftData> X_buffer_data = X_buffer.Buffer();
rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer();
const int lim1 =
std::min(X_buffer_data.size() - X_buffer.Position(), H.size());
std::min(render_buffer_data.size() - render_buffer.Position(), H.size());
const int lim2 = H.size();
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
const FftData* H_j = &H[0];
const FftData* X = &X_buffer_data[X_buffer.Position()];
const FftData* X = &render_buffer_data[render_buffer.Position()];
int j = 0;
int limit = lim1;
@ -209,11 +203,11 @@ void ApplyFilter_SSE2(const RenderBuffer& X_buffer,
}
}
limit = lim2;
X = &X_buffer_data[0];
X = &render_buffer_data[0];
} while (j < lim2);
H_j = &H[0];
X = &X_buffer_data[X_buffer.Position()];
X = &render_buffer_data[render_buffer.Position()];
j = 0;
limit = lim1;
do {
@ -224,7 +218,7 @@ void ApplyFilter_SSE2(const RenderBuffer& X_buffer,
X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2];
}
limit = lim2;
X = &X_buffer_data[0];
X = &render_buffer_data[0];
} while (j < lim2);
}
#endif
@ -232,64 +226,61 @@ void ApplyFilter_SSE2(const RenderBuffer& X_buffer,
} // namespace aec3
AdaptiveFirFilter::AdaptiveFirFilter(size_t size_partitions,
bool use_filter_statistics,
Aec3Optimization optimization,
ApmDataDumper* data_dumper)
: data_dumper_(data_dumper),
fft_(),
optimization_(optimization),
H_(size_partitions) {
H_(size_partitions),
H2_(size_partitions, std::array<float, kFftLengthBy2Plus1>()) {
RTC_DCHECK(data_dumper_);
ResetFilter(H_);
if (use_filter_statistics) {
H2_.reset(new std::vector<std::array<float, kFftLengthBy2Plus1>>(
size_partitions, std::array<float, kFftLengthBy2Plus1>()));
for (auto H2_k : *H2_) {
H2_k.fill(0.f);
}
erl_.reset(new std::array<float, kFftLengthBy2Plus1>());
erl_->fill(0.f);
for (auto& H_j : H_) {
H_j.Clear();
}
for (auto& H2_k : H2_) {
H2_k.fill(0.f);
}
erl_.fill(0.f);
}
AdaptiveFirFilter::~AdaptiveFirFilter() = default;
void AdaptiveFirFilter::HandleEchoPathChange() {
ResetFilter(H_);
if (H2_) {
for (auto H2_k : *H2_) {
H2_k.fill(0.f);
}
RTC_DCHECK(erl_);
erl_->fill(0.f);
for (auto& H_j : H_) {
H_j.Clear();
}
for (auto& H2_k : H2_) {
H2_k.fill(0.f);
}
erl_.fill(0.f);
}
void AdaptiveFirFilter::Filter(const RenderBuffer& X_buffer, FftData* S) const {
void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
FftData* S) const {
RTC_DCHECK(S);
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
aec3::ApplyFilter_SSE2(X_buffer, H_, S);
aec3::ApplyFilter_SSE2(render_buffer, H_, S);
break;
#endif
default:
aec3::ApplyFilter(X_buffer, H_, S);
aec3::ApplyFilter(render_buffer, H_, S);
}
}
void AdaptiveFirFilter::Adapt(const RenderBuffer& X_buffer, const FftData& G) {
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
const FftData& G) {
// Adapt the filter.
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
aec3::AdaptPartitions_SSE2(X_buffer, G, H_);
aec3::AdaptPartitions_SSE2(render_buffer, G, H_);
break;
#endif
default:
aec3::AdaptPartitions(X_buffer, G, H_);
aec3::AdaptPartitions(render_buffer, G, H_);
}
// Constrain the filter partitions in a cyclic manner.
@ -298,13 +289,9 @@ void AdaptiveFirFilter::Adapt(const RenderBuffer& X_buffer, const FftData& G) {
? partition_to_constrain_ + 1
: 0;
// Optionally update the frequency response and echo return loss for the
// filter.
if (H2_) {
RTC_DCHECK(erl_);
UpdateFrequencyResponse(H_, H2_.get());
UpdateErlEstimator(*H2_, erl_.get());
}
// Update the frequency response and echo return loss for the filter.
UpdateFrequencyResponse(H_, &H2_);
UpdateErlEstimator(H2_, &erl_);
}
} // namespace webrtc

View File

@ -26,21 +26,21 @@
namespace webrtc {
namespace aec3 {
// Adapts the filter partitions.
void AdaptPartitions(const RenderBuffer& X_buffer,
void AdaptPartitions(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H);
#if defined(WEBRTC_ARCH_X86_FAMILY)
void AdaptPartitions_SSE2(const RenderBuffer& X_buffer,
void AdaptPartitions_SSE2(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H);
#endif
// Produces the filter output.
void ApplyFilter(const RenderBuffer& X_buffer,
void ApplyFilter(const RenderBuffer& render_buffer,
rtc::ArrayView<const FftData> H,
FftData* S);
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ApplyFilter_SSE2(const RenderBuffer& X_buffer,
void ApplyFilter_SSE2(const RenderBuffer& render_buffer,
rtc::ArrayView<const FftData> H,
FftData* S);
#endif
@ -51,17 +51,16 @@ void ApplyFilter_SSE2(const RenderBuffer& X_buffer,
class AdaptiveFirFilter {
public:
AdaptiveFirFilter(size_t size_partitions,
bool use_filter_statistics,
Aec3Optimization optimization,
ApmDataDumper* data_dumper);
~AdaptiveFirFilter();
// Produces the output of the filter.
void Filter(const RenderBuffer& X_buffer, FftData* S) const;
void Filter(const RenderBuffer& render_buffer, FftData* S) const;
// Adapts the filter.
void Adapt(const RenderBuffer& X_buffer, const FftData& G);
void Adapt(const RenderBuffer& render_buffer, const FftData& G);
// Receives reports that known echo path changes have occurred and adjusts
// the filter adaptation accordingly.
@ -70,25 +69,13 @@ class AdaptiveFirFilter {
// Returns the filter size.
size_t SizePartitions() const { return H_.size(); }
// Returns the filter based echo return loss. This method can only be used if
// the usage of filter statistics has been specified during the creation of
// the adaptive filter.
const std::array<float, kFftLengthBy2Plus1>& Erl() const {
RTC_DCHECK(erl_) << "The filter must be created with use_filter_statistics "
"set to true in order to be able to call retrieve the "
"ERL.";
return *erl_;
}
// Returns the filter based echo return loss.
const std::array<float, kFftLengthBy2Plus1>& Erl() const { return erl_; }
// Returns the frequency responses for the filter partitions. This method can
// only be used if the usage of filter statistics has been specified during
// the creation of the adaptive filter.
// Returns the frequency responses for the filter partitions.
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
FilterFrequencyResponse() const {
RTC_DCHECK(H2_) << "The filter must be created with use_filter_statistics "
"set to true in order to be able to call retrieve the "
"filter frequency responde.";
return *H2_;
return H2_;
}
void DumpFilter(const char* name) {
@ -103,8 +90,8 @@ class AdaptiveFirFilter {
const Aec3Fft fft_;
const Aec3Optimization optimization_;
std::vector<FftData> H_;
std::unique_ptr<std::vector<std::array<float, kFftLengthBy2Plus1>>> H2_;
std::unique_ptr<std::array<float, kFftLengthBy2Plus1>> erl_;
std::vector<std::array<float, kFftLengthBy2Plus1>> H2_;
std::array<float, kFftLengthBy2Plus1> erl_;
size_t partition_to_constrain_ = 0;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AdaptiveFirFilter);

View File

@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h"
// TODO(peah): Reactivate once the next CL has landed.
#if 0
#include <algorithm>
#include <numeric>
#include <string>
@ -22,8 +19,9 @@
#endif
#include "webrtc/base/arraysize.h"
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.h"
#include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h"
#include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
@ -49,12 +47,10 @@ std::string ProduceDebugText(size_t delay) {
TEST(AdaptiveFirFilter, TestOptimizations) {
bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0);
if (use_sse2) {
FftBuffer X_buffer(Aec3Optimization::kNone, 12, std::vector<size_t>(1, 12));
std::array<float, kBlockSize> x_old;
x_old.fill(0.f);
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 12,
std::vector<size_t>(1, 12));
Random random_generator(42U);
std::vector<float> x(kBlockSize, 0.f);
FftData X;
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
FftData S_C;
FftData S_SSE2;
FftData G;
@ -69,12 +65,11 @@ TEST(AdaptiveFirFilter, TestOptimizations) {
}
for (size_t k = 0; k < 500; ++k) {
RandomizeSampleVector(&random_generator, x);
fft.PaddedFft(x, x_old, &X);
X_buffer.Insert(X);
RandomizeSampleVector(&random_generator, x[0]);
render_buffer.Insert(x);
ApplyFilter_SSE2(X_buffer, H_SSE2, &S_SSE2);
ApplyFilter(X_buffer, H_C, &S_C);
ApplyFilter_SSE2(render_buffer, H_SSE2, &S_SSE2);
ApplyFilter(render_buffer, H_C, &S_C);
for (size_t j = 0; j < S_C.re.size(); ++j) {
EXPECT_FLOAT_EQ(S_C.re[j], S_SSE2.re[j]);
EXPECT_FLOAT_EQ(S_C.im[j], S_SSE2.im[j]);
@ -85,8 +80,8 @@ TEST(AdaptiveFirFilter, TestOptimizations) {
std::for_each(G.im.begin(), G.im.end(),
[&](float& a) { a = random_generator.Rand<float>(); });
AdaptPartitions_SSE2(X_buffer, G, H_SSE2);
AdaptPartitions(X_buffer, G, H_C);
AdaptPartitions_SSE2(render_buffer, G, H_SSE2);
AdaptPartitions(render_buffer, G, H_C);
for (size_t k = 0; k < H_C.size(); ++k) {
for (size_t j = 0; j < H_C[k].re.size(); ++j) {
@ -103,32 +98,17 @@ TEST(AdaptiveFirFilter, TestOptimizations) {
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// Verifies that the check for non-null data dumper works.
TEST(AdaptiveFirFilter, NullDataDumper) {
EXPECT_DEATH(AdaptiveFirFilter(9, true, DetectOptimization(), nullptr), "");
EXPECT_DEATH(AdaptiveFirFilter(9, DetectOptimization(), nullptr), "");
}
// Verifies that the check for non-null filter output works.
TEST(AdaptiveFirFilter, NullFilterOutput) {
ApmDataDumper data_dumper(42);
AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper);
FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(),
std::vector<size_t>(1, filter.SizePartitions()));
EXPECT_DEATH(filter.Filter(X_buffer, nullptr), "");
}
// Verifies that the check for whether filter statistics are being generated
// works when retrieving the ERL.
TEST(AdaptiveFirFilter, ErlAccessWhenNoFilterStatistics) {
ApmDataDumper data_dumper(42);
AdaptiveFirFilter filter(9, false, DetectOptimization(), &data_dumper);
EXPECT_DEATH(filter.Erl(), "");
}
// Verifies that the check for whether filter statistics are being generated
// works when retrieving the filter frequencyResponse.
TEST(AdaptiveFirFilter, FilterFrequencyResponseAccessWhenNoFilterStatistics) {
ApmDataDumper data_dumper(42);
AdaptiveFirFilter filter(9, false, DetectOptimization(), &data_dumper);
EXPECT_DEATH(filter.FilterFrequencyResponse(), "");
AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
filter.SizePartitions(),
std::vector<size_t>(1, filter.SizePartitions()));
EXPECT_DEATH(filter.Filter(render_buffer, nullptr), "");
}
#endif
@ -137,7 +117,7 @@ TEST(AdaptiveFirFilter, FilterFrequencyResponseAccessWhenNoFilterStatistics) {
// are turned on.
TEST(AdaptiveFirFilter, FilterStatisticsAccess) {
ApmDataDumper data_dumper(42);
AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper);
AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
filter.Erl();
filter.FilterFrequencyResponse();
}
@ -146,8 +126,7 @@ TEST(AdaptiveFirFilter, FilterStatisticsAccess) {
TEST(AdaptiveFirFilter, FilterSize) {
ApmDataDumper data_dumper(42);
for (size_t filter_size = 1; filter_size < 5; ++filter_size) {
AdaptiveFirFilter filter(filter_size, false, DetectOptimization(),
&data_dumper);
AdaptiveFirFilter filter(filter_size, DetectOptimization(), &data_dumper);
EXPECT_EQ(filter_size, filter.SizePartitions());
}
}
@ -157,19 +136,18 @@ TEST(AdaptiveFirFilter, FilterSize) {
TEST(AdaptiveFirFilter, FilterAndAdapt) {
constexpr size_t kNumBlocksToProcess = 500;
ApmDataDumper data_dumper(42);
AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper);
AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
Aec3Fft fft;
FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(),
std::vector<size_t>(1, filter.SizePartitions()));
std::array<float, kBlockSize> x_old;
x_old.fill(0.f);
RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
filter.SizePartitions(),
std::vector<size_t>(1, filter.SizePartitions()));
ShadowFilterUpdateGain gain;
Random random_generator(42U);
std::vector<float> x(kBlockSize, 0.f);
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<float> n(kBlockSize, 0.f);
std::vector<float> y(kBlockSize, 0.f);
AecState aec_state;
RenderSignalAnalyzer render_signal_analyzer;
FftData X;
std::vector<float> e(kBlockSize, 0.f);
std::array<float, kFftLength> s;
FftData S;
@ -178,6 +156,10 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
// [B,A] = butter(2,100/8000,'high')
constexpr CascadedBiQuadFilter::BiQuadCoefficients
kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f},
{-1.94448f, 0.94598f}};
Y2.fill(0.f);
E2_main.fill(0.f);
E2_shadow.fill(0.f);
@ -186,16 +168,27 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
for (size_t delay_samples : {0, 64, 150, 200, 301}) {
DelayBuffer<float> delay_buffer(delay_samples);
CascadedBiQuadFilter x_hp_filter(kHighPassFilterCoefficients, 1);
CascadedBiQuadFilter y_hp_filter(kHighPassFilterCoefficients, 1);
SCOPED_TRACE(ProduceDebugText(delay_samples));
for (size_t k = 0; k < kNumBlocksToProcess; ++k) {
RandomizeSampleVector(&random_generator, x);
delay_buffer.Delay(x, y);
RandomizeSampleVector(&random_generator, x[0]);
delay_buffer.Delay(x[0], y);
fft.PaddedFft(x, x_old, &X);
X_buffer.Insert(X);
render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay());
RandomizeSampleVector(&random_generator, n);
constexpr float kNoiseScaling = 1.f / 100.f;
std::transform(
y.begin(), y.end(), n.begin(), y.begin(),
[kNoiseScaling](float a, float b) { return a + b * kNoiseScaling; });
filter.Filter(X_buffer, &S);
x_hp_filter.Process(x[0]);
y_hp_filter.Process(y);
render_buffer.Insert(x);
render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
filter.Filter(render_buffer, &S);
fft.Ifft(S, &s);
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e.begin(),
[&](float a, float b) { return a - b * kScale; });
@ -204,12 +197,13 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
});
fft.ZeroPaddedFft(e, &E);
gain.Compute(X_buffer, render_signal_analyzer, E, filter.SizePartitions(),
false, &G);
filter.Adapt(X_buffer, G);
gain.Compute(render_buffer, render_signal_analyzer, E,
filter.SizePartitions(), false, &G);
filter.Adapt(render_buffer, G);
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(filter.FilterFrequencyResponse(),
rtc::Optional<size_t>(), X_buffer, E2_main, E2_shadow,
Y2, x, EchoPathVariability(false, false), false);
rtc::Optional<size_t>(), render_buffer, E2_main, Y2,
x[0], false);
}
// Verify that the filter is able to perform well.
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
@ -220,5 +214,3 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
}
} // namespace aec3
} // namespace webrtc
#endif

View File

@ -26,12 +26,15 @@ namespace webrtc {
enum class Aec3Optimization { kNone, kSse2 };
constexpr int kMetricsReportingIntervalBlocks = 10 * 250;
constexpr int kNumBlocksPerSecond = 250;
constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond;
constexpr int kMetricsComputationBlocks = 9;
constexpr int kMetricsCollectionBlocks =
kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
constexpr int kAdaptiveFilterLength = 12;
constexpr int kResidualEchoPowerRenderWindowSize = 30;
constexpr size_t kFftLengthBy2 = 64;
constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
@ -55,11 +58,15 @@ constexpr size_t kDownsampledRenderBufferSize =
kMatchedFilterWindowSizeSubBlocks +
1);
constexpr float kFixedEchoPathGain = 100;
constexpr size_t kRenderDelayBufferSize =
(3 * kDownsampledRenderBufferSize) / (4 * kSubBlockSize);
constexpr size_t kMaxApiCallsJitterBlocks = 10;
constexpr size_t kRenderTransferQueueSize = kMaxApiCallsJitterBlocks / 2;
static_assert(2 * kRenderTransferQueueSize >= kMaxApiCallsJitterBlocks,
"Requirement to ensure buffer overflow detection");
constexpr size_t NumBandsForRate(int sample_rate_hz) {
return static_cast<size_t>(sample_rate_hz == 8000 ? 1

View File

@ -14,6 +14,7 @@
#include <numeric>
#include <vector>
#include "webrtc/base/array_view.h"
#include "webrtc/base/atomicops.h"
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
@ -21,23 +22,23 @@
namespace webrtc {
namespace {
constexpr float kMaxFilterEstimateStrength = 1000.f;
constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond;
constexpr size_t kSaturationLeakageBlocks = 20;
// Compute the delay of the adaptive filter as the partition with a distinct
// peak.
void AnalyzeFilter(
// Computes delay of the adaptive filter.
rtc::Optional<size_t> EstimateFilterDelay(
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
filter_frequency_response,
std::array<bool, kFftLengthBy2Plus1>* bands_with_reliable_filter,
std::array<float, kFftLengthBy2Plus1>* filter_estimate_strength,
rtc::Optional<size_t>* filter_delay) {
const auto& H2 = filter_frequency_response;
adaptive_filter_frequency_response) {
const auto& H2 = adaptive_filter_frequency_response;
size_t reliable_delays_sum = 0;
size_t num_reliable_delays = 0;
constexpr size_t kUpperBin = kFftLengthBy2 - 5;
constexpr float kMinPeakMargin = 10.f;
const size_t kTailPartition = H2.size() - 1;
for (size_t k = 1; k < kUpperBin; ++k) {
// Find the maximum of H2[j].
int peak = 0;
for (size_t j = 0; j < H2.size(); ++j) {
if (H2[j][k] > H2[peak][k]) {
@ -45,43 +46,33 @@ void AnalyzeFilter(
}
}
if (H2[peak][k] == 0.f) {
(*filter_estimate_strength)[k] = 0.f;
} else if (H2[H2.size() - 1][k] == 0.f) {
(*filter_estimate_strength)[k] = kMaxFilterEstimateStrength;
} else {
(*filter_estimate_strength)[k] = std::min(
kMaxFilterEstimateStrength, H2[peak][k] / H2[H2.size() - 1][k]);
}
constexpr float kMargin = 10.f;
if (kMargin * H2[H2.size() - 1][k] < H2[peak][k]) {
(*bands_with_reliable_filter)[k] = true;
// Count the peak as a delay only if the peak is sufficiently larger than
// the tail.
if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) {
reliable_delays_sum += peak;
++num_reliable_delays;
} else {
(*bands_with_reliable_filter)[k] = false;
}
}
(*bands_with_reliable_filter)[0] = (*bands_with_reliable_filter)[1];
std::fill(bands_with_reliable_filter->begin() + kUpperBin,
bands_with_reliable_filter->end(),
(*bands_with_reliable_filter)[kUpperBin - 1]);
(*filter_estimate_strength)[0] = (*filter_estimate_strength)[1];
std::fill(filter_estimate_strength->begin() + kUpperBin,
filter_estimate_strength->end(),
(*filter_estimate_strength)[kUpperBin - 1]);
*filter_delay =
num_reliable_delays > 20
? rtc::Optional<size_t>(reliable_delays_sum / num_reliable_delays)
: rtc::Optional<size_t>();
// Return no delay if too few reliable delays have been found.
if (num_reliable_delays < 21) {
return rtc::Optional<size_t>();
}
const size_t delay = reliable_delays_sum / num_reliable_delays;
// Sanity check that the peak is not caused by a false strong DC-component in
// the filter.
for (size_t k = 1; k < kUpperBin; ++k) {
if (H2[delay][k] > H2[delay][0]) {
RTC_DCHECK_GT(H2.size(), delay);
return rtc::Optional<size_t>(delay);
}
}
return rtc::Optional<size_t>();
}
constexpr int kActiveRenderCounterInitial = 50;
constexpr int kActiveRenderCounterMax = 200;
constexpr int kEchoPathChangeCounterInitial = 50;
constexpr int kEchoPathChangeCounterMax = 3 * 250;
constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;
constexpr int kEchoPathChangeCounterMax = 3 * kNumBlocksPerSecond;
} // namespace
@ -90,76 +81,80 @@ int AecState::instance_count_ = 0;
AecState::AecState()
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
echo_path_change_counter_(kEchoPathChangeCounterInitial),
active_render_counter_(kActiveRenderCounterInitial) {
bands_with_reliable_filter_.fill(false);
filter_estimate_strength_.fill(0.f);
}
echo_path_change_counter_(kEchoPathChangeCounterInitial) {}
AecState::~AecState() = default;
// Reacts to a reported echo path change. Nothing is done unless the audio path
// itself has changed; in that case the counters, the status flags and the
// stored maximum render sample are put back to their starting values.
void AecState::HandleEchoPathChange(
    const EchoPathVariability& echo_path_variability) {
  if (!echo_path_variability.AudioPathChanged()) {
    return;
  }

  // Counters.
  // NOTE(review): Update() sets echo_saturation_ from
  // blocks_since_last_saturation_ < kSaturationLeakageBlocks, so zeroing this
  // counter looks like it flags echo saturation for the following blocks.
  // Confirm that this is intended.
  blocks_since_last_saturation_ = 0;
  active_render_blocks_ = 0;
  echo_path_change_counter_ = kEchoPathChangeCounterMax;

  // Status flags.
  usable_linear_estimate_ = false;
  echo_leakage_detected_ = false;
  capture_signal_saturation_ = false;
  echo_saturation_ = false;
  headset_detected_ = false;

  // Render level history used by the echo saturation detection.
  previous_max_sample_ = 0.f;
}
void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
filter_frequency_response,
adaptive_filter_frequency_response,
const rtc::Optional<size_t>& external_delay_samples,
const RenderBuffer& X_buffer,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
const std::array<float, kFftLengthBy2Plus1>& Y2,
rtc::ArrayView<const float> x,
const EchoPathVariability& echo_path_variability,
bool echo_leakage_detected) {
filter_length_ = filter_frequency_response.size();
AnalyzeFilter(filter_frequency_response, &bands_with_reliable_filter_,
&filter_estimate_strength_, &filter_delay_);
// Compute the externally provided delay in partitions. The truncation is
// intended here.
// Store input parameters.
echo_leakage_detected_ = echo_leakage_detected;
// Update counters.
const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;
active_render_blocks_ += active_render_block ? 1 : 0;
--echo_path_change_counter_;
// Estimate delays.
filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
external_delay_ =
external_delay_samples
? rtc::Optional<size_t>(*external_delay_samples / kBlockSize)
: rtc::Optional<size_t>();
const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
active_render_blocks_ =
echo_path_variability.AudioPathChanged() ? 0 : active_render_blocks_ + 1;
echo_path_change_counter_ = echo_path_variability.AudioPathChanged()
? kEchoPathChangeCounterMax
: echo_path_change_counter_ - 1;
active_render_counter_ = x_energy > 10000.f * kFftLengthBy2
? kActiveRenderCounterMax
: active_render_counter_ - 1;
usable_linear_estimate_ = filter_delay_ && echo_path_change_counter_ <= 0;
echo_leakage_detected_ = echo_leakage_detected;
model_based_aec_feasible_ = usable_linear_estimate_ || external_delay_;
if (usable_linear_estimate_) {
const auto& X2 = X_buffer.Spectrum(*filter_delay_);
// TODO(peah): Expose these as stats.
// Update the ERL and ERLE measures.
if (filter_delay_ && echo_path_change_counter_ <= 0) {
const auto& X2 = render_buffer.Spectrum(*filter_delay_);
erle_estimator_.Update(X2, Y2, E2_main);
erl_estimator_.Update(X2, Y2);
// TODO(peah): Add working functionality for headset detection. Until the
// functionality for that is working the headset detector is hardcoded to detect
// no headset.
#if 0
const auto& erl = erl_estimator_.Erl();
const int low_erl_band_count = std::count_if(
erl.begin(), erl.end(), [](float a) { return a <= 0.1f; });
const int noisy_band_count = std::count_if(
filter_estimate_strength_.begin(), filter_estimate_strength_.end(),
[](float a) { return a <= 10.f; });
headset_detected_ = low_erl_band_count > 20 && noisy_band_count > 20;
#endif
headset_detected_ = false;
} else {
headset_detected_ = false;
}
// Detect and flag echo saturation.
RTC_DCHECK_LT(0, x.size());
const float max_sample = fabs(*std::max_element(
x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
const bool saturated_echo =
previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture();
previous_max_sample_ = max_sample;
// Counts the blocks since saturation.
blocks_since_last_saturation_ =
saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
// Flag whether the linear filter estimate is usable.
usable_linear_estimate_ =
(!echo_saturation_) &&
active_render_blocks_ > kEchoPathChangeConvergenceBlocks &&
filter_delay_ && echo_path_change_counter_ <= 0;
// After an amount of active render samples for which an echo should have been
// detected in the capture signal if the ERL was not infinite, flag that a
// headset is used.
headset_detected_ = !external_delay_ && !filter_delay_ &&
active_render_blocks_ >= kEchoPathChangeConvergenceBlocks;
}
} // namespace webrtc

View File

@ -40,16 +40,8 @@ class AecState {
// Returns whether there has been echo leakage detected.
bool EchoLeakageDetected() const { return echo_leakage_detected_; }
// Returns whether it is possible at all to use the model based echo removal
// functionalities.
bool ModelBasedAecFeasible() const { return model_based_aec_feasible_; }
// Returns whether the render signal is currently active.
bool ActiveRender() const { return active_render_counter_ > 0; }
// Returns the number of active render blocks since an echo path
// change.
size_t ActiveRenderBlocks() const { return active_render_blocks_; }
bool ActiveRender() const { return active_render_blocks_ > 200; }
// Returns the ERLE.
const std::array<float, kFftLengthBy2Plus1>& Erle() const {
@ -67,24 +59,12 @@ class AecState {
// Returns the externally provided delay.
rtc::Optional<size_t> ExternalDelay() const { return external_delay_; }
// Returns the bands where the linear filter is reliable.
const std::array<bool, kFftLengthBy2Plus1>& BandsWithReliableFilter() const {
return bands_with_reliable_filter_;
}
// Reports whether the filter is poorly aligned.
bool PoorlyAlignedFilter() const {
return FilterDelay() ? *FilterDelay() > 0.75f * filter_length_ : false;
}
// Returns the strength of the filter.
const std::array<float, kFftLengthBy2Plus1>& FilterEstimateStrength() const {
return filter_estimate_strength_;
}
// Returns whether the capture signal is saturated.
bool SaturatedCapture() const { return capture_signal_saturation_; }
// Returns whether the echo signal is saturated.
bool SaturatedEcho() const { return echo_saturation_; }
// Updates the capture signal saturation.
void UpdateCaptureSaturation(bool capture_signal_saturation) {
capture_signal_saturation_ = capture_signal_saturation;
@ -93,16 +73,17 @@ class AecState {
// Returns whether a probable headset setup has been detected.
bool HeadsetDetected() const { return headset_detected_; }
// Takes appropriate action at an echo path change.
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
// Updates the aec state.
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
filter_frequency_response,
adaptive_filter_frequency_response,
const rtc::Optional<size_t>& external_delay_samples,
const RenderBuffer& X_buffer,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
const std::array<float, kFftLengthBy2Plus1>& Y2,
rtc::ArrayView<const float> x,
const EchoPathVariability& echo_path_variability,
bool echo_leakage_detected);
private:
@ -111,18 +92,16 @@ class AecState {
ErlEstimator erl_estimator_;
ErleEstimator erle_estimator_;
int echo_path_change_counter_;
int active_render_counter_;
size_t active_render_blocks_ = 0;
bool usable_linear_estimate_ = false;
bool echo_leakage_detected_ = false;
bool model_based_aec_feasible_ = false;
bool capture_signal_saturation_ = false;
bool echo_saturation_ = false;
bool headset_detected_ = false;
float previous_max_sample_ = 0.f;
rtc::Optional<size_t> filter_delay_;
rtc::Optional<size_t> external_delay_;
std::array<bool, kFftLengthBy2Plus1> bands_with_reliable_filter_;
std::array<float, kFftLengthBy2Plus1> filter_estimate_strength_;
size_t filter_length_;
size_t blocks_since_last_saturation_ = 1000;
RTC_DISALLOW_COPY_AND_ASSIGN(AecState);
};

View File

@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
// TODO(peah): Reactivate once the next CL has landed.
#if 0
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
#include "webrtc/test/gtest.h"
@ -22,13 +19,12 @@ namespace webrtc {
TEST(AecState, NormalUsage) {
ApmDataDumper data_dumper(42);
AecState state;
FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector<size_t>(1, 30));
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kBlockSize> x;
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
std::vector<size_t>(1, 30));
std::array<float, kFftLengthBy2Plus1> E2_main = {};
std::array<float, kFftLengthBy2Plus1> Y2 = {};
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
EchoPathVariability echo_path_variability(false, false);
x.fill(0.f);
std::vector<std::array<float, kFftLengthBy2Plus1>>
converged_filter_frequency_response(10);
@ -38,165 +34,116 @@ TEST(AecState, NormalUsage) {
std::vector<std::array<float, kFftLengthBy2Plus1>>
diverged_filter_frequency_response = converged_filter_frequency_response;
converged_filter_frequency_response[2].fill(100.f);
converged_filter_frequency_response[2][0] = 1.f;
// Verify that model based aec feasibility and linear AEC usability are false
// when the filter is diverged and there is no external delay reported.
// Verify that linear AEC usability is false when the filter is diverged and
// there is no external delay reported.
state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
EXPECT_FALSE(state.ModelBasedAecFeasible());
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that model based aec feasibility is true and that linear AEC
// usability is false when the filter is diverged and there is an external
// delay reported.
state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
EXPECT_FALSE(state.ModelBasedAecFeasible());
for (int k = 0; k < 50; ++k) {
state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
}
EXPECT_TRUE(state.ModelBasedAecFeasible());
render_buffer, E2_main, Y2, x[0], false);
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that linear AEC usability is true when the filter is converged
for (int k = 0; k < 50; ++k) {
std::fill(x[0].begin(), x[0].end(), 101.f);
for (int k = 0; k < 3000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
render_buffer, E2_main, Y2, x[0], false);
}
EXPECT_TRUE(state.UsableLinearEstimate());
// Verify that linear AEC usability becomes false after an echo path change is
// reported
echo_path_variability = EchoPathVariability(true, false);
state.HandleEchoPathChange(EchoPathVariability(true, false));
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
render_buffer, E2_main, Y2, x[0], false);
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that the active render detection works as intended.
x.fill(101.f);
std::fill(x[0].begin(), x[0].end(), 101.f);
state.HandleEchoPathChange(EchoPathVariability(true, true));
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
EXPECT_TRUE(state.ActiveRender());
x.fill(0.f);
for (int k = 0; k < 200; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
}
render_buffer, E2_main, Y2, x[0], false);
EXPECT_FALSE(state.ActiveRender());
x.fill(101.f);
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
for (int k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
render_buffer, E2_main, Y2, x[0], false);
}
EXPECT_TRUE(state.ActiveRender());
// Verify that echo leakage is properly reported.
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
render_buffer, E2_main, Y2, x[0], false);
EXPECT_FALSE(state.EchoLeakageDetected());
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
true);
render_buffer, E2_main, Y2, x[0], true);
EXPECT_TRUE(state.EchoLeakageDetected());
// Verify that the bands containing reliable filter estimates are properly
// reported.
echo_path_variability = EchoPathVariability(false, false);
for (int k = 0; k < 200; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
}
FftData X;
X.re.fill(10000.f);
X.im.fill(0.f);
for (size_t k = 0; k < X_buffer.Buffer().size(); ++k) {
X_buffer.Insert(X);
}
Y2.fill(10.f * 1000.f * 1000.f);
E2_main.fill(100.f * Y2[0]);
E2_shadow.fill(100.f * Y2[0]);
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
E2_main.fill(0.1f * Y2[0]);
E2_shadow.fill(E2_main[0]);
for (size_t k = 0; k < Y2.size(); k += 2) {
E2_main[k] = Y2[k];
E2_shadow[k] = Y2[k];
}
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
const std::array<bool, kFftLengthBy2Plus1>& reliable_bands =
state.BandsWithReliableFilter();
EXPECT_EQ(reliable_bands[0], reliable_bands[1]);
for (size_t k = 1; k < kFftLengthBy2 - 5; ++k) {
EXPECT_TRUE(reliable_bands[k]);
}
for (size_t k = kFftLengthBy2 - 5; k < reliable_bands.size(); ++k) {
EXPECT_EQ(reliable_bands[kFftLengthBy2 - 6], reliable_bands[k]);
}
// Verify that the ERL is properly estimated
Y2.fill(10.f * X.re[0] * X.re[0]);
for (size_t k = 0; k < 100000; ++k) {
for (auto& x_k : x) {
x_k = std::vector<float>(kBlockSize, 0.f);
}
x[0][0] = 5000.f;
for (size_t k = 0; k < render_buffer.Buffer().size(); ++k) {
render_buffer.Insert(x);
}
Y2.fill(10.f * 10000.f * 10000.f);
for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
render_buffer, E2_main, Y2, x[0], false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
const std::array<float, kFftLengthBy2Plus1>& erl = state.Erl();
std::for_each(erl.begin(), erl.end(),
[](float a) { EXPECT_NEAR(10.f, a, 0.1); });
EXPECT_EQ(erl[0], erl[1]);
for (size_t k = 1; k < erl.size() - 1; ++k) {
EXPECT_NEAR(k % 2 == 0 ? 10.f : 1000.f, erl[k], 0.1);
}
EXPECT_EQ(erl[erl.size() - 2], erl[erl.size() - 1]);
// Verify that the ERLE is properly estimated
E2_main.fill(1.f * X.re[0] * X.re[0]);
E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(10.f * E2_main[0]);
for (size_t k = 0; k < 10000; ++k) {
for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
render_buffer, E2_main, Y2, x[0], false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
std::for_each(state.Erle().begin(), state.Erle().end(),
[](float a) { EXPECT_NEAR(8.f, a, 0.1); });
{
const auto& erle = state.Erle();
EXPECT_EQ(erle[0], erle[1]);
for (size_t k = 1; k < erle.size() - 1; ++k) {
EXPECT_NEAR(k % 2 == 0 ? 8.f : 1.f, erle[k], 0.1);
}
EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]);
}
E2_main.fill(1.f * X.re[0] * X.re[0]);
E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(5.f * E2_main[0]);
for (size_t k = 0; k < 10000; ++k) {
for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
false);
render_buffer, E2_main, Y2, x[0], false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
std::for_each(state.Erle().begin(), state.Erle().end(),
[](float a) { EXPECT_NEAR(5.f, a, 0.1); });
{
const auto& erle = state.Erle();
EXPECT_EQ(erle[0], erle[1]);
for (size_t k = 1; k < erle.size() - 1; ++k) {
EXPECT_NEAR(k % 2 == 0 ? 5.f : 1.f, erle[k], 0.1);
}
EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]);
}
}
// Verifies that a non-significant delay is correctly identified.
TEST(AecState, NonSignificantDelay) {
AecState state;
FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector<size_t>(1, 30));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
std::vector<size_t>(1, 30));
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kBlockSize> x;
EchoPathVariability echo_path_variability(false, false);
@ -208,8 +155,9 @@ TEST(AecState, NonSignificantDelay) {
}
// Verify that a non-significant filter delay is identified correctly.
state.Update(frequency_response, rtc::Optional<size_t>(), X_buffer, E2_main,
E2_shadow, Y2, x, echo_path_variability, false);
state.HandleEchoPathChange(echo_path_variability);
state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
E2_main, Y2, x, false);
EXPECT_FALSE(state.FilterDelay());
}
@ -217,9 +165,9 @@ TEST(AecState, NonSignificantDelay) {
TEST(AecState, ConvergedFilterDelay) {
constexpr int kFilterLength = 10;
AecState state;
FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector<size_t>(1, 30));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
std::vector<size_t>(1, 30));
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kBlockSize> x;
EchoPathVariability echo_path_variability(false, false);
@ -234,9 +182,10 @@ TEST(AecState, ConvergedFilterDelay) {
v.fill(0.01f);
}
frequency_response[k].fill(100.f);
state.Update(frequency_response, rtc::Optional<size_t>(), X_buffer, E2_main,
E2_shadow, Y2, x, echo_path_variability, false);
frequency_response[k][0] = 0.f;
state.HandleEchoPathChange(echo_path_variability);
state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
E2_main, Y2, x, false);
EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
if (k != (kFilterLength - 1)) {
EXPECT_EQ(k, state.FilterDelay());
@ -255,27 +204,27 @@ TEST(AecState, ExternalDelay) {
E2_shadow.fill(0.f);
Y2.fill(0.f);
x.fill(0.f);
FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector<size_t>(1, 30));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
std::vector<size_t>(1, 30));
std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30);
for (auto& v : frequency_response) {
v.fill(0.01f);
}
for (size_t k = 0; k < frequency_response.size() - 1; ++k) {
state.HandleEchoPathChange(EchoPathVariability(false, false));
state.Update(frequency_response, rtc::Optional<size_t>(k * kBlockSize + 5),
X_buffer, E2_main, E2_shadow, Y2, x,
EchoPathVariability(false, false), false);
render_buffer, E2_main, Y2, x, false);
EXPECT_TRUE(state.ExternalDelay());
EXPECT_EQ(k, state.ExternalDelay());
}
// Verify that the externally reported delay is properly unset when it is no
// longer present.
state.Update(frequency_response, rtc::Optional<size_t>(), X_buffer, E2_main,
E2_shadow, Y2, x, EchoPathVariability(false, false), false);
state.HandleEchoPathChange(EchoPathVariability(false, false));
state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
E2_main, Y2, x, false);
EXPECT_FALSE(state.ExternalDelay());
}
} // namespace webrtc
#endif

View File

@ -188,6 +188,17 @@ void ComfortNoiseGenerator::Compute(
}
}
// Limit the noise to a floor of -96 dBFS.
constexpr float kNoiseFloor = 440.f;
for (auto& n : N2_) {
n = std::max(n, kNoiseFloor);
}
if (N2_initial_) {
for (auto& n : *N2_initial_) {
n = std::max(n, kNoiseFloor);
}
}
// Choose N2 estimate to use.
const std::array<float, kFftLengthBy2Plus1>& N2 =
N2_initial_ ? *N2_initial_ : N2_;

View File

@ -9,6 +9,7 @@
*/
#include "webrtc/modules/audio_processing/aec3/echo_remover.h"
#include <math.h>
#include <algorithm>
#include <memory>
#include <numeric>
@ -24,7 +25,6 @@
#include "webrtc/modules/audio_processing/aec3/echo_remover_metrics.h"
#include "webrtc/modules/audio_processing/aec3/fft_data.h"
#include "webrtc/modules/audio_processing/aec3/output_selector.h"
#include "webrtc/modules/audio_processing/aec3/power_echo_model.h"
#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
#include "webrtc/modules/audio_processing/aec3/render_delay_buffer.h"
#include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"
@ -46,11 +46,6 @@ void LinearEchoPower(const FftData& E,
}
}
float BlockPower(const std::array<float, kBlockSize> x) {
return std::accumulate(x.begin(), x.end(), 0.f,
[](float a, float b) -> float { return a + b * b; });
}
// Class for removing the echo from the capture signal.
class EchoRemoverImpl final : public EchoRemover {
public:
@ -83,8 +78,6 @@ class EchoRemoverImpl final : public EchoRemover {
SuppressionGain suppression_gain_;
ComfortNoiseGenerator cng_;
SuppressionFilter suppression_filter_;
PowerEchoModel power_echo_model_;
RenderBuffer X_buffer_;
RenderSignalAnalyzer render_signal_analyzer_;
OutputSelector output_selector_;
ResidualEchoEstimator residual_echo_estimator_;
@ -106,12 +99,7 @@ EchoRemoverImpl::EchoRemoverImpl(int sample_rate_hz)
subtractor_(data_dumper_.get(), optimization_),
suppression_gain_(optimization_),
cng_(optimization_),
suppression_filter_(sample_rate_hz_),
X_buffer_(optimization_,
NumBandsForRate(sample_rate_hz_),
std::max(subtractor_.MinFarendBufferLength(),
power_echo_model_.MinFarendBufferLength()),
subtractor_.NumBlocksInRenderSums()) {
suppression_filter_(sample_rate_hz_) {
RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
}
@ -134,23 +122,23 @@ void EchoRemoverImpl::ProcessCapture(
const std::vector<float>& x0 = x[0];
std::vector<float>& y0 = (*y)[0];
data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize, &y0[0],
data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize, &y0[0],
LowestBandRate(sample_rate_hz_), 1);
data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize, &x0[0],
data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0],
LowestBandRate(sample_rate_hz_), 1);
aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
if (echo_path_variability.AudioPathChanged()) {
subtractor_.HandleEchoPathChange(echo_path_variability);
residual_echo_estimator_.HandleEchoPathChange(echo_path_variability);
aec_state_.HandleEchoPathChange(echo_path_variability);
}
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> S2_power;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> S2_linear;
std::array<float, kFftLengthBy2Plus1> G;
float high_bands_gain;
FftData Y;
FftData comfort_noise;
FftData high_band_comfort_noise;
@ -159,14 +147,13 @@ void EchoRemoverImpl::ProcessCapture(
auto& E2_main = subtractor_output.E2_main;
auto& E2_shadow = subtractor_output.E2_shadow;
auto& e_main = subtractor_output.e_main;
auto& e_shadow = subtractor_output.e_shadow;
// Analyze the render signal.
render_signal_analyzer_.Update(render_buffer, aec_state_.FilterDelay());
// Perform linear echo cancellation.
subtractor_.Process(render_buffer, y0, render_signal_analyzer_,
aec_state_.SaturatedCapture(), &subtractor_output);
subtractor_.Process(render_buffer, y0, render_signal_analyzer_, aec_state_,
&subtractor_output);
// Compute spectra.
fft_.ZeroPaddedFft(y0, &Y);
@ -175,36 +162,29 @@ void EchoRemoverImpl::ProcessCapture(
// Update the AEC state information.
aec_state_.Update(subtractor_.FilterFrequencyResponse(),
echo_path_delay_samples, render_buffer, E2_main, E2_shadow,
Y2, x0, echo_path_variability, echo_leakage_detected_);
// Use the power model to estimate the echo.
// TODO(peah): Remove in upcoming CL.
// power_echo_model_.EstimateEcho(render_buffer, Y2, aec_state_, &S2_power);
S2_power.fill(0.f);
echo_path_delay_samples, render_buffer, E2_main, Y2, x0,
echo_leakage_detected_);
// Choose the linear output.
output_selector_.FormLinearOutput(e_main, y0);
output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);
data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
LowestBandRate(sample_rate_hz_), 1);
const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;
// Estimate the residual echo power.
residual_echo_estimator_.Estimate(
output_selector_.UseSubtractorOutput(), aec_state_, render_buffer,
subtractor_.FilterFrequencyResponse(), E2_main, E2_shadow, S2_linear,
S2_power, Y2, &R2);
residual_echo_estimator_.Estimate(output_selector_.UseSubtractorOutput(),
aec_state_, render_buffer, S2_linear, Y2,
&R2);
// Estimate the comfort noise.
cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
// Detect basic doubletalk.
const bool doubletalk = BlockPower(e_shadow) < BlockPower(e_main);
// Choose and apply the echo suppression gain.
suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),
doubletalk ? 0.001f : 0.0001f, &G);
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, y);
aec_state_.SaturatedEcho(), x, y->size(),
&high_bands_gain, &G);
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
high_bands_gain, y);
// Update the metrics.
metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G);
@ -217,21 +197,16 @@ void EchoRemoverImpl::ProcessCapture(
LowestBandRate(sample_rate_hz_), 1);
data_dumper_->DumpRaw("aec3_using_subtractor_output",
output_selector_.UseSubtractorOutput() ? 1 : 0);
data_dumper_->DumpRaw("aec3_doubletalk", doubletalk ? 1 : 0);
data_dumper_->DumpRaw("aec3_E2", E2);
data_dumper_->DumpRaw("aec3_E2_main", E2_main);
data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow);
data_dumper_->DumpRaw("aec3_S2_linear", S2_linear);
data_dumper_->DumpRaw("aec3_S2_power", S2_power);
data_dumper_->DumpRaw("aec3_Y2", Y2);
data_dumper_->DumpRaw("aec3_X2", render_buffer.Spectrum(0));
data_dumper_->DumpRaw("aec3_R2", R2);
data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle());
data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl());
data_dumper_->DumpRaw("aec3_reliable_filter_bands",
aec_state_.BandsWithReliableFilter());
data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender());
data_dumper_->DumpRaw("aec3_model_based_aec_feasible",
aec_state_.ModelBasedAecFeasible());
data_dumper_->DumpRaw("aec3_usable_linear_estimate",
aec_state_.UsableLinearEstimate());
data_dumper_->DumpRaw(

View File

@ -221,9 +221,6 @@ void EchoRemoverMetrics::Update(
RTC_HISTOGRAM_BOOLEAN(
"WebRTC.Audio.EchoCanceller.UsableLinearEstimate",
static_cast<int>(aec_state.UsableLinearEstimate() ? 1 : 0));
RTC_HISTOGRAM_BOOLEAN(
"WebRTC.Audio.EchoCanceller.ModelBasedAecFeasible",
static_cast<int>(aec_state.ModelBasedAecFeasible() ? 1 : 0));
RTC_HISTOGRAM_BOOLEAN(
"WebRTC.Audio.EchoCanceller.ActiveRender",
static_cast<int>(

View File

@ -49,13 +49,12 @@ void MainFilterUpdateGain::Compute(
FftData* gain_fft) {
RTC_DCHECK(gain_fft);
// Introducing shorter notation to improve readability.
const RenderBuffer& X_buffer = render_buffer;
const FftData& E_main = subtractor_output.E_main;
const auto& E2_main = subtractor_output.E2_main;
const auto& E2_shadow = subtractor_output.E2_shadow;
FftData* G = gain_fft;
const size_t size_partitions = filter.SizePartitions();
const auto& X2 = X_buffer.SpectralSum(size_partitions);
const auto& X2 = render_buffer.SpectralSum(size_partitions);
const auto& erl = filter.Erl();
++call_counter_;
@ -70,16 +69,15 @@ void MainFilterUpdateGain::Compute(
G->re.fill(0.f);
G->im.fill(0.f);
} else {
// Corresponds of WGN of power -46 dBFS.
constexpr float kX2Min = 44015068.0f;
// Corresponds to WGN of power -39 dBFS.
constexpr float kNoiseGatePower = 220075344.f;
std::array<float, kFftLengthBy2Plus1> mu;
// mu = H_error / (0.5* H_error* X2 + n * E2).
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
mu[k] =
X2[k] > kX2Min
? H_error_[k] /
(0.5f * H_error_[k] * X2[k] + size_partitions * E2_main[k])
: 0.f;
mu[k] = X2[k] > kNoiseGatePower
? H_error_[k] / (0.5f * H_error_[k] * X2[k] +
size_partitions * E2_main[k])
: 0.f;
}
// Avoid updating the filter close to narrow bands in the render signals.

View File

@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/main_filter_update_gain.h"
// TODO(peah): Reactivate once the next CL has landed.
#if 0
#include <algorithm>
#include <numeric>
#include <string>
@ -20,7 +17,7 @@
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/fft_buffer.h"
#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
#include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h"
#include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h"
#include "webrtc/modules/audio_processing/aec3/subtractor_output.h"
@ -42,31 +39,30 @@ void RunFilterUpdateTest(int num_blocks_to_process,
std::array<float, kBlockSize>* y_last_block,
FftData* G_last_block) {
ApmDataDumper data_dumper(42);
AdaptiveFirFilter main_filter(9, true, DetectOptimization(), &data_dumper);
AdaptiveFirFilter shadow_filter(9, true, DetectOptimization(), &data_dumper);
AdaptiveFirFilter main_filter(9, DetectOptimization(), &data_dumper);
AdaptiveFirFilter shadow_filter(9, DetectOptimization(), &data_dumper);
Aec3Fft fft;
FftBuffer X_buffer(Aec3Optimization::kNone, main_filter.SizePartitions(),
std::vector<size_t>(1, main_filter.SizePartitions()));
RenderBuffer render_buffer(
Aec3Optimization::kNone, 3, main_filter.SizePartitions(),
std::vector<size_t>(1, main_filter.SizePartitions()));
std::array<float, kBlockSize> x_old;
x_old.fill(0.f);
ShadowFilterUpdateGain shadow_gain;
MainFilterUpdateGain main_gain;
Random random_generator(42U);
std::vector<float> x(kBlockSize, 0.f);
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<float> y(kBlockSize, 0.f);
AecState aec_state;
RenderSignalAnalyzer render_signal_analyzer;
FftData X;
std::array<float, kFftLength> s;
FftData S;
FftData G;
SubtractorOutput output;
output.Reset();
FftData& E_main = output.E_main;
FftData& E_shadow = output.E_shadow;
FftData E_shadow;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1>& E2_main = output.E2_main;
std::array<float, kFftLengthBy2Plus1>& E2_shadow = output.E2_shadow;
std::array<float, kBlockSize>& e_main = output.e_main;
std::array<float, kBlockSize>& e_shadow = output.e_shadow;
Y2.fill(0.f);
@ -89,17 +85,16 @@ void RunFilterUpdateTest(int num_blocks_to_process,
// Create the render signal.
if (use_silent_render_in_second_half && k > num_blocks_to_process / 2) {
std::fill(x.begin(), x.end(), 0.f);
std::fill(x[0].begin(), x[0].end(), 0.f);
} else {
RandomizeSampleVector(&random_generator, x);
RandomizeSampleVector(&random_generator, x[0]);
}
delay_buffer.Delay(x, y);
fft.PaddedFft(x, x_old, &X);
X_buffer.Insert(X);
render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay());
delay_buffer.Delay(x[0], y);
render_buffer.Insert(x);
render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
// Apply the main filter.
main_filter.Filter(X_buffer, &S);
main_filter.Filter(render_buffer, &S);
fft.Ifft(S, &s);
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
e_main.begin(),
@ -110,7 +105,7 @@ void RunFilterUpdateTest(int num_blocks_to_process,
fft.ZeroPaddedFft(e_main, &E_main);
// Apply the shadow filter.
shadow_filter.Filter(X_buffer, &S);
shadow_filter.Filter(render_buffer, &S);
fft.Ifft(S, &s);
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
e_shadow.begin(),
@ -125,19 +120,20 @@ void RunFilterUpdateTest(int num_blocks_to_process,
E_shadow.Spectrum(Aec3Optimization::kNone, &output.E2_shadow);
// Adapt the shadow filter.
shadow_gain.Compute(X_buffer, render_signal_analyzer, E_shadow,
shadow_gain.Compute(render_buffer, render_signal_analyzer, E_shadow,
shadow_filter.SizePartitions(), saturation, &G);
shadow_filter.Adapt(X_buffer, G);
shadow_filter.Adapt(render_buffer, G);
// Adapt the main filter
main_gain.Compute(X_buffer, render_signal_analyzer, output, main_filter,
saturation, &G);
main_filter.Adapt(X_buffer, G);
main_gain.Compute(render_buffer, render_signal_analyzer, output,
main_filter, saturation, &G);
main_filter.Adapt(render_buffer, G);
// Update the delay.
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(main_filter.FilterFrequencyResponse(),
rtc::Optional<size_t>(), X_buffer, E2_main, E2_shadow, Y2,
x, EchoPathVariability(false, false), false);
rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0],
false);
}
std::copy(e_main.begin(), e_main.end(), e_last_block->begin());
@ -159,14 +155,16 @@ std::string ProduceDebugText(size_t delay) {
// Verifies that the check for non-null output gain parameter works.
TEST(MainFilterUpdateGain, NullDataOutputGain) {
ApmDataDumper data_dumper(42);
AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper);
FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(),
std::vector<size_t>(1, filter.SizePartitions()));
AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
filter.SizePartitions(),
std::vector<size_t>(1, filter.SizePartitions()));
RenderSignalAnalyzer analyzer;
SubtractorOutput output;
MainFilterUpdateGain gain;
EXPECT_DEATH(gain.Compute(X_buffer, analyzer, output, filter, false, nullptr),
"");
EXPECT_DEATH(
gain.Compute(render_buffer, analyzer, output, filter, false, nullptr),
"");
}
#endif
@ -288,5 +286,3 @@ TEST(MainFilterUpdateGain, EchoPathChangeBehavior) {
}
} // namespace webrtc
#endif

View File

@ -34,11 +34,6 @@ void SmoothFrameTransition(bool from_y_to_e,
RTC_DCHECK_EQ(from_y_to_e ? 1.f : 0.f, averaging);
}
float BlockPower(rtc::ArrayView<const float> x) {
return std::accumulate(x.begin(), x.end(), 0.f,
[](float a, float b) -> float { return a + b * b; });
}
} // namespace
OutputSelector::OutputSelector() = default;
@ -46,24 +41,16 @@ OutputSelector::OutputSelector() = default;
OutputSelector::~OutputSelector() = default;
void OutputSelector::FormLinearOutput(
bool use_subtractor_output,
rtc::ArrayView<const float> subtractor_output,
rtc::ArrayView<float> capture) {
RTC_DCHECK_EQ(subtractor_output.size(), capture.size());
rtc::ArrayView<const float>& e_main = subtractor_output;
rtc::ArrayView<float> y = capture;
const bool subtractor_output_is_best =
BlockPower(y) > 1.5f * BlockPower(e_main);
output_change_counter_ = subtractor_output_is_best != use_subtractor_output_
? output_change_counter_ + 1
: 0;
if (subtractor_output_is_best != use_subtractor_output_ &&
((subtractor_output_is_best && output_change_counter_ > 3) ||
(!subtractor_output_is_best && output_change_counter_ > 10))) {
use_subtractor_output_ = subtractor_output_is_best;
if (use_subtractor_output != use_subtractor_output_) {
use_subtractor_output_ = use_subtractor_output;
SmoothFrameTransition(use_subtractor_output_, e_main, y);
output_change_counter_ = 0;
} else if (use_subtractor_output_) {
std::copy(e_main.begin(), e_main.end(), y.begin());
}

View File

@ -24,7 +24,8 @@ class OutputSelector {
~OutputSelector();
// Forms the most appropriate output signal.
void FormLinearOutput(rtc::ArrayView<const float> subtractor_output,
void FormLinearOutput(bool use_subtractor_output,
rtc::ArrayView<const float> subtractor_output,
rtc::ArrayView<float> capture);
// Returns true if the linear aec output is the one used.
@ -32,7 +33,6 @@ class OutputSelector {
private:
bool use_subtractor_output_ = false;
int output_change_counter_ = 0;
RTC_DISALLOW_COPY_AND_ASSIGN(OutputSelector);
};

View File

@ -23,49 +23,47 @@ namespace webrtc {
TEST(OutputSelector, ProperSwitching) {
OutputSelector selector;
constexpr int kNumBlocksToSwitchToSubtractor = 3;
constexpr int kNumBlocksToSwitchFromSubtractor = 10;
std::array<float, kBlockSize> weaker;
std::array<float, kBlockSize> stronger;
std::array<float, kBlockSize> y;
std::array<float, kBlockSize> e;
weaker.fill(10.f);
stronger.fill(20.f);
bool y_is_weakest = false;
const auto form_e_and_y = [&](bool y_equals_weaker) {
if (y_equals_weaker) {
std::copy(weaker.begin(), weaker.end(), y.begin());
std::copy(stronger.begin(), stronger.end(), e.begin());
} else {
std::copy(stronger.begin(), stronger.end(), y.begin());
std::copy(weaker.begin(), weaker.end(), e.begin());
}
std::array<float, kBlockSize> e_ref;
std::array<float, kBlockSize> y_ref;
auto init_blocks = [](std::array<float, kBlockSize>* e,
std::array<float, kBlockSize>* y) {
e->fill(10.f);
y->fill(20.f);
};
for (int k = 0; k < 30; ++k) {
// Verify that it takes a while for the signals transition to take effect.
const int num_blocks_to_switch = y_is_weakest
? kNumBlocksToSwitchFromSubtractor
: kNumBlocksToSwitchToSubtractor;
for (int j = 0; j < num_blocks_to_switch; ++j) {
form_e_and_y(y_is_weakest);
selector.FormLinearOutput(e, y);
EXPECT_EQ(stronger, y);
EXPECT_EQ(y_is_weakest, selector.UseSubtractorOutput());
}
init_blocks(&e_ref, &y_ref);
// Verify that the transition block is a mix between the signals.
form_e_and_y(y_is_weakest);
selector.FormLinearOutput(e, y);
EXPECT_NE(weaker, y);
EXPECT_NE(stronger, y);
EXPECT_EQ(!y_is_weakest, selector.UseSubtractorOutput());
init_blocks(&e, &y);
selector.FormLinearOutput(false, e, y);
EXPECT_EQ(y_ref, y);
y_is_weakest = !y_is_weakest;
}
init_blocks(&e, &y);
selector.FormLinearOutput(true, e, y);
EXPECT_NE(e_ref, y);
EXPECT_NE(y_ref, y);
init_blocks(&e, &y);
selector.FormLinearOutput(true, e, y);
EXPECT_EQ(e_ref, y);
init_blocks(&e, &y);
selector.FormLinearOutput(true, e, y);
EXPECT_EQ(e_ref, y);
init_blocks(&e, &y);
selector.FormLinearOutput(false, e, y);
EXPECT_NE(e_ref, y);
EXPECT_NE(y_ref, y);
init_blocks(&e, &y);
selector.FormLinearOutput(false, e, y);
EXPECT_EQ(y_ref, y);
init_blocks(&e, &y);
selector.FormLinearOutput(false, e, y);
EXPECT_EQ(y_ref, y);
}
} // namespace webrtc

View File

@ -1,111 +0,0 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/aec3/power_echo_model.h"
#include <string.h>
#include <algorithm>
#include "webrtc/base/optional.h"
namespace webrtc {
namespace {
// Computes the spectral power over that last 20 frames.
void RecentMaximum(const RenderBuffer& X_buffer,
std::array<float, kFftLengthBy2Plus1>* R2) {
R2->fill(0.f);
for (size_t j = 0; j < 20; ++j) {
std::transform(R2->begin(), R2->end(), X_buffer.Spectrum(j).begin(),
R2->begin(),
[](float a, float b) { return std::max(a, b); });
}
}
// Value that every entry of the H2_ estimate is set to on construction and
// when the model is reset after an echo path gain change.
constexpr float kHInitial = 10.f;
// Counter value that is coupled with each H2_ entry on construction and when
// the model is reset after an echo path gain change.
constexpr int kUpdateCounterInitial = 300;
} // namespace
// Sets every entry of H2_ to the initial value and initial update counter.
PowerEchoModel::PowerEchoModel() {
  const CountedFloat initial_entry(kHInitial, kUpdateCounterInitial);
  H2_.fill(initial_entry);
}

PowerEchoModel::~PowerEchoModel() = default;
void PowerEchoModel::HandleEchoPathChange(
const EchoPathVariability& variability) {
if (variability.gain_change) {
H2_.fill(CountedFloat(kHInitial, kUpdateCounterInitial));
}
}
void PowerEchoModel::EstimateEcho(
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& capture_spectrum,
const AecState& aec_state,
std::array<float, kFftLengthBy2Plus1>* echo_spectrum) {
RTC_DCHECK(echo_spectrum);
const RenderBuffer& X_buffer = render_buffer;
const auto& Y2 = capture_spectrum;
std::array<float, kFftLengthBy2Plus1>* S2 = echo_spectrum;
// Choose delay to use.
const rtc::Optional<size_t> delay =
aec_state.FilterDelay()
? aec_state.FilterDelay()
: (aec_state.ExternalDelay() ? rtc::Optional<size_t>(std::min<size_t>(
*aec_state.ExternalDelay(),
X_buffer.Buffer().size() - 1))
: rtc::Optional<size_t>());
// Compute R2.
std::array<float, kFftLengthBy2Plus1> render_max;
if (!delay) {
RecentMaximum(render_buffer, &render_max);
}
const std::array<float, kFftLengthBy2Plus1>& X2_active =
delay ? render_buffer.Spectrum(*delay) : render_max;
if (!aec_state.SaturatedCapture()) {
// Corresponds of WGN of power -46dBFS.
constexpr float kX2Min = 44015068.0f;
const int max_update_counter_value = delay ? 300 : 500;
std::array<float, kFftLengthBy2Plus1> new_H2;
// new_H2 = Y2 / X2.
std::transform(X2_active.begin(), X2_active.end(), Y2.begin(),
new_H2.begin(),
[&](float a, float b) { return a > kX2Min ? b / a : -1.f; });
// Lambda for updating H2 in a maximum statistics manner.
auto H2_updater = [&](float a, CountedFloat b) {
if (a > 0) {
if (a > b.value) {
b.counter = max_update_counter_value;
b.value = a;
} else if (--b.counter <= 0) {
b.value = std::max(b.value * 0.9f, 1.f);
}
}
return b;
};
std::transform(new_H2.begin(), new_H2.end(), H2_.begin(), H2_.begin(),
H2_updater);
}
// S2 = H2*X2_active.
std::transform(H2_.begin(), H2_.end(), X2_active.begin(), S2->begin(),
[](CountedFloat a, float b) { return a.value * b; });
}
} // namespace webrtc

View File

@ -1,61 +0,0 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_
#include <array>
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/optional.h"
#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/echo_path_variability.h"
#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
namespace webrtc {
// Provides an echo model based on power spectral estimates that estimates the
// echo spectrum.
class PowerEchoModel {
 public:
  PowerEchoModel();
  ~PowerEchoModel();

  // Adjusts the model according to echo path changes.
  void HandleEchoPathChange(const EchoPathVariability& variability);

  // Updates the echo model and estimates the echo spectrum.
  // render_buffer:    source of the render spectra.
  // capture_spectrum: power spectrum of the capture signal.
  // aec_state:        current state of the echo canceller.
  // echo_spectrum:    output for the estimated echo spectrum; must be
  //                   non-null.
  void EstimateEcho(
      const RenderBuffer& render_buffer,
      const std::array<float, kFftLengthBy2Plus1>& capture_spectrum,
      const AecState& aec_state,
      std::array<float, kFftLengthBy2Plus1>* echo_spectrum);

  // Returns the minimum required farend buffer length.
  size_t MinFarendBufferLength() const { return kRenderBufferSize; }

 private:
  // Provides a float value that is coupled with a counter.
  struct CountedFloat {
    CountedFloat() : value(0.f), counter(0) {}
    CountedFloat(float value, int counter) : value(value), counter(counter) {}
    float value;
    int counter;
  };

  // Class-wide compile-time constant. It is only ever read by value, so it is
  // declared static constexpr rather than stored in every instance.
  static constexpr size_t kRenderBufferSize = 100;

  // Per-bin values, each coupled with a counter.
  std::array<CountedFloat, kFftLengthBy2Plus1> H2_;

  RTC_DISALLOW_COPY_AND_ASSIGN(PowerEchoModel);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/aec3/power_echo_model.h"
#include <array>
#include <string>
#include <vector>
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
#include "webrtc/modules/audio_processing/aec3/echo_path_variability.h"
#include "webrtc/modules/audio_processing/test/echo_canceller_test_tools.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// Verifies that passing a null echo spectrum output to EstimateEcho triggers
// the non-null check.
TEST(PowerEchoModel, NullEstimateEchoOutput) {
  PowerEchoModel model;
  AecState aec_state;
  std::array<float, kFftLengthBy2Plus1> Y2;
  const size_t buffer_length = model.MinFarendBufferLength();
  RenderBuffer X_buffer(Aec3Optimization::kNone, 3, buffer_length,
                        std::vector<size_t>(1, buffer_length));
  EXPECT_DEATH(model.EstimateEcho(X_buffer, Y2, aec_state, nullptr), "");
}
#endif
} // namespace webrtc

View File

@ -102,10 +102,11 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer {
RenderDelayBufferImpl::RenderDelayBufferImpl(size_t num_bands)
: optimization_(DetectOptimization()),
fft_buffer_(optimization_,
num_bands,
std::max(30, kAdaptiveFilterLength),
std::vector<size_t>(1, kAdaptiveFilterLength)),
fft_buffer_(
optimization_,
num_bands,
std::max(kResidualEchoPowerRenderWindowSize, kAdaptiveFilterLength),
std::vector<size_t>(1, kAdaptiveFilterLength)),
api_call_jitter_buffer_(num_bands) {
buffer_.fill(std::vector<std::vector<float>>(
num_bands, std::vector<float>(kBlockSize, 0.f)));
@ -175,23 +176,19 @@ void RenderDelayBufferImpl::SetDelay(size_t delay) {
// If there is a new delay set, clear the fft buffer.
fft_buffer_.Clear();
const size_t max_delay = buffer_.size() - 1;
if (max_delay < delay) {
if ((buffer_.size() - 1) < delay) {
// If the desired delay is larger than the delay buffer, shorten the delay
// buffer size to achieve the desired alignment with the available buffer
// size.
const size_t delay_decrease = delay - max_delay;
RTC_DCHECK_LT(delay_decrease, buffer_.size());
downsampled_render_buffer_.position =
(downsampled_render_buffer_.position + kSubBlockSize * delay_decrease) %
(downsampled_render_buffer_.position +
kSubBlockSize * (delay - (buffer_.size() - 1))) %
downsampled_render_buffer_.buffer.size();
last_insert_index_ =
(last_insert_index_ + buffer_.size() - delay_decrease) % buffer_.size();
RTC_DCHECK_EQ(max_delay, delay_ - delay_decrease);
delay_ = max_delay;
(last_insert_index_ - (delay - (buffer_.size() - 1)) + buffer_.size()) %
buffer_.size();
delay_ = buffer_.size() - 1;
} else {
delay_ = delay;
}

View File

@ -110,7 +110,7 @@ size_t RenderDelayControllerImpl::GetDelay(
// Compute and set new render delay buffer delay.
const size_t new_delay =
ComputeNewBufferDelay(delay_, echo_path_delay_samples_);
if (new_delay != delay_ && align_call_counter_ > 250) {
if (new_delay != delay_ && align_call_counter_ > kNumBlocksPerSecond) {
delay_ = new_delay;
}
@ -119,7 +119,7 @@ size_t RenderDelayControllerImpl::GetDelay(
const int headroom = echo_path_delay_samples_ - delay_ * kBlockSize;
RTC_DCHECK_LE(0, headroom);
headroom_samples_ = rtc::Optional<size_t>(headroom);
} else if (++blocks_since_last_delay_estimate_ > 250 * 20) {
} else if (++blocks_since_last_delay_estimate_ > 20 * kNumBlocksPerSecond) {
headroom_samples_ = rtc::Optional<size_t>();
}

View File

@ -52,7 +52,7 @@ void RenderDelayControllerMetrics::Update(rtc::Optional<size_t> delay_samples,
delay_blocks_ = delay_blocks;
}
}
} else if (++initial_call_counter_ == 5 * 250) {
} else if (++initial_call_counter_ == 5 * kNumBlocksPerSecond) {
initial_update = false;
}

View File

@ -28,7 +28,7 @@ class RenderSignalAnalyzer {
~RenderSignalAnalyzer();
// Updates the render signal analysis with the most recent render signal.
void Update(const RenderBuffer& X_buffer,
void Update(const RenderBuffer& render_buffer,
const rtc::Optional<size_t>& delay_partitions);
// Returns true if the render signal is poorly exciting.

View File

@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h"
// TODO(peah): Reactivate once the next CL has landed.
#if 0
#include <math.h>
#include <array>
#include <vector>
@ -21,8 +18,8 @@
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
#include "webrtc/modules/audio_processing/aec3/fft_buffer.h"
#include "webrtc/modules/audio_processing/aec3/fft_data.h"
#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
#include "webrtc/modules/audio_processing/test/echo_canceller_test_tools.h"
#include "webrtc/test/gtest.h"
@ -59,19 +56,20 @@ TEST(RenderSignalAnalyzer, NullMaskOutput) {
TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) {
RenderSignalAnalyzer analyzer;
Random random_generator(42U);
std::vector<float> x(kBlockSize, 0.f);
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::array<float, kBlockSize> x_old;
FftData X;
Aec3Fft fft;
FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector<size_t>(1, 1));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1,
std::vector<size_t>(1, 1));
std::array<float, kFftLengthBy2Plus1> mask;
x_old.fill(0.f);
for (size_t k = 0; k < 100; ++k) {
RandomizeSampleVector(&random_generator, x);
fft.PaddedFft(x, x_old, &X);
X_buffer.Insert(X);
analyzer.Update(X_buffer, rtc::Optional<size_t>(0));
RandomizeSampleVector(&random_generator, x[0]);
fft.PaddedFft(x[0], x_old, &X);
render_buffer.Insert(x);
analyzer.Update(render_buffer, rtc::Optional<size_t>(0));
}
mask.fill(1.f);
@ -85,11 +83,11 @@ TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) {
TEST(RenderSignalAnalyzer, NarrowBandDetection) {
RenderSignalAnalyzer analyzer;
Random random_generator(42U);
std::vector<float> x(kBlockSize, 0.f);
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::array<float, kBlockSize> x_old;
FftData X;
Aec3Fft fft;
FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector<size_t>(1, 1));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1,
std::vector<size_t>(1, 1));
std::array<float, kFftLengthBy2Plus1> mask;
x_old.fill(0.f);
constexpr int kSinusFrequencyBin = 32;
@ -98,12 +96,10 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) {
size_t sample_counter = 0;
for (size_t k = 0; k < 100; ++k) {
ProduceSinusoid(16000, 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2,
&sample_counter, x);
fft.PaddedFft(x, x_old, &X);
X_buffer.Insert(X);
analyzer.Update(
X_buffer,
known_delay ? rtc::Optional<size_t>(0) : rtc::Optional<size_t>());
&sample_counter, x[0]);
render_buffer.Insert(x);
analyzer.Update(render_buffer, known_delay ? rtc::Optional<size_t>(0)
: rtc::Optional<size_t>());
}
};
@ -124,5 +120,3 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) {
}
} // namespace webrtc
#endif

View File

@ -10,7 +10,7 @@
#include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"
#include <math.h>
#include <numeric>
#include <vector>
#include "webrtc/base/checks.h"
@ -18,143 +18,75 @@
namespace webrtc {
namespace {
constexpr float kSaturationLeakageFactor = 10.f;
constexpr size_t kSaturationLeakageBlocks = 10;
constexpr size_t kEchoPathChangeConvergenceBlocks = 3 * 250;
// Estimates the residual echo power when there is no detection correlation
// between the render and capture signals.
void InfiniteErlPowerEstimate(
size_t active_render_blocks,
size_t blocks_since_last_saturation,
const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
std::array<float, kFftLengthBy2Plus1>* R2) {
if (active_render_blocks > 20 * 250) {
// After an amount of active render samples for which an echo should have
// been detected in the capture signal if the ERL was not infinite, set the
// residual echo to 0.
R2->fill(0.f);
} else {
// Before certainty has been reached about the presence of echo, use the
// fallback echo power estimate as the residual echo estimate. Add a leakage
// factor when there is saturation.
std::copy(S2_fallback.begin(), S2_fallback.end(), R2->begin());
if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
std::for_each(R2->begin(), R2->end(),
[](float& a) { a *= kSaturationLeakageFactor; });
}
// Estimates the echo generating signal power as gated maximal power over a time
// window.
void EchoGeneratingPower(const RenderBuffer& render_buffer,
size_t min_delay,
size_t max_delay,
std::array<float, kFftLengthBy2Plus1>* X2) {
X2->fill(0.f);
for (size_t k = min_delay; k <= max_delay; ++k) {
std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
X2->begin(),
[](float a, float b) { return std::max(a, b); });
}
// Apply soft noise gate of -78 dBFS.
constexpr float kNoiseGatePower = 27509.42f;
std::for_each(X2->begin(), X2->end(), [kNoiseGatePower](float& a) {
if (kNoiseGatePower > a) {
a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a));
}
});
}
// Estimates the echo power in an half-duplex manner.
void HalfDuplexPowerEstimate(bool active_render,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2) {
// Set the residual echo power to the power of the capture signal.
if (active_render) {
std::copy(Y2.begin(), Y2.end(), R2->begin());
} else {
R2->fill(0.f);
}
}
// Estimates the residual echo power based on gains.
void GainBasedPowerEstimate(
size_t external_delay,
const RenderBuffer& X_buffer,
size_t blocks_since_last_saturation,
size_t active_render_blocks,
const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter,
const std::array<float, kFftLengthBy2Plus1>& echo_path_gain,
const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
std::array<float, kFftLengthBy2Plus1>* R2) {
const auto& X2 = X_buffer.Spectrum(external_delay);
// Base the residual echo power on gain of the linear echo path estimate if
// that is reliable, otherwise use the fallback echo path estimate. Add a
// leakage factor when there is saturation.
if (active_render_blocks > kEchoPathChangeConvergenceBlocks) {
for (size_t k = 0; k < R2->size(); ++k) {
(*R2)[k] = bands_with_reliable_filter[k] ? echo_path_gain[k] * X2[k]
: S2_fallback[k];
}
} else {
for (size_t k = 0; k < R2->size(); ++k) {
(*R2)[k] = S2_fallback[k];
}
}
if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
std::for_each(R2->begin(), R2->end(),
[](float& a) { a *= kSaturationLeakageFactor; });
}
}
// Estimates the residual echo power based on the linear echo path.
void ErleBasedPowerEstimate(
bool headset_detected,
const RenderBuffer& X_buffer,
bool using_subtractor_output,
size_t linear_filter_based_delay,
size_t blocks_since_last_saturation,
bool poorly_aligned_filter,
const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter,
const std::array<float, kFftLengthBy2Plus1>& echo_path_gain,
const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
// Estimates the residual echo power based on the erle and the linear power
// estimate.
void LinearResidualPowerEstimate(
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
const std::array<float, kFftLengthBy2Plus1>& Y2,
const std::array<float, kFftLengthBy2Plus1>& erle,
const std::array<float, kFftLengthBy2Plus1>& erl,
std::array<int, kFftLengthBy2Plus1>* R2_hold_counter,
std::array<float, kFftLengthBy2Plus1>* R2) {
// Residual echo power after saturation.
if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
for (size_t k = 0; k < R2->size(); ++k) {
(*R2)[k] = kSaturationLeakageFactor *
(bands_with_reliable_filter[k] && using_subtractor_output
? S2_linear[k]
: std::min(S2_fallback[k], Y2[k]));
}
return;
}
std::fill(R2_hold_counter->begin(), R2_hold_counter->end(), 10.f);
std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
[](float a, float b) {
RTC_DCHECK_LT(0.f, a);
return b / a;
});
}
// Residual echo power when a headset is used.
if (headset_detected) {
const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay);
for (size_t k = 0; k < R2->size(); ++k) {
RTC_DCHECK_LT(0.f, erle[k]);
(*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
? S2_linear[k] / erle[k]
: std::min(S2_fallback[k], Y2[k]);
(*R2)[k] = std::min((*R2)[k], X2[k] * erl[k]);
}
return;
}
// Estimates the residual echo power based on the estimate of the echo path
// gain.
void NonLinearResidualPowerEstimate(
const std::array<float, kFftLengthBy2Plus1>& X2,
const std::array<float, kFftLengthBy2Plus1>& Y2,
const std::array<float, kFftLengthBy2Plus1>& R2_old,
std::array<int, kFftLengthBy2Plus1>* R2_hold_counter,
std::array<float, kFftLengthBy2Plus1>* R2) {
// Compute preliminary residual echo.
// TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to
// 20 dB.
std::transform(X2.begin(), X2.end(), R2->begin(),
[](float a) { return a * kFixedEchoPathGain; });
// Residual echo power when the adaptive filter is poorly aligned.
if (poorly_aligned_filter) {
for (size_t k = 0; k < R2->size(); ++k) {
(*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
? S2_linear[k]
: std::min(S2_fallback[k], Y2[k]);
}
return;
}
// Residual echo power when there is no recent saturation, no headset detected
// and when the adaptive filter is well aligned.
for (size_t k = 0; k < R2->size(); ++k) {
RTC_DCHECK_LT(0.f, erle[k]);
const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay);
(*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
? S2_linear[k] / erle[k]
: std::min(echo_path_gain[k] * X2[k], Y2[k]);
// Update hold counter.
(*R2_hold_counter)[k] =
R2_old[k] < (*R2)[k] ? 0 : (*R2_hold_counter)[k] + 1;
// Compute the residual echo by holding the maximum echo power and applying an
// echo fading corresponding to a room with an RT60 value of about 50 ms.
(*R2)[k] = (*R2_hold_counter)[k] < 2
? std::max((*R2)[k], R2_old[k])
: std::min((*R2)[k] + R2_old[k] * 0.1f, Y2[k]);
}
}
} // namespace
ResidualEchoEstimator::ResidualEchoEstimator() {
echo_path_gain_.fill(100.f);
R2_old_.fill(0.f);
R2_hold_counter_.fill(0);
}
ResidualEchoEstimator::~ResidualEchoEstimator() = default;
@ -162,71 +94,53 @@ ResidualEchoEstimator::~ResidualEchoEstimator() = default;
void ResidualEchoEstimator::Estimate(
bool using_subtractor_output,
const AecState& aec_state,
const RenderBuffer& X_buffer,
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2) {
RTC_DCHECK(R2);
const rtc::Optional<size_t>& linear_filter_based_delay =
aec_state.FilterDelay();
// Update the echo path gain.
if (linear_filter_based_delay) {
std::copy(H2[*linear_filter_based_delay].begin(),
H2[*linear_filter_based_delay].end(), echo_path_gain_.begin());
constexpr float kEchoPathGainHeadroom = 10.f;
std::for_each(
echo_path_gain_.begin(), echo_path_gain_.end(),
[kEchoPathGainHeadroom](float& a) { a *= kEchoPathGainHeadroom; });
// Return zero residual echo power when a headset is detected.
if (aec_state.HeadsetDetected()) {
R2->fill(0.f);
R2_old_.fill(0.f);
R2_hold_counter_.fill(0.f);
return;
}
// Counts the blocks since saturation.
if (aec_state.SaturatedCapture()) {
blocks_since_last_saturation_ = 0;
// Estimate the echo generating signal power.
std::array<float, kFftLengthBy2Plus1> X2;
if (aec_state.ExternalDelay() || aec_state.FilterDelay()) {
const int delay =
static_cast<int>(aec_state.FilterDelay() ? *aec_state.FilterDelay()
: *aec_state.ExternalDelay());
// Computes the spectral power over the blocks surrounding the delay.
EchoGeneratingPower(
render_buffer, std::max(0, delay - 1),
std::min(kResidualEchoPowerRenderWindowSize - 1, delay + 1), &X2);
} else {
++blocks_since_last_saturation_;
// Computes the spectral power over the last 30 blocks.
EchoGeneratingPower(render_buffer, 0,
kResidualEchoPowerRenderWindowSize - 1, &X2);
}
const auto& bands_with_reliable_filter = aec_state.BandsWithReliableFilter();
if (aec_state.UsableLinearEstimate()) {
// Residual echo power estimation when the adaptive filter is reliable.
RTC_DCHECK(linear_filter_based_delay);
ErleBasedPowerEstimate(
aec_state.HeadsetDetected(), X_buffer, using_subtractor_output,
*linear_filter_based_delay, blocks_since_last_saturation_,
aec_state.PoorlyAlignedFilter(), bands_with_reliable_filter,
echo_path_gain_, S2_fallback, S2_linear, Y2, aec_state.Erle(),
aec_state.Erl(), R2);
} else if (aec_state.ModelBasedAecFeasible()) {
// Residual echo power when the adaptive filter is not reliable but still an
// external echo path delay is provided (and hence can be estimated).
RTC_DCHECK(aec_state.ExternalDelay());
GainBasedPowerEstimate(
*aec_state.ExternalDelay(), X_buffer, blocks_since_last_saturation_,
aec_state.ActiveRenderBlocks(), bands_with_reliable_filter,
echo_path_gain_, S2_fallback, R2);
} else if (aec_state.EchoLeakageDetected()) {
// Residual echo power when an external residual echo detection algorithm
// has deemed the echo canceller to leak echoes.
HalfDuplexPowerEstimate(aec_state.ActiveRender(), Y2, R2);
// Estimate the residual echo power.
if ((aec_state.UsableLinearEstimate() && using_subtractor_output)) {
LinearResidualPowerEstimate(S2_linear, aec_state.Erle(), &R2_hold_counter_,
R2);
} else {
// Residual echo power when none of the other cases are fulfilled.
InfiniteErlPowerEstimate(aec_state.ActiveRenderBlocks(),
blocks_since_last_saturation_, S2_fallback, R2);
NonLinearResidualPowerEstimate(X2, Y2, R2_old_, &R2_hold_counter_, R2);
}
}
void ResidualEchoEstimator::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) {
if (echo_path_variability.AudioPathChanged()) {
blocks_since_last_saturation_ = 0;
echo_path_gain_.fill(100.f);
// If the echo is saturated, estimate the echo power as the maximum echo power
// with a leakage factor.
if (aec_state.SaturatedEcho()) {
constexpr float kSaturationLeakageFactor = 100.f;
R2->fill((*std::max_element(R2->begin(), R2->end())) *
kSaturationLeakageFactor);
}
std::copy(R2->begin(), R2->end(), R2_old_.begin());
}
} // namespace webrtc

View File

@ -30,20 +30,14 @@ class ResidualEchoEstimator {
void Estimate(bool using_subtractor_output,
const AecState& aec_state,
const RenderBuffer& X_buffer,
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2);
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
private:
std::array<float, kFftLengthBy2Plus1> echo_path_gain_;
size_t blocks_since_last_saturation_ = 1000;
std::array<float, kFftLengthBy2Plus1> R2_old_;
std::array<int, kFftLengthBy2Plus1> R2_hold_counter_;
RTC_DISALLOW_COPY_AND_ASSIGN(ResidualEchoEstimator);
};

View File

@ -10,8 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"
// TODO(peah): Reactivate once the next CL has landed.
#if 0
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
@ -22,20 +20,16 @@ namespace webrtc {
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// Verifies that the check for non-null output gains works.
TEST(ResidualEchoEstimator, NullOutputGains) {
// Verifies that the check for non-null output residual echo power works.
TEST(ResidualEchoEstimator, NullResidualEchoPowerOutput) {
AecState aec_state;
FftBuffer X_buffer(Aec3Optimization::kNone, 10, std::vector<size_t>(1, 10));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 10,
std::vector<size_t>(1, 10));
std::vector<std::array<float, kFftLengthBy2Plus1>> H2;
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> S2_linear;
std::array<float, kFftLengthBy2Plus1> S2_fallback;
std::array<float, kFftLengthBy2Plus1> Y2;
EXPECT_DEATH(ResidualEchoEstimator().Estimate(true, aec_state, X_buffer, H2,
E2_main, E2_shadow, S2_linear,
S2_fallback, Y2, nullptr),
EXPECT_DEATH(ResidualEchoEstimator().Estimate(true, aec_state, render_buffer,
S2_linear, Y2, nullptr),
"");
}
@ -44,7 +38,8 @@ TEST(ResidualEchoEstimator, NullOutputGains) {
TEST(ResidualEchoEstimator, BasicTest) {
ResidualEchoEstimator estimator;
AecState aec_state;
FftBuffer X_buffer(Aec3Optimization::kNone, 10, std::vector<size_t>(1, 10));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 10,
std::vector<size_t>(1, 10));
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> S2_linear;
@ -52,7 +47,7 @@ TEST(ResidualEchoEstimator, BasicTest) {
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> R2;
EchoPathVariability echo_path_variability(false, false);
std::array<float, kBlockSize> x;
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<std::array<float, kFftLengthBy2Plus1>> H2(10);
Random random_generator(42U);
FftData X;
@ -63,6 +58,7 @@ TEST(ResidualEchoEstimator, BasicTest) {
H2_k.fill(0.01f);
}
H2[2].fill(10.f);
H2[2][0] = 0.1f;
constexpr float kLevel = 10.f;
E2_shadow.fill(kLevel);
@ -71,21 +67,20 @@ TEST(ResidualEchoEstimator, BasicTest) {
S2_fallback.fill(kLevel);
Y2.fill(kLevel);
for (int k = 0; k < 100; ++k) {
RandomizeSampleVector(&random_generator, x);
fft.PaddedFft(x, x_old, &X);
X_buffer.Insert(X);
for (int k = 0; k < 2000; ++k) {
RandomizeSampleVector(&random_generator, x[0]);
std::for_each(x[0].begin(), x[0].end(), [](float& a) { a /= 30.f; });
fft.PaddedFft(x[0], x_old, &X);
render_buffer.Insert(x);
aec_state.Update(H2, rtc::Optional<size_t>(2), X_buffer, E2_main, E2_shadow,
Y2, x, echo_path_variability, false);
aec_state.HandleEchoPathChange(echo_path_variability);
aec_state.Update(H2, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2,
x[0], false);
estimator.Estimate(true, aec_state, X_buffer, H2, E2_main, E2_shadow,
S2_linear, S2_fallback, Y2, &R2);
estimator.Estimate(true, aec_state, render_buffer, S2_linear, Y2, &R2);
}
std::for_each(R2.begin(), R2.end(),
[&](float a) { EXPECT_NEAR(kLevel, a, 0.1f); });
}
} // namespace webrtc
#endif

View File

@ -18,7 +18,7 @@
namespace webrtc {
void ShadowFilterUpdateGain::Compute(
const RenderBuffer& X_buffer,
const RenderBuffer& render_buffer,
const RenderSignalAnalyzer& render_signal_analyzer,
const FftData& E_shadow,
size_t size_partitions,
@ -40,12 +40,14 @@ void ShadowFilterUpdateGain::Compute(
}
// Compute mu.
constexpr float kX2Min = 44015068.0f;
// Corresponds to WGN of power -39 dBFS.
constexpr float kNoiseGatePower = 220075344.f;
constexpr float kMuFixed = .5f;
std::array<float, kFftLengthBy2Plus1> mu;
const auto& X2 = X_buffer.SpectralSum(size_partitions);
std::transform(X2.begin(), X2.end(), mu.begin(),
[&](float a) { return a > kX2Min ? kMuFixed / a : 0.f; });
const auto& X2 = render_buffer.SpectralSum(size_partitions);
std::transform(X2.begin(), X2.end(), mu.begin(), [&](float a) {
return a > kNoiseGatePower ? kMuFixed / a : 0.f;
});
// Avoid updating the filter close to narrow bands in the render signals.
render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);

View File

@ -22,7 +22,7 @@ namespace webrtc {
class ShadowFilterUpdateGain {
public:
// Computes the gain.
void Compute(const RenderBuffer& X_buffer,
void Compute(const RenderBuffer& render_buffer,
const RenderSignalAnalyzer& render_signal_analyzer,
const FftData& E_shadow,
size_t size_partitions,

View File

@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h"
// TODO(peah): Reactivate once the next CL has landed.
#if 0
#include <algorithm>
#include <numeric>
#include <string>
@ -37,20 +34,20 @@ void RunFilterUpdateTest(int num_blocks_to_process,
std::array<float, kBlockSize>* y_last_block,
FftData* G_last_block) {
ApmDataDumper data_dumper(42);
AdaptiveFirFilter main_filter(9, true, DetectOptimization(), &data_dumper);
AdaptiveFirFilter shadow_filter(9, true, DetectOptimization(), &data_dumper);
AdaptiveFirFilter main_filter(9, DetectOptimization(), &data_dumper);
AdaptiveFirFilter shadow_filter(9, DetectOptimization(), &data_dumper);
Aec3Fft fft;
FftBuffer X_buffer(Aec3Optimization::kNone, main_filter.SizePartitions(),
std::vector<size_t>(1, main_filter.SizePartitions()));
RenderBuffer render_buffer(
Aec3Optimization::kNone, 3, main_filter.SizePartitions(),
std::vector<size_t>(1, main_filter.SizePartitions()));
std::array<float, kBlockSize> x_old;
x_old.fill(0.f);
ShadowFilterUpdateGain shadow_gain;
Random random_generator(42U);
std::vector<float> x(kBlockSize, 0.f);
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<float> y(kBlockSize, 0.f);
AecState aec_state;
RenderSignalAnalyzer render_signal_analyzer;
FftData X;
std::array<float, kFftLength> s;
FftData S;
FftData G;
@ -67,14 +64,13 @@ void RunFilterUpdateTest(int num_blocks_to_process,
k) != blocks_with_saturation.end();
// Create the render signal.
RandomizeSampleVector(&random_generator, x);
delay_buffer.Delay(x, y);
fft.PaddedFft(x, x_old, &X);
X_buffer.Insert(X);
RandomizeSampleVector(&random_generator, x[0]);
delay_buffer.Delay(x[0], y);
render_buffer.Insert(x);
render_signal_analyzer.Update(
X_buffer, rtc::Optional<size_t>(delay_samples / kBlockSize));
render_buffer, rtc::Optional<size_t>(delay_samples / kBlockSize));
shadow_filter.Filter(X_buffer, &S);
shadow_filter.Filter(render_buffer, &S);
fft.Ifft(S, &s);
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
e_shadow.begin(),
@ -84,9 +80,9 @@ void RunFilterUpdateTest(int num_blocks_to_process,
});
fft.ZeroPaddedFft(e_shadow, &E_shadow);
shadow_gain.Compute(X_buffer, render_signal_analyzer, E_shadow,
shadow_gain.Compute(render_buffer, render_signal_analyzer, E_shadow,
shadow_filter.SizePartitions(), saturation, &G);
shadow_filter.Adapt(X_buffer, G);
shadow_filter.Adapt(render_buffer, G);
}
std::copy(e_shadow.begin(), e_shadow.end(), e_last_block->begin());
@ -108,11 +104,12 @@ std::string ProduceDebugText(size_t delay) {
// Verifies that the check for non-null output gain parameter works.
TEST(ShadowFilterUpdateGain, NullDataOutputGain) {
ApmDataDumper data_dumper(42);
FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector<size_t>(1, 1));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1,
std::vector<size_t>(1, 1));
RenderSignalAnalyzer analyzer;
FftData E;
ShadowFilterUpdateGain gain;
EXPECT_DEATH(gain.Compute(X_buffer, analyzer, E, 1, false, nullptr), "");
EXPECT_DEATH(gain.Compute(render_buffer, analyzer, E, 1, false, nullptr), "");
}
#endif
@ -188,5 +185,3 @@ TEST(ShadowFilterUpdateGain, SaturationBehavior) {
}
} // namespace webrtc
#endif

View File

@ -20,11 +20,11 @@ namespace webrtc {
namespace {
void ComputeError(const Aec3Fft& fft,
const FftData& S,
rtc::ArrayView<const float> y,
std::array<float, kBlockSize>* e,
FftData* E) {
void PredictionError(const Aec3Fft& fft,
const FftData& S,
rtc::ArrayView<const float> y,
std::array<float, kBlockSize>* e,
FftData* E) {
std::array<float, kFftLength> s;
fft.Ifft(S, &s);
constexpr float kScale = 1.0f / kFftLengthBy2;
@ -37,24 +37,13 @@ void ComputeError(const Aec3Fft& fft,
}
} // namespace
std::vector<size_t> Subtractor::NumBlocksInRenderSums() const {
if (kMainFilterSizePartitions != kShadowFilterSizePartitions) {
return {kMainFilterSizePartitions, kShadowFilterSizePartitions};
} else {
return {kMainFilterSizePartitions};
}
}
Subtractor::Subtractor(ApmDataDumper* data_dumper,
Aec3Optimization optimization)
: fft_(),
data_dumper_(data_dumper),
optimization_(optimization),
main_filter_(kMainFilterSizePartitions, true, optimization, data_dumper_),
shadow_filter_(kShadowFilterSizePartitions,
false,
optimization,
data_dumper_) {
main_filter_(kAdaptiveFilterLength, optimization, data_dumper_),
shadow_filter_(kAdaptiveFilterLength, optimization, data_dumper_) {
RTC_DCHECK(data_dumper_);
}
@ -72,42 +61,43 @@ void Subtractor::HandleEchoPathChange(
void Subtractor::Process(const RenderBuffer& render_buffer,
const rtc::ArrayView<const float> capture,
const RenderSignalAnalyzer& render_signal_analyzer,
bool saturation,
const AecState& aec_state,
SubtractorOutput* output) {
RTC_DCHECK_EQ(kBlockSize, capture.size());
rtc::ArrayView<const float> y = capture;
const RenderBuffer& X_buffer = render_buffer;
FftData& E_main = output->E_main;
FftData& E_shadow = output->E_shadow;
FftData E_shadow;
std::array<float, kBlockSize>& e_main = output->e_main;
std::array<float, kBlockSize>& e_shadow = output->e_shadow;
FftData S;
FftData& G = S;
// Form and analyze the output of the main filter.
main_filter_.Filter(X_buffer, &S);
ComputeError(fft_, S, y, &e_main, &E_main);
// Form the output of the main filter.
main_filter_.Filter(render_buffer, &S);
PredictionError(fft_, S, y, &e_main, &E_main);
// Form and analyze the output of the shadow filter.
shadow_filter_.Filter(X_buffer, &S);
ComputeError(fft_, S, y, &e_shadow, &E_shadow);
// Form the output of the shadow filter.
shadow_filter_.Filter(render_buffer, &S);
PredictionError(fft_, S, y, &e_shadow, &E_shadow);
// Compute spectra for future use.
E_main.Spectrum(optimization_, &output->E2_main);
E_shadow.Spectrum(optimization_, &output->E2_shadow);
// Update the main filter.
G_main_.Compute(X_buffer, render_signal_analyzer, *output, main_filter_,
saturation, &G);
main_filter_.Adapt(X_buffer, G);
G_main_.Compute(render_buffer, render_signal_analyzer, *output, main_filter_,
aec_state.SaturatedCapture(), &G);
main_filter_.Adapt(render_buffer, G);
data_dumper_->DumpRaw("aec3_subtractor_G_main", G.re);
data_dumper_->DumpRaw("aec3_subtractor_G_main", G.im);
// Update the shadow filter.
G_shadow_.Compute(X_buffer, render_signal_analyzer, E_shadow,
shadow_filter_.SizePartitions(), saturation, &G);
shadow_filter_.Adapt(X_buffer, G);
G_shadow_.Compute(render_buffer, render_signal_analyzer, E_shadow,
shadow_filter_.SizePartitions(),
aec_state.SaturatedCapture(), &G);
shadow_filter_.Adapt(render_buffer, G);
data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.re);
data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.im);

View File

@ -19,6 +19,7 @@
#include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h"
#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/echo_path_variability.h"
#include "webrtc/modules/audio_processing/aec3/main_filter_update_gain.h"
#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
@ -39,18 +40,9 @@ class Subtractor {
void Process(const RenderBuffer& render_buffer,
const rtc::ArrayView<const float> capture,
const RenderSignalAnalyzer& render_signal_analyzer,
bool saturation,
const AecState& aec_state,
SubtractorOutput* output);
// Returns a vector with the number of blocks included in the render buffer
// sums.
std::vector<size_t> NumBlocksInRenderSums() const;
// Returns the minimum required farend buffer length, i.e. the larger of the
// main and shadow filter partition counts.
size_t MinFarendBufferLength() const {
  const size_t main_partitions = kMainFilterSizePartitions;
  const size_t shadow_partitions = kShadowFilterSizePartitions;
  return std::max(main_partitions, shadow_partitions);
}
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
// Returns the block-wise frequency response of the main adaptive filter.
@ -60,9 +52,6 @@ class Subtractor {
}
private:
const size_t kMainFilterSizePartitions = 12;
const size_t kShadowFilterSizePartitions = 12;
const Aec3Fft fft_;
ApmDataDumper* data_dumper_;
const Aec3Optimization optimization_;

View File

@ -23,7 +23,6 @@ struct SubtractorOutput {
std::array<float, kBlockSize> e_main;
std::array<float, kBlockSize> e_shadow;
FftData E_main;
FftData E_shadow;
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
@ -32,8 +31,6 @@ struct SubtractorOutput {
e_shadow.fill(0.f);
E_main.re.fill(0.f);
E_main.im.fill(0.f);
E_shadow.re.fill(0.f);
E_shadow.im.fill(0.f);
E2_main.fill(0.f);
E2_shadow.fill(0.f);
}

View File

@ -10,8 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/subtractor.h"
// TODO(peah): Reactivate once the next CL has landed.
#if 0
#include <algorithm>
#include <numeric>
#include <string>
@ -30,17 +28,15 @@ float RunSubtractorTest(int num_blocks_to_process,
const std::vector<int>& blocks_with_echo_path_changes) {
ApmDataDumper data_dumper(42);
Subtractor subtractor(&data_dumper, DetectOptimization());
std::vector<float> x(kBlockSize, 0.f);
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<float> y(kBlockSize, 0.f);
std::array<float, kBlockSize> x_old;
SubtractorOutput output;
FftBuffer X_buffer(
Aec3Optimization::kNone, subtractor.MinFarendBufferLength(),
std::vector<size_t>(1, subtractor.MinFarendBufferLength()));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength,
std::vector<size_t>(1, kAdaptiveFilterLength));
RenderSignalAnalyzer render_signal_analyzer;
Random random_generator(42U);
Aec3Fft fft;
FftData X;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
@ -52,15 +48,14 @@ float RunSubtractorTest(int num_blocks_to_process,
DelayBuffer<float> delay_buffer(delay_samples);
for (int k = 0; k < num_blocks_to_process; ++k) {
RandomizeSampleVector(&random_generator, x);
RandomizeSampleVector(&random_generator, x[0]);
if (uncorrelated_inputs) {
RandomizeSampleVector(&random_generator, y);
} else {
delay_buffer.Delay(x, y);
delay_buffer.Delay(x[0], y);
}
fft.PaddedFft(x, x_old, &X);
X_buffer.Insert(X);
render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay());
render_buffer.Insert(x);
render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
// Handle echo path changes.
if (std::find(blocks_with_echo_path_changes.begin(),
@ -68,12 +63,13 @@ float RunSubtractorTest(int num_blocks_to_process,
k) != blocks_with_echo_path_changes.end()) {
subtractor.HandleEchoPathChange(EchoPathVariability(true, true));
}
subtractor.Process(X_buffer, y, render_signal_analyzer, false, &output);
subtractor.Process(render_buffer, y, render_signal_analyzer, aec_state,
&output);
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(subtractor.FilterFrequencyResponse(),
rtc::Optional<size_t>(delay_samples / kBlockSize),
X_buffer, E2_main, E2_shadow, Y2, x,
EchoPathVariability(false, false), false);
render_buffer, E2_main, Y2, x[0], false);
}
const float output_power = std::inner_product(
@ -107,31 +103,29 @@ TEST(Subtractor, NullDataDumper) {
// Expects Subtractor::Process to terminate the process when the
// SubtractorOutput pointer (its last argument) is null. The capture block y
// has the full kBlockSize samples, so only the null output is under test.
// The DISABLED_ prefix on the test name keeps gtest from running it by
// default.
//
// NOTE(review): the FftBuffer/X_buffer statements and the
// RenderBuffer/render_buffer statements look like the pre- and post-change
// versions of the same lines from the surrounding diff hunk - confirm which
// set is live before relying on both.
TEST(Subtractor, DISABLED_NullOutput) {
ApmDataDumper data_dumper(42);
Subtractor subtractor(&data_dumper, DetectOptimization());
FftBuffer X_buffer(
Aec3Optimization::kNone, subtractor.MinFarendBufferLength(),
std::vector<size_t>(1, subtractor.MinFarendBufferLength()));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength,
std::vector<size_t>(1, kAdaptiveFilterLength));
RenderSignalAnalyzer render_signal_analyzer;
std::vector<float> y(kBlockSize, 0.f);
// Variant passing X_buffer and a bool saturation flag.
EXPECT_DEATH(
subtractor.Process(X_buffer, y, render_signal_analyzer, false, nullptr),
"");
// Variant passing render_buffer and a default-constructed AecState.
EXPECT_DEATH(subtractor.Process(render_buffer, y, render_signal_analyzer,
AecState(), nullptr),
"");
}
// Verifies the check for the capture signal size.
//
// The capture block y is built with kBlockSize - 1 samples, one short of the
// size Subtractor::Process checks for, while the output pointer is valid; each
// EXPECT_DEATH below therefore expects the call to terminate the process.
//
// NOTE(review): the FftBuffer/X_buffer statements and the
// RenderBuffer/render_buffer statements look like the pre- and post-change
// versions of the same lines from the surrounding diff hunk - confirm which
// set is live before relying on both.
TEST(Subtractor, WrongCaptureSize) {
ApmDataDumper data_dumper(42);
Subtractor subtractor(&data_dumper, DetectOptimization());
FftBuffer X_buffer(
Aec3Optimization::kNone, subtractor.MinFarendBufferLength(),
std::vector<size_t>(1, subtractor.MinFarendBufferLength()));
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength,
std::vector<size_t>(1, kAdaptiveFilterLength));
RenderSignalAnalyzer render_signal_analyzer;
// Deliberately undersized capture block.
std::vector<float> y(kBlockSize - 1, 0.f);
SubtractorOutput output;
// Variant passing X_buffer and a bool saturation flag.
EXPECT_DEATH(
subtractor.Process(X_buffer, y, render_signal_analyzer, false, &output),
"");
// Variant passing render_buffer and a default-constructed AecState.
EXPECT_DEATH(subtractor.Process(render_buffer, y, render_signal_analyzer,
AecState(), &output),
"");
}
#endif
@ -175,5 +169,3 @@ TEST(Subtractor, EchoPathChangeReset) {
}
} // namespace webrtc
#endif

View File

@ -74,6 +74,7 @@ void SuppressionFilter::ApplyGain(
const FftData& comfort_noise,
const FftData& comfort_noise_high_band,
const std::array<float, kFftLengthBy2Plus1>& suppression_gain,
float high_bands_gain,
std::vector<std::vector<float>>* e) {
RTC_DCHECK(e);
RTC_DCHECK_EQ(e->size(), NumBandsForRate(sample_rate_hz_));
@ -138,11 +139,7 @@ void SuppressionFilter::ApplyGain(
fft_.Ifft(E, &time_domain_high_band_noise);
// Scale and apply the noise to the signals.
RTC_DCHECK_LT(3, suppression_gain.size());
float high_bands_gain = *std::min_element(suppression_gain.begin() + 32,
suppression_gain.end());
float high_bands_noise_scaling =
const float high_bands_noise_scaling =
0.4f * std::max(1.f - high_bands_gain, 0.f);
std::transform(

View File

@ -27,6 +27,7 @@ class SuppressionFilter {
void ApplyGain(const FftData& comfort_noise,
const FftData& comfort_noise_high_bands,
const std::array<float, kFftLengthBy2Plus1>& suppression_gain,
float high_bands_gain,
std::vector<std::vector<float>>* e);
private:

View File

@ -44,8 +44,9 @@ TEST(SuppressionFilter, NullOutput) {
FftData cn_high_bands;
std::array<float, kFftLengthBy2Plus1> gain;
EXPECT_DEATH(
SuppressionFilter(16000).ApplyGain(cn, cn_high_bands, gain, nullptr), "");
EXPECT_DEATH(SuppressionFilter(16000).ApplyGain(cn, cn_high_bands, gain, 1.0f,
nullptr),
"");
}
// Verifies the check for allowed sample rate.
@ -70,7 +71,7 @@ TEST(SuppressionFilter, ComfortNoiseInUnityGain) {
std::vector<std::vector<float>> e(3, std::vector<float>(kBlockSize, 0.f));
std::vector<std::vector<float>> e_ref = e;
filter.ApplyGain(cn, cn_high_bands, gain, &e);
filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
for (size_t k = 0; k < e.size(); ++k) {
EXPECT_EQ(e_ref[k], e[k]);
@ -102,7 +103,7 @@ TEST(SuppressionFilter, SignalSuppression) {
e[0]);
e0_input =
std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_input);
filter.ApplyGain(cn, cn_high_bands, gain, &e);
filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
e0_output =
std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_output);
}
@ -136,7 +137,7 @@ TEST(SuppressionFilter, SignalTransparency) {
e[0]);
e0_input =
std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_input);
filter.ApplyGain(cn, cn_high_bands, gain, &e);
filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
e0_output =
std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_output);
}
@ -166,7 +167,7 @@ TEST(SuppressionFilter, Delay) {
}
}
filter.ApplyGain(cn, cn_high_bands, gain, &e);
filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
if (k > 2) {
for (size_t j = 0; j < 2; ++j) {
for (size_t i = 0; i < kBlockSize; ++i) {

View File

@ -17,6 +17,7 @@
#include <math.h>
#include <algorithm>
#include <functional>
#include <numeric>
#include "webrtc/base/checks.h"
@ -33,9 +34,9 @@ void GainPostProcessing(std::array<float, kFftLengthBy2Plus1>* gain_squared) {
// filter on the upper-frequency gains influencing the overall achieved
// gain. TODO(peah): Update this when new anti-aliasing filters are
// implemented.
constexpr size_t kAntiAliasingImpactLimit = 64 * 0.7f;
constexpr size_t kAntiAliasingImpactLimit = (64 * 2000) / 8000;
std::for_each(gain_squared->begin() + kAntiAliasingImpactLimit,
gain_squared->end(),
gain_squared->end() - 1,
[gain_squared, kAntiAliasingImpactLimit](float& a) {
a = std::min(a, (*gain_squared)[kAntiAliasingImpactLimit]);
});
@ -43,8 +44,8 @@ void GainPostProcessing(std::array<float, kFftLengthBy2Plus1>* gain_squared) {
}
constexpr int kNumIterations = 2;
constexpr float kEchoMaskingMargin = 1.f / 10.f;
constexpr float kBandMaskingFactor = 1.f / 2.f;
constexpr float kEchoMaskingMargin = 1.f / 20.f;
constexpr float kBandMaskingFactor = 1.f / 10.f;
constexpr float kTimeMaskingFactor = 1.f / 10.f;
} // namespace
@ -137,8 +138,8 @@ void ComputeGains_SSE2(
std::transform(gain_squared->begin() + 1, gain_squared->end() - 1,
previous_gain_squared->begin(), gain_squared->begin() + 1,
[](float a, float b) {
return b < 0.0001f ? std::min(a, 0.0001f)
: std::min(a, b * 2.f);
return b < 0.001f ? std::min(a, 0.001f)
: std::min(a, b * 2.f);
});
// Process the gains to avoid artefacts caused by gain realization in the
@ -249,8 +250,8 @@ void ComputeGains(
std::transform(gain_squared->begin() + 1, gain_squared->end() - 1,
previous_gain_squared->begin(), gain_squared->begin() + 1,
[](float a, float b) {
return b < 0.0001f ? std::min(a, 0.0001f)
: std::min(a, b * 2.f);
return b < 0.001f ? std::min(a, 0.001f)
: std::min(a, b * 2.f);
});
// Process the gains to avoid artefacts caused by gain realization in the
@ -274,6 +275,43 @@ void ComputeGains(
} // namespace aec3
// Computes an upper bound on the gain to apply for high frequencies.
//
// |saturated_echo|: when true, a fixed strong attenuation (0.001) is returned
//   for the upper bands regardless of the render signal.
// |render|: one vector of samples per band; render[0] is used as the lower
//   band and render[1..] as the upper bands.
//
// Returns 1.f (no bound) when there are no upper bands, when the lower band
// holds more energy than every upper band, or when the largest upper-band
// energy is below kSubBlockSize * 100. Otherwise returns
// 0.01 * sqrt(low_band_energy / high_band_energy).
float HighFrequencyGainBound(bool saturated_echo,
                             const std::vector<std::vector<float>>& render) {
  // No upper bands means there is nothing to bound. Testing with <= rather
  // than == also stops an empty |render| from reaching render[0] below.
  if (render.size() <= 1) {
    return 1.f;
  }

  // Always attenuate the upper bands when there is saturated echo.
  if (saturated_echo) {
    return 0.001f;
  }

  // Sum of squared samples of one band.
  const auto band_energy = [](const std::vector<float>& band) {
    return std::accumulate(band.begin(), band.end(), 0.f,
                           [](float a, float b) -> float { return a + b * b; });
  };

  // Compute the upper and lower band energies. The upper-band figure is the
  // maximum over all upper bands, not their sum.
  const float low_band_energy = band_energy(render[0]);
  float high_band_energies = 0.f;
  for (size_t k = 1; k < render.size(); ++k) {
    high_band_energies = std::max(high_band_energies, band_energy(render[k]));
  }

  // If there is more power in the lower frequencies than the upper frequencies,
  // or if the power in upper frequencies is low, do not bound the gain in the
  // upper bands.
  if (high_band_energies < low_band_energy ||
      high_band_energies < kSubBlockSize * 10.f * 10.f) {
    return 1.f;
  }

  // In all other cases, bound the gain for upper frequencies. The early return
  // above guarantees a strictly positive denominator here.
  RTC_DCHECK_LE(low_band_energy, high_band_energies);
  return 0.01f * sqrtf(low_band_energy / high_band_energies);
}
SuppressionGain::SuppressionGain(Aec3Optimization optimization)
: optimization_(optimization) {
previous_gain_squared_.fill(1.f);
@ -284,21 +322,41 @@ void SuppressionGain::GetGain(
const std::array<float, kFftLengthBy2Plus1>& nearend_power,
const std::array<float, kFftLengthBy2Plus1>& residual_echo_power,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_power,
float strong_nearend_margin,
std::array<float, kFftLengthBy2Plus1>* gain) {
RTC_DCHECK(gain);
bool saturated_echo,
const std::vector<std::vector<float>>& render,
size_t num_capture_bands,
float* high_bands_gain,
std::array<float, kFftLengthBy2Plus1>* low_band_gain) {
RTC_DCHECK(high_bands_gain);
RTC_DCHECK(low_band_gain);
// Choose margin to use.
const float margin = saturated_echo ? 0.001f : 0.01f;
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
aec3::ComputeGains_SSE2(nearend_power, residual_echo_power,
comfort_noise_power, strong_nearend_margin,
&previous_gain_squared_, &previous_masker_, gain);
aec3::ComputeGains_SSE2(
nearend_power, residual_echo_power, comfort_noise_power, margin,
&previous_gain_squared_, &previous_masker_, low_band_gain);
break;
#endif
default:
aec3::ComputeGains(nearend_power, residual_echo_power,
comfort_noise_power, strong_nearend_margin,
&previous_gain_squared_, &previous_masker_, gain);
comfort_noise_power, margin, &previous_gain_squared_,
&previous_masker_, low_band_gain);
}
if (num_capture_bands > 1) {
// Compute the gain for upper frequencies.
const float min_high_band_gain =
HighFrequencyGainBound(saturated_echo, render);
*high_bands_gain =
*std::min_element(low_band_gain->begin() + 32, low_band_gain->end());
*high_bands_gain = std::min(*high_bands_gain, min_high_band_gain);
} else {
*high_bands_gain = 1.f;
}
}

View File

@ -12,6 +12,7 @@
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_
#include <array>
#include <vector>
#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
@ -48,8 +49,11 @@ class SuppressionGain {
void GetGain(const std::array<float, kFftLengthBy2Plus1>& nearend_power,
const std::array<float, kFftLengthBy2Plus1>& residual_echo_power,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_power,
float strong_nearend_margin,
std::array<float, kFftLengthBy2Plus1>* gain);
bool saturated_echo,
const std::vector<std::vector<float>>& render,
size_t num_capture_bands,
float* high_bands_gain,
std::array<float, kFftLengthBy2Plus1>* low_band_gain);
private:
const Aec3Optimization optimization_;

View File

@ -25,9 +25,16 @@ TEST(SuppressionGain, NullOutputGains) {
std::array<float, kFftLengthBy2Plus1> E2;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> N2;
EXPECT_DEATH(
SuppressionGain(DetectOptimization()).GetGain(E2, R2, N2, 0.1f, nullptr),
"");
E2.fill(0.f);
R2.fill(0.f);
N2.fill(0.f);
float high_bands_gain;
EXPECT_DEATH(SuppressionGain(DetectOptimization())
.GetGain(E2, R2, N2, false,
std::vector<std::vector<float>>(
3, std::vector<float>(kBlockSize, 0.f)),
1, &high_bands_gain, nullptr),
"");
}
#endif
@ -109,17 +116,19 @@ TEST(SuppressionGain, TestOptimizations) {
// Does a sanity check that the gains are correctly computed.
TEST(SuppressionGain, BasicGainComputation) {
SuppressionGain suppression_gain(DetectOptimization());
float high_bands_gain;
std::array<float, kFftLengthBy2Plus1> E2;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> N2;
std::array<float, kFftLengthBy2Plus1> g;
std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f));
// Ensure that a strong noise is detected to mask any echoes.
E2.fill(10.f);
R2.fill(0.1f);
N2.fill(100.f);
for (int k = 0; k < 10; ++k) {
suppression_gain.GetGain(E2, R2, N2, 0.1f, &g);
suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
[](float a) { EXPECT_NEAR(1.f, a, 0.001); });
@ -129,7 +138,7 @@ TEST(SuppressionGain, BasicGainComputation) {
R2.fill(0.1f);
N2.fill(0.f);
for (int k = 0; k < 10; ++k) {
suppression_gain.GetGain(E2, R2, N2, 0.1f, &g);
suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
[](float a) { EXPECT_NEAR(1.f, a, 0.001); });
@ -139,7 +148,7 @@ TEST(SuppressionGain, BasicGainComputation) {
R2.fill(100.f);
N2.fill(0.f);
for (int k = 0; k < 10; ++k) {
suppression_gain.GetGain(E2, R2, N2, 0.1f, &g);
suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
[](float a) { EXPECT_NEAR(0.f, a, 0.001); });