Transparency improvements in the echo canceller 3
This CL adds two changes: - Adaptive adjustment of the echo suppression to cover both the case when the echo path well covers the room and the case when it does not. - Identification of the case when the echo is too low to be audible, and adaptive handling of this case in the echo suppression. BUG=webrtc:7519, webrtc:7956,webrtc:7957 Review-Url: https://codereview.webrtc.org/2974583004 Cr-Commit-Position: refs/heads/master@{#18962}
This commit is contained in:
parent
863f03ba38
commit
2910357621
@ -25,22 +25,6 @@
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Constrains a partition of the frequency domain filter to be limited in
|
||||
// time via setting the relevant time-domain coefficients to zero.
|
||||
void Constrain(const Aec3Fft& fft, FftData* H) {
|
||||
std::array<float, kFftLength> h;
|
||||
fft.Ifft(*H, &h);
|
||||
constexpr float kScale = 1.0f / kFftLengthBy2;
|
||||
std::for_each(h.begin(), h.begin() + kFftLengthBy2,
|
||||
[kScale](float& a) { a *= kScale; });
|
||||
std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
|
||||
fft.Fft(&h, H);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the frequency response of the filter.
|
||||
@ -434,6 +418,7 @@ AdaptiveFirFilter::AdaptiveFirFilter(size_t size_partitions,
|
||||
H2_(size_partitions, std::array<float, kFftLengthBy2Plus1>()) {
|
||||
RTC_DCHECK(data_dumper_);
|
||||
|
||||
h_.fill(0.f);
|
||||
for (auto& H_j : H_) {
|
||||
H_j.Clear();
|
||||
}
|
||||
@ -446,6 +431,7 @@ AdaptiveFirFilter::AdaptiveFirFilter(size_t size_partitions,
|
||||
AdaptiveFirFilter::~AdaptiveFirFilter() = default;
|
||||
|
||||
void AdaptiveFirFilter::HandleEchoPathChange() {
|
||||
h_.fill(0.f);
|
||||
for (auto& H_j : H_) {
|
||||
H_j.Clear();
|
||||
}
|
||||
@ -493,10 +479,7 @@ void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
|
||||
}
|
||||
|
||||
// Constrain the filter partitions in a cyclic manner.
|
||||
Constrain(fft_, &H_[partition_to_constrain_]);
|
||||
partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1)
|
||||
? partition_to_constrain_ + 1
|
||||
: 0;
|
||||
Constrain();
|
||||
|
||||
// Update the frequency response and echo return loss for the filter.
|
||||
switch (optimization_) {
|
||||
@ -518,4 +501,25 @@ void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
|
||||
}
|
||||
}
|
||||
|
||||
// Constrains a partition of the frequency domain filter to be limited in
|
||||
// time via setting the relevant time-domain coefficients to zero.
|
||||
void AdaptiveFirFilter::Constrain() {
|
||||
std::array<float, kFftLength> h;
|
||||
fft_.Ifft(H_[partition_to_constrain_], &h);
|
||||
|
||||
constexpr float kScale = 1.0f / kFftLengthBy2;
|
||||
std::for_each(h.begin(), h.begin() + kFftLengthBy2,
|
||||
[kScale](float& a) { a *= kScale; });
|
||||
std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
|
||||
|
||||
std::copy(h.begin(), h.begin() + kFftLengthBy2,
|
||||
h_.begin() + partition_to_constrain_ * kFftLengthBy2);
|
||||
|
||||
fft_.Fft(&h, &H_[partition_to_constrain_]);
|
||||
|
||||
partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1)
|
||||
? partition_to_constrain_ + 1
|
||||
: 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -119,6 +119,12 @@ class AdaptiveFirFilter {
|
||||
return H2_;
|
||||
}
|
||||
|
||||
// Returns the estimate of the impulse response.
|
||||
const std::array<float, kAdaptiveFilterTimeDomainLength>&
|
||||
FilterImpulseResponse() const {
|
||||
return h_;
|
||||
}
|
||||
|
||||
void DumpFilter(const char* name) {
|
||||
for (auto& H : H_) {
|
||||
data_dumper_->DumpRaw(name, H.re);
|
||||
@ -127,11 +133,15 @@ class AdaptiveFirFilter {
|
||||
}
|
||||
|
||||
private:
|
||||
// Constrain the filter partitions in a cyclic manner.
|
||||
void Constrain();
|
||||
|
||||
ApmDataDumper* const data_dumper_;
|
||||
const Aec3Fft fft_;
|
||||
const Aec3Optimization optimization_;
|
||||
std::vector<FftData> H_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> H2_;
|
||||
std::array<float, kAdaptiveFilterTimeDomainLength> h_;
|
||||
std::array<float, kFftLengthBy2Plus1> erl_;
|
||||
size_t partition_to_constrain_ = 0;
|
||||
|
||||
|
||||
@ -308,7 +308,8 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
|
||||
AecState aec_state(0.f);
|
||||
RenderSignalAnalyzer render_signal_analyzer;
|
||||
std::vector<float> e(kBlockSize, 0.f);
|
||||
std::array<float, kFftLength> s;
|
||||
std::array<float, kFftLength> s_scratch;
|
||||
std::array<float, kBlockSize> s;
|
||||
FftData S;
|
||||
FftData G;
|
||||
FftData E;
|
||||
@ -348,20 +349,24 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
|
||||
render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
|
||||
|
||||
filter.Filter(render_buffer, &S);
|
||||
fft.Ifft(S, &s);
|
||||
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e.begin(),
|
||||
fft.Ifft(S, &s_scratch);
|
||||
std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
|
||||
e.begin(),
|
||||
[&](float a, float b) { return a - b * kScale; });
|
||||
std::for_each(e.begin(), e.end(),
|
||||
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
|
||||
fft.ZeroPaddedFft(e, &E);
|
||||
for (size_t k = 0; k < kBlockSize; ++k) {
|
||||
s[k] = kScale * s_scratch[k + kFftLengthBy2];
|
||||
}
|
||||
|
||||
gain.Compute(render_buffer, render_signal_analyzer, E,
|
||||
filter.SizePartitions(), false, &G);
|
||||
filter.Adapt(render_buffer, G);
|
||||
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
|
||||
aec_state.Update(filter.FilterFrequencyResponse(),
|
||||
rtc::Optional<size_t>(), render_buffer, E2_main, Y2,
|
||||
x[0], false);
|
||||
filter.FilterImpulseResponse(), rtc::Optional<size_t>(),
|
||||
render_buffer, E2_main, Y2, x[0], s, false);
|
||||
}
|
||||
// Verify that the filter is able to perform well.
|
||||
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
|
||||
|
||||
@ -33,14 +33,16 @@ constexpr int kMetricsComputationBlocks = 9;
|
||||
constexpr int kMetricsCollectionBlocks =
|
||||
kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
|
||||
|
||||
constexpr int kAdaptiveFilterLength = 12;
|
||||
constexpr int kResidualEchoPowerRenderWindowSize = 30;
|
||||
|
||||
constexpr size_t kFftLengthBy2 = 64;
|
||||
constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
|
||||
constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
|
||||
constexpr size_t kFftLength = 2 * kFftLengthBy2;
|
||||
|
||||
constexpr int kAdaptiveFilterLength = 12;
|
||||
constexpr int kResidualEchoPowerRenderWindowSize = 30;
|
||||
constexpr int kAdaptiveFilterTimeDomainLength =
|
||||
kAdaptiveFilterLength * kFftLengthBy2;
|
||||
|
||||
constexpr size_t kMaxNumBands = 3;
|
||||
constexpr size_t kSubFrameLength = 80;
|
||||
|
||||
|
||||
@ -78,11 +78,11 @@ constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond;
|
||||
|
||||
int AecState::instance_count_ = 0;
|
||||
|
||||
AecState::AecState(float echo_decay)
|
||||
AecState::AecState(float reverb_decay)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
echo_path_change_counter_(kEchoPathChangeCounterInitial),
|
||||
echo_decay_factor_(echo_decay) {}
|
||||
reverb_decay_(reverb_decay) {}
|
||||
|
||||
AecState::~AecState() = default;
|
||||
|
||||
@ -111,12 +111,18 @@ void AecState::HandleEchoPathChange(
|
||||
|
||||
void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
adaptive_filter_frequency_response,
|
||||
const std::array<float, kAdaptiveFilterTimeDomainLength>&
|
||||
adaptive_filter_impulse_response,
|
||||
const rtc::Optional<size_t>& external_delay_samples,
|
||||
const RenderBuffer& render_buffer,
|
||||
const std::array<float, kFftLengthBy2Plus1>& E2_main,
|
||||
const std::array<float, kFftLengthBy2Plus1>& Y2,
|
||||
rtc::ArrayView<const float> x,
|
||||
const std::array<float, kBlockSize>& s,
|
||||
bool echo_leakage_detected) {
|
||||
// Update the echo audibility evaluator.
|
||||
echo_audibility_.Update(x, s);
|
||||
|
||||
// Store input parameters.
|
||||
echo_leakage_detected_ = echo_leakage_detected;
|
||||
|
||||
@ -179,6 +185,126 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
!external_delay_ && !filter_delay_ &&
|
||||
(!render_received_ ||
|
||||
blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);
|
||||
|
||||
// Update the room reverb estimate.
|
||||
UpdateReverb(adaptive_filter_impulse_response);
|
||||
}
|
||||
|
||||
void AecState::UpdateReverb(
|
||||
const std::array<float, kAdaptiveFilterTimeDomainLength>&
|
||||
impulse_response) {
|
||||
if ((!(filter_delay_ && usable_linear_estimate_)) ||
|
||||
(*filter_delay_ > kAdaptiveFilterLength - 4)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Form the data to match against by squaring the impulse response
|
||||
// coefficients.
|
||||
std::array<float, kAdaptiveFilterTimeDomainLength> matching_data;
|
||||
std::transform(impulse_response.begin(), impulse_response.end(),
|
||||
matching_data.begin(), [](float a) { return a * a; });
|
||||
|
||||
// Avoid matching against noise in the model by subtracting an estimate of the
|
||||
// model noise power.
|
||||
constexpr size_t kTailLength = 64;
|
||||
constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength;
|
||||
const float tail_power = *std::max_element(matching_data.begin() + tail_index,
|
||||
matching_data.end());
|
||||
std::for_each(matching_data.begin(), matching_data.begin() + tail_index,
|
||||
[tail_power](float& a) { a = std::max(0.f, a - tail_power); });
|
||||
|
||||
// Identify the peak index of the impulse response.
|
||||
const size_t peak_index = *std::max_element(
|
||||
matching_data.begin(), matching_data.begin() + tail_index);
|
||||
|
||||
if (peak_index + 128 < tail_index) {
|
||||
size_t start_index = peak_index + 64;
|
||||
// Compute the matching residual error for the current candidate to match.
|
||||
float residual_sqr_sum = 0.f;
|
||||
float d_k = reverb_decay_to_test_;
|
||||
for (size_t k = start_index; k < tail_index; ++k) {
|
||||
if (matching_data[start_index + 1] == 0.f) {
|
||||
break;
|
||||
}
|
||||
|
||||
float residual = matching_data[k] - matching_data[peak_index] * d_k;
|
||||
residual_sqr_sum += residual * residual;
|
||||
d_k *= reverb_decay_to_test_;
|
||||
}
|
||||
|
||||
// If needed, update the best candidate for the reverb decay.
|
||||
if (reverb_decay_candidate_residual_ < 0.f ||
|
||||
residual_sqr_sum < reverb_decay_candidate_residual_) {
|
||||
reverb_decay_candidate_residual_ = residual_sqr_sum;
|
||||
reverb_decay_candidate_ = reverb_decay_to_test_;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the next reverb candidate to evaluate such that all candidates will
|
||||
// be evaluated within one second.
|
||||
reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond);
|
||||
|
||||
// If all reverb candidates have been evaluated, choose the best one as the
|
||||
// reverb decay.
|
||||
if (reverb_decay_to_test_ >= 0.9965f) {
|
||||
if (reverb_decay_candidate_residual_ < 0.f) {
|
||||
// Transform the decay to be in the unit of blocks.
|
||||
reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2);
|
||||
|
||||
// Limit the estimated reverb_decay_ to the maximum one needed in practice
|
||||
// to minimize the impact of incorrect estimates.
|
||||
reverb_decay_ = std::min(0.8f, reverb_decay_);
|
||||
}
|
||||
reverb_decay_to_test_ = 0.9f;
|
||||
reverb_decay_candidate_residual_ = -1.f;
|
||||
}
|
||||
|
||||
// For noisy impulse responses, assume a fixed tail length.
|
||||
if (tail_power > 0.0005f) {
|
||||
reverb_decay_ = 0.7f;
|
||||
}
|
||||
data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_);
|
||||
data_dumper_->DumpRaw("aec3_tail_power", tail_power);
|
||||
}
|
||||
|
||||
// Updates the echo audibility decision based on the peak absolute values of
// the signals x and s.
void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
                                      const std::array<float, kBlockSize>& s) {
  // Peak absolute values, obtained from the extreme sample values.
  const auto x_extremes = std::minmax_element(x.begin(), x.end());
  const auto s_extremes = std::minmax_element(s.begin(), s.end());
  const float x_peak =
      std::max(std::abs(*x_extremes.first), std::abs(*x_extremes.second));
  const float s_peak =
      std::max(std::abs(*s_extremes.first), std::abs(*s_extremes.second));

  // Count the number of consecutive calls where x has a very low peak.
  low_farend_counter_ = x_peak < 5.f ? low_farend_counter_ + 1 : 0;

  // The echo is deemed as not audible if the echo estimate is on the level of
  // the quantization noise in the FFTs and the nearend level is sufficiently
  // strong to mask that by ensuring that the playout and AGC gains do not boost
  // any residual echo that is below the quantization noise level.
  const bool masked_by_nearend = max_nearend_ > 500 && s_peak < 30.f;

  // Furthermore, cases where the render signal is very close to zero are also
  // identified as not producing audible echo.
  const bool farend_close_to_zero = low_farend_counter_ > 20;

  inaudible_echo_ = masked_by_nearend || farend_close_to_zero;
}
|
||||
|
||||
// Tracks the maximum absolute value observed in e. The stored maximum is
// replaced whenever a larger peak is seen; otherwise, once it has not been
// exceeded for more than 5 * kNumBlocksPerSecond calls, it is scaled by 0.995
// on each further call.
void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) {
  const float peak = std::max(std::abs(*std::max_element(e.begin(), e.end())),
                              std::abs(*std::min_element(e.begin(), e.end())));

  if (max_nearend_ < peak) {
    // New maximum: store it and restart the hold counter.
    max_nearend_ = peak;
    max_nearend_counter_ = 0;
    return;
  }

  ++max_nearend_counter_;
  if (max_nearend_counter_ > 5 * kNumBlocksPerSecond) {
    max_nearend_ *= 0.995f;
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -31,7 +31,7 @@ class ApmDataDumper;
|
||||
// Handles the state and the conditions for the echo removal functionality.
|
||||
class AecState {
|
||||
public:
|
||||
explicit AecState(float echo_decay);
|
||||
explicit AecState(float reverb_decay);
|
||||
~AecState();
|
||||
|
||||
// Returns whether the linear filter estimate is usable.
|
||||
@ -78,23 +78,50 @@ class AecState {
|
||||
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
|
||||
|
||||
// Returns the decay factor for the echo reverberation.
|
||||
// TODO(peah): Make this adaptive.
|
||||
float ReverbDecayFactor() const { return echo_decay_factor_; }
|
||||
float ReverbDecay() const { return reverb_decay_; }
|
||||
|
||||
// Returns whether the echo suppression gain should be forced to zero.
|
||||
bool ForcedZeroGain() const { return force_zero_gain_; }
|
||||
|
||||
// Returns whether the echo in the capture signal is audible.
|
||||
bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); }
|
||||
|
||||
// Updates the aec state with the AEC output signal.
|
||||
void UpdateWithOutput(rtc::ArrayView<const float> e) {
|
||||
echo_audibility_.UpdateWithOutput(e);
|
||||
}
|
||||
|
||||
// Updates the aec state.
|
||||
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
adaptive_filter_frequency_response,
|
||||
const std::array<float, kAdaptiveFilterTimeDomainLength>&
|
||||
adaptive_filter_impulse_response,
|
||||
const rtc::Optional<size_t>& external_delay_samples,
|
||||
const RenderBuffer& render_buffer,
|
||||
const std::array<float, kFftLengthBy2Plus1>& E2_main,
|
||||
const std::array<float, kFftLengthBy2Plus1>& Y2,
|
||||
rtc::ArrayView<const float> x,
|
||||
const std::array<float, kBlockSize>& s_main,
|
||||
bool echo_leakage_detected);
|
||||
|
||||
private:
|
||||
class EchoAudibility {
|
||||
public:
|
||||
void Update(rtc::ArrayView<const float> x,
|
||||
const std::array<float, kBlockSize>& s);
|
||||
void UpdateWithOutput(rtc::ArrayView<const float> e);
|
||||
bool InaudibleEcho() const { return inaudible_echo_; }
|
||||
|
||||
private:
|
||||
float max_nearend_ = 0.f;
|
||||
size_t max_nearend_counter_ = 0;
|
||||
size_t low_farend_counter_ = 0;
|
||||
bool inaudible_echo_ = false;
|
||||
};
|
||||
|
||||
void UpdateReverb(const std::array<float, kAdaptiveFilterTimeDomainLength>&
|
||||
impulse_response);
|
||||
|
||||
static int instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
ErlEstimator erl_estimator_;
|
||||
@ -113,7 +140,12 @@ class AecState {
|
||||
rtc::Optional<size_t> filter_delay_;
|
||||
rtc::Optional<size_t> external_delay_;
|
||||
size_t blocks_since_last_saturation_ = 1000;
|
||||
const float echo_decay_factor_;
|
||||
float reverb_decay_;
|
||||
float reverb_decay_to_test_ = 0.9f;
|
||||
float reverb_decay_candidate_ = 0.f;
|
||||
float reverb_decay_candidate_residual_ = -1.f;
|
||||
EchoAudibility echo_audibility_;
|
||||
|
||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AecState);
|
||||
};
|
||||
|
||||
|
||||
@ -25,6 +25,8 @@ TEST(AecState, NormalUsage) {
|
||||
std::array<float, kFftLengthBy2Plus1> Y2 = {};
|
||||
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
|
||||
EchoPathVariability echo_path_variability(false, false);
|
||||
std::array<float, kBlockSize> s;
|
||||
s.fill(100.f);
|
||||
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>
|
||||
converged_filter_frequency_response(10);
|
||||
@ -36,47 +38,57 @@ TEST(AecState, NormalUsage) {
|
||||
converged_filter_frequency_response[2].fill(100.f);
|
||||
converged_filter_frequency_response[2][0] = 1.f;
|
||||
|
||||
std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
|
||||
impulse_response.fill(0.f);
|
||||
|
||||
// Verify that linear AEC usability is false when the filter is diverged and
|
||||
// there is no external delay reported.
|
||||
state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(diverged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
EXPECT_FALSE(state.UsableLinearEstimate());
|
||||
|
||||
// Verify that linear AEC usability is true when the filter is converged
|
||||
std::fill(x[0].begin(), x[0].end(), 101.f);
|
||||
for (int k = 0; k < 3000; ++k) {
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
}
|
||||
EXPECT_TRUE(state.UsableLinearEstimate());
|
||||
|
||||
// Verify that linear AEC usability becomes false after an echo path change is
|
||||
// reported
|
||||
state.HandleEchoPathChange(EchoPathVariability(true, false));
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
EXPECT_FALSE(state.UsableLinearEstimate());
|
||||
|
||||
// Verify that the active render detection works as intended.
|
||||
std::fill(x[0].begin(), x[0].end(), 101.f);
|
||||
state.HandleEchoPathChange(EchoPathVariability(true, true));
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
EXPECT_FALSE(state.ActiveRender());
|
||||
|
||||
for (int k = 0; k < 1000; ++k) {
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
}
|
||||
EXPECT_TRUE(state.ActiveRender());
|
||||
|
||||
// Verify that echo leakage is properly reported.
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
EXPECT_FALSE(state.EchoLeakageDetected());
|
||||
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], true);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
true);
|
||||
EXPECT_TRUE(state.EchoLeakageDetected());
|
||||
|
||||
// Verify that the ERL is properly estimated
|
||||
@ -91,8 +103,9 @@ TEST(AecState, NormalUsage) {
|
||||
|
||||
Y2.fill(10.f * 10000.f * 10000.f);
|
||||
for (size_t k = 0; k < 1000; ++k) {
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
}
|
||||
|
||||
ASSERT_TRUE(state.UsableLinearEstimate());
|
||||
@ -107,8 +120,9 @@ TEST(AecState, NormalUsage) {
|
||||
E2_main.fill(1.f * 10000.f * 10000.f);
|
||||
Y2.fill(10.f * E2_main[0]);
|
||||
for (size_t k = 0; k < 1000; ++k) {
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
}
|
||||
ASSERT_TRUE(state.UsableLinearEstimate());
|
||||
{
|
||||
@ -127,8 +141,9 @@ TEST(AecState, NormalUsage) {
|
||||
E2_main.fill(1.f * 10000.f * 10000.f);
|
||||
Y2.fill(5.f * E2_main[0]);
|
||||
for (size_t k = 0; k < 1000; ++k) {
|
||||
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
state.Update(converged_filter_frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
|
||||
false);
|
||||
}
|
||||
|
||||
ASSERT_TRUE(state.UsableLinearEstimate());
|
||||
@ -155,6 +170,8 @@ TEST(AecState, NonSignificantDelay) {
|
||||
std::array<float, kFftLengthBy2Plus1> Y2;
|
||||
std::array<float, kBlockSize> x;
|
||||
EchoPathVariability echo_path_variability(false, false);
|
||||
std::array<float, kBlockSize> s;
|
||||
s.fill(100.f);
|
||||
x.fill(0.f);
|
||||
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30);
|
||||
@ -162,10 +179,13 @@ TEST(AecState, NonSignificantDelay) {
|
||||
v.fill(0.01f);
|
||||
}
|
||||
|
||||
std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
|
||||
impulse_response.fill(0.f);
|
||||
|
||||
// Verify that a non-significant filter delay is identified correctly.
|
||||
state.HandleEchoPathChange(echo_path_variability);
|
||||
state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
|
||||
E2_main, Y2, x, false);
|
||||
state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
|
||||
render_buffer, E2_main, Y2, x, s, false);
|
||||
EXPECT_FALSE(state.FilterDelay());
|
||||
}
|
||||
|
||||
@ -179,11 +199,16 @@ TEST(AecState, ConvergedFilterDelay) {
|
||||
std::array<float, kFftLengthBy2Plus1> Y2;
|
||||
std::array<float, kBlockSize> x;
|
||||
EchoPathVariability echo_path_variability(false, false);
|
||||
std::array<float, kBlockSize> s;
|
||||
s.fill(100.f);
|
||||
x.fill(0.f);
|
||||
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(
|
||||
kFilterLength);
|
||||
|
||||
std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
|
||||
impulse_response.fill(0.f);
|
||||
|
||||
// Verify that the filter delay for a converged filter is properly identified.
|
||||
for (int k = 0; k < kFilterLength; ++k) {
|
||||
for (auto& v : frequency_response) {
|
||||
@ -192,8 +217,8 @@ TEST(AecState, ConvergedFilterDelay) {
|
||||
frequency_response[k].fill(100.f);
|
||||
frequency_response[k][0] = 0.f;
|
||||
state.HandleEchoPathChange(echo_path_variability);
|
||||
state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
|
||||
E2_main, Y2, x, false);
|
||||
state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
|
||||
render_buffer, E2_main, Y2, x, s, false);
|
||||
EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
|
||||
if (k != (kFilterLength - 1)) {
|
||||
EXPECT_EQ(k, state.FilterDelay());
|
||||
@ -208,6 +233,8 @@ TEST(AecState, ExternalDelay) {
|
||||
std::array<float, kFftLengthBy2Plus1> E2_shadow;
|
||||
std::array<float, kFftLengthBy2Plus1> Y2;
|
||||
std::array<float, kBlockSize> x;
|
||||
std::array<float, kBlockSize> s;
|
||||
s.fill(100.f);
|
||||
E2_main.fill(0.f);
|
||||
E2_shadow.fill(0.f);
|
||||
Y2.fill(0.f);
|
||||
@ -219,10 +246,14 @@ TEST(AecState, ExternalDelay) {
|
||||
v.fill(0.01f);
|
||||
}
|
||||
|
||||
std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
|
||||
impulse_response.fill(0.f);
|
||||
|
||||
for (size_t k = 0; k < frequency_response.size() - 1; ++k) {
|
||||
state.HandleEchoPathChange(EchoPathVariability(false, false));
|
||||
state.Update(frequency_response, rtc::Optional<size_t>(k * kBlockSize + 5),
|
||||
render_buffer, E2_main, Y2, x, false);
|
||||
state.Update(frequency_response, impulse_response,
|
||||
rtc::Optional<size_t>(k * kBlockSize + 5), render_buffer,
|
||||
E2_main, Y2, x, s, false);
|
||||
EXPECT_TRUE(state.ExternalDelay());
|
||||
EXPECT_EQ(k, state.ExternalDelay());
|
||||
}
|
||||
@ -230,8 +261,8 @@ TEST(AecState, ExternalDelay) {
|
||||
// Verify that the externally reported delay is properly unset when it is no
|
||||
// longer present.
|
||||
state.HandleEchoPathChange(EchoPathVariability(false, false));
|
||||
state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
|
||||
E2_main, Y2, x, false);
|
||||
state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
|
||||
render_buffer, E2_main, Y2, x, s, false);
|
||||
EXPECT_FALSE(state.ExternalDelay());
|
||||
}
|
||||
|
||||
|
||||
@ -131,6 +131,8 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
LowestBandRate(sample_rate_hz_), 1);
|
||||
data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0],
|
||||
LowestBandRate(sample_rate_hz_), 1);
|
||||
data_dumper_->DumpRaw("aec3_echo_remover_capture_input", y0);
|
||||
data_dumper_->DumpRaw("aec3_echo_remover_render_input", x0);
|
||||
|
||||
aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
|
||||
|
||||
@ -167,13 +169,15 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
|
||||
// Update the AEC state information.
|
||||
aec_state_.Update(subtractor_.FilterFrequencyResponse(),
|
||||
subtractor_.FilterImpulseResponse(),
|
||||
echo_path_delay_samples, render_buffer, E2_main, Y2, x0,
|
||||
echo_leakage_detected_);
|
||||
subtractor_output.s_main, echo_leakage_detected_);
|
||||
|
||||
// Choose the linear output.
|
||||
output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);
|
||||
data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
|
||||
LowestBandRate(sample_rate_hz_), 1);
|
||||
data_dumper_->DumpRaw("aec3_output_linear", y0);
|
||||
const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;
|
||||
|
||||
// Estimate the residual echo power.
|
||||
@ -194,7 +198,14 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
// Update the metrics.
|
||||
metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G);
|
||||
|
||||
// Update the aec state with the aec output characteristics.
|
||||
aec_state_.UpdateWithOutput(y0);
|
||||
|
||||
// Debug outputs for the purpose of development and analysis.
|
||||
data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
|
||||
&subtractor_output.s_main[0],
|
||||
LowestBandRate(sample_rate_hz_), 1);
|
||||
data_dumper_->DumpRaw("aec3_output", y0);
|
||||
data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum());
|
||||
data_dumper_->DumpRaw("aec3_suppressor_gain", G);
|
||||
data_dumper_->DumpWav("aec3_output",
|
||||
|
||||
@ -55,7 +55,8 @@ void RunFilterUpdateTest(int num_blocks_to_process,
|
||||
std::vector<float> y(kBlockSize, 0.f);
|
||||
AecState aec_state(0.f);
|
||||
RenderSignalAnalyzer render_signal_analyzer;
|
||||
std::array<float, kFftLength> s;
|
||||
std::array<float, kFftLength> s_scratch;
|
||||
std::array<float, kBlockSize> s;
|
||||
FftData S;
|
||||
FftData G;
|
||||
SubtractorOutput output;
|
||||
@ -96,18 +97,21 @@ void RunFilterUpdateTest(int num_blocks_to_process,
|
||||
|
||||
// Apply the main filter.
|
||||
main_filter.Filter(render_buffer, &S);
|
||||
fft.Ifft(S, &s);
|
||||
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
|
||||
fft.Ifft(S, &s_scratch);
|
||||
std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
|
||||
e_main.begin(),
|
||||
[&](float a, float b) { return a - b * kScale; });
|
||||
std::for_each(e_main.begin(), e_main.end(),
|
||||
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
|
||||
fft.ZeroPaddedFft(e_main, &E_main);
|
||||
for (size_t k = 0; k < kBlockSize; ++k) {
|
||||
s[k] = kScale * s_scratch[k + kFftLengthBy2];
|
||||
}
|
||||
|
||||
// Apply the shadow filter.
|
||||
shadow_filter.Filter(render_buffer, &S);
|
||||
fft.Ifft(S, &s);
|
||||
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
|
||||
fft.Ifft(S, &s_scratch);
|
||||
std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
|
||||
e_shadow.begin(),
|
||||
[&](float a, float b) { return a - b * kScale; });
|
||||
std::for_each(e_shadow.begin(), e_shadow.end(),
|
||||
@ -131,8 +135,9 @@ void RunFilterUpdateTest(int num_blocks_to_process,
|
||||
// Update the delay.
|
||||
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
|
||||
aec_state.Update(main_filter.FilterFrequencyResponse(),
|
||||
main_filter.FilterImpulseResponse(),
|
||||
rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0],
|
||||
false);
|
||||
s, false);
|
||||
}
|
||||
|
||||
std::copy(e_main.begin(), e_main.end(), e_last_block->begin());
|
||||
|
||||
@ -111,7 +111,7 @@ void ResidualEchoEstimator::Estimate(
|
||||
const int filter_delay = *aec_state.FilterDelay();
|
||||
LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
|
||||
AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
|
||||
aec_state.ReverbDecayFactor(), R2);
|
||||
aec_state.ReverbDecay(), R2);
|
||||
} else {
|
||||
// Estimate the echo generating signal power.
|
||||
std::array<float, kFftLengthBy2Plus1> X2;
|
||||
@ -142,7 +142,12 @@ void ResidualEchoEstimator::Estimate(
|
||||
AddEchoReverb(*R2, aec_state.SaturatedEcho(),
|
||||
std::min(static_cast<size_t>(kAdaptiveFilterLength),
|
||||
delay.value_or(kAdaptiveFilterLength)),
|
||||
aec_state.ReverbDecayFactor(), R2);
|
||||
aec_state.ReverbDecay(), R2);
|
||||
}
|
||||
|
||||
// If the echo is deemed inaudible, set the residual echo to zero.
|
||||
if (aec_state.InaudibleEcho()) {
|
||||
R2->fill(0.f);
|
||||
}
|
||||
|
||||
// If the echo is saturated, estimate the echo power as the maximum echo power
|
||||
|
||||
@ -52,6 +52,7 @@ TEST(ResidualEchoEstimator, BasicTest) {
|
||||
Random random_generator(42U);
|
||||
FftData X;
|
||||
std::array<float, kBlockSize> x_old;
|
||||
std::array<float, kBlockSize> s;
|
||||
Aec3Fft fft;
|
||||
|
||||
for (auto& H2_k : H2) {
|
||||
@ -60,6 +61,11 @@ TEST(ResidualEchoEstimator, BasicTest) {
|
||||
H2[2].fill(10.f);
|
||||
H2[2][0] = 0.1f;
|
||||
|
||||
std::array<float, kAdaptiveFilterTimeDomainLength> h;
|
||||
h.fill(0.f);
|
||||
|
||||
s.fill(100.f);
|
||||
|
||||
constexpr float kLevel = 10.f;
|
||||
E2_shadow.fill(kLevel);
|
||||
E2_main.fill(kLevel);
|
||||
@ -74,8 +80,8 @@ TEST(ResidualEchoEstimator, BasicTest) {
|
||||
render_buffer.Insert(x);
|
||||
|
||||
aec_state.HandleEchoPathChange(echo_path_variability);
|
||||
aec_state.Update(H2, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2,
|
||||
x[0], false);
|
||||
aec_state.Update(H2, h, rtc::Optional<size_t>(2), render_buffer, E2_main,
|
||||
Y2, x[0], s, false);
|
||||
|
||||
estimator.Estimate(true, aec_state, render_buffer, S2_linear, Y2, &R2);
|
||||
}
|
||||
|
||||
@ -25,15 +25,22 @@ void PredictionError(const Aec3Fft& fft,
|
||||
const FftData& S,
|
||||
rtc::ArrayView<const float> y,
|
||||
std::array<float, kBlockSize>* e,
|
||||
FftData* E) {
|
||||
std::array<float, kFftLength> s;
|
||||
fft.Ifft(S, &s);
|
||||
FftData* E,
|
||||
std::array<float, kBlockSize>* s) {
|
||||
std::array<float, kFftLength> s_scratch;
|
||||
fft.Ifft(S, &s_scratch);
|
||||
constexpr float kScale = 1.0f / kFftLengthBy2;
|
||||
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e->begin(),
|
||||
[&](float a, float b) { return a - b * kScale; });
|
||||
std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
|
||||
e->begin(), [&](float a, float b) { return a - b * kScale; });
|
||||
std::for_each(e->begin(), e->end(),
|
||||
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
|
||||
fft.ZeroPaddedFft(*e, E);
|
||||
|
||||
if (s) {
|
||||
for (size_t k = 0; k < s->size(); ++k) {
|
||||
(*s)[k] = kScale * s_scratch[k + kFftLengthBy2];
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@ -47,7 +54,7 @@ Subtractor::Subtractor(ApmDataDumper* data_dumper,
|
||||
RTC_DCHECK(data_dumper_);
|
||||
}
|
||||
|
||||
Subtractor::~Subtractor() {}
|
||||
Subtractor::~Subtractor() = default;
|
||||
|
||||
void Subtractor::HandleEchoPathChange(
|
||||
const EchoPathVariability& echo_path_variability) {
|
||||
@ -76,11 +83,11 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
|
||||
|
||||
// Form the output of the main filter.
|
||||
main_filter_.Filter(render_buffer, &S);
|
||||
PredictionError(fft_, S, y, &e_main, &E_main);
|
||||
PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main);
|
||||
|
||||
// Form the output of the shadow filter.
|
||||
shadow_filter_.Filter(render_buffer, &S);
|
||||
PredictionError(fft_, S, y, &e_shadow, &E_shadow);
|
||||
PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
|
||||
|
||||
// Compute spectra for future use.
|
||||
E_main.Spectrum(optimization_, &output->E2_main);
|
||||
|
||||
@ -45,12 +45,18 @@ class Subtractor {
|
||||
|
||||
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
|
||||
|
||||
// Returns the block-wise frequency response of the main adaptive filter.
|
||||
// Returns the block-wise frequency response for the main adaptive filter.
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
FilterFrequencyResponse() const {
|
||||
return main_filter_.FilterFrequencyResponse();
|
||||
}
|
||||
|
||||
// Returns the estimate of the impulse response for the main adaptive filter.
|
||||
const std::array<float, kAdaptiveFilterTimeDomainLength>&
|
||||
FilterImpulseResponse() const {
|
||||
return main_filter_.FilterImpulseResponse();
|
||||
}
|
||||
|
||||
private:
|
||||
const Aec3Fft fft_;
|
||||
ApmDataDumper* data_dumper_;
|
||||
|
||||
@ -20,6 +20,7 @@ namespace webrtc {
|
||||
|
||||
// Stores the values being returned from the echo subtractor.
|
||||
struct SubtractorOutput {
|
||||
std::array<float, kBlockSize> s_main;
|
||||
std::array<float, kBlockSize> e_main;
|
||||
std::array<float, kBlockSize> e_shadow;
|
||||
FftData E_main;
|
||||
@ -27,6 +28,7 @@ struct SubtractorOutput {
|
||||
std::array<float, kFftLengthBy2Plus1> E2_shadow;
|
||||
|
||||
void Reset() {
|
||||
s_main.fill(0.f);
|
||||
e_main.fill(0.f);
|
||||
e_shadow.fill(0.f);
|
||||
E_main.re.fill(0.f);
|
||||
|
||||
@ -68,8 +68,9 @@ float RunSubtractorTest(int num_blocks_to_process,
|
||||
|
||||
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
|
||||
aec_state.Update(subtractor.FilterFrequencyResponse(),
|
||||
subtractor.FilterImpulseResponse(),
|
||||
rtc::Optional<size_t>(delay_samples / kBlockSize),
|
||||
render_buffer, E2_main, Y2, x[0], false);
|
||||
render_buffer, E2_main, Y2, x[0], output.s_main, false);
|
||||
}
|
||||
|
||||
const float output_power = std::inner_product(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user