AEC3: Add fallback to mono processing if no stereo is detected for some time
If playout audio is temporarily stereo, the AEC will currently enter stereo processing mode indefinitely. To save CPU and improve AEC performance, this CL adds support for falling back to mono after a period of no stereo. The feature is enabled by default in the AEC3 config. Bug: chromium:1295710 Change-Id: I690b5b22f8407f950bf41f3bcaa9ca0138452157 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258421 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/main@{#36502}
This commit is contained in:
parent
13fe3674ff
commit
fa07b43074
@ -240,6 +240,7 @@ struct RTC_EXPORT EchoCanceller3Config {
|
||||
// Settings for detecting stereo content in the render signal.
struct MultiChannel {
|
||||
// Whether render frames are analyzed for stereo content. Passed by the
// EchoCanceller3 constructor to its multichannel content detector.
bool detect_stereo_content = true;
|
||||
// Threshold handed to the detector as its `detection_threshold`.
// NOTE(review): presumably the tolerated difference between the left and
// right channels - confirm against IsProperStereo().
float stereo_detection_threshold = 0.0f;
|
||||
// Time in seconds without detected stereo after which the detector stops
// reporting multichannel content. Values <= 0 disable the timeout.
int stereo_detection_timeout_threshold_seconds = 300;
|
||||
} multi_channel;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
@ -421,6 +421,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
|
||||
&cfg.multi_channel.detect_stereo_content);
|
||||
ReadParam(section, "stereo_detection_threshold",
|
||||
&cfg.multi_channel.stereo_detection_threshold);
|
||||
ReadParam(section, "stereo_detection_timeout_threshold_seconds",
|
||||
&cfg.multi_channel.stereo_detection_timeout_threshold_seconds);
|
||||
}
|
||||
}
|
||||
|
||||
@ -750,7 +752,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
|
||||
ost << "\"detect_stereo_content\": "
|
||||
<< (config.multi_channel.detect_stereo_content ? "true" : "false") << ",";
|
||||
ost << "\"stereo_detection_threshold\": "
|
||||
<< config.multi_channel.stereo_detection_threshold;
|
||||
<< config.multi_channel.stereo_detection_threshold << ",";
|
||||
ost << "\"stereo_detection_timeout_threshold_seconds\": "
|
||||
<< config.multi_channel.stereo_detection_timeout_threshold_seconds;
|
||||
ost << "}";
|
||||
|
||||
ost << "}";
|
||||
|
||||
@ -33,8 +33,8 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
|
||||
cfg.suppressor.subband_nearend_detection.snr_threshold = 100.f;
|
||||
cfg.multi_channel.detect_stereo_content =
|
||||
!cfg.multi_channel.detect_stereo_content;
|
||||
cfg.multi_channel.stereo_detection_threshold =
|
||||
cfg.multi_channel.stereo_detection_threshold + 1.0f;
|
||||
cfg.multi_channel.stereo_detection_threshold += 1.0f;
|
||||
cfg.multi_channel.stereo_detection_timeout_threshold_seconds += 1;
|
||||
std::string json_string = Aec3ConfigToJsonString(cfg);
|
||||
EchoCanceller3Config cfg_transformed = Aec3ConfigFromJsonString(json_string);
|
||||
|
||||
@ -83,5 +83,8 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
|
||||
cfg_transformed.multi_channel.detect_stereo_content);
|
||||
EXPECT_EQ(cfg.multi_channel.stereo_detection_threshold,
|
||||
cfg_transformed.multi_channel.stereo_detection_threshold);
|
||||
EXPECT_EQ(
|
||||
cfg.multi_channel.stereo_detection_timeout_threshold_seconds,
|
||||
cfg_transformed.multi_channel.stereo_detection_timeout_threshold_seconds);
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
||||
@ -23,21 +23,23 @@ bool CompatibleConfigs(const EchoCanceller3Config& mono_config,
|
||||
multichannel_config.delay.fixed_capture_delay_samples) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mono_config.filter.export_linear_aec_output !=
|
||||
multichannel_config.filter.export_linear_aec_output) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mono_config.filter.high_pass_filter_echo_reference !=
|
||||
multichannel_config.filter.high_pass_filter_echo_reference) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mono_config.multi_channel.detect_stereo_content !=
|
||||
multichannel_config.multi_channel.detect_stereo_content) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.multi_channel.stereo_detection_timeout_threshold_seconds !=
|
||||
multichannel_config.multi_channel
|
||||
.stereo_detection_timeout_threshold_seconds) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -702,7 +702,9 @@ EchoCanceller3::EchoCanceller3(
|
||||
config_selector_.active_config().multi_channel.detect_stereo_content,
|
||||
num_render_input_channels_,
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_threshold),
|
||||
.multi_channel.stereo_detection_threshold,
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_timeout_threshold_seconds),
|
||||
output_framer_(num_bands_, num_capture_channels_),
|
||||
capture_blocker_(num_bands_, num_capture_channels_),
|
||||
render_transfer_queue_(
|
||||
|
||||
@ -17,6 +17,8 @@ namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Frame rate used to convert the configured stereo detection timeout from
// seconds into a number of frames.
constexpr int kNumFramesPerSecond = 100;
|
||||
|
||||
// Compares the left and right channels in the render `frame` to determine
|
||||
// whether the signal is a proper stereo signal. To allow for differences
|
||||
// introduced by hardware drivers, a threshold `detection_threshold` is used for
|
||||
@ -43,21 +45,37 @@ bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame,
|
||||
// Constructs the detector. `stereo_detection_timeout_threshold_seconds` is
// converted to a frame count; values <= 0 leave the timeout unset.
// NOTE(review): this listing appears to contain both the removed and the added
// parameter lines of the change without +/- markers.
MultiChannelContentDetector::MultiChannelContentDetector(
|
||||
bool detect_stereo_content,
|
||||
int num_render_input_channels,
|
||||
float detection_threshold)
|
||||
float detection_threshold,
|
||||
int stereo_detection_timeout_threshold_seconds)
|
||||
: detect_stereo_content_(detect_stereo_content),
|
||||
detection_threshold_(detection_threshold),
|
||||
// An empty optional means "never time out".
// NOTE(review): the conversion below is a plain int multiplication; a very
// large configured value would overflow - consider clamping.
detection_timeout_threshold_frames_(
|
||||
stereo_detection_timeout_threshold_seconds > 0
|
||||
? absl::make_optional(stereo_detection_timeout_threshold_seconds *
|
||||
kNumFramesPerSecond)
|
||||
: absl::nullopt),
|
||||
// With detection disabled, more than one render channel is treated as
// multichannel content from the start.
proper_multichannel_content_detected_(!detect_stereo_content &&
|
||||
num_render_input_channels > 1) {}
|
||||
|
||||
// Updates the detection state from one render `frame` and returns true if and
// only if the reported multichannel state changed during this call.
// NOTE(review): this listing appears to contain both the removed and the added
// lines of the change without +/- markers; the comments below describe the
// timeout-aware version.
bool MultiChannelContentDetector::UpdateDetection(
|
||||
const std::vector<std::vector<std::vector<float>>>& frame) {
|
||||
bool previous_proper_multichannel_content_detected_ =
|
||||
// With detection disabled the state is fixed at construction, so it can never
// change here.
if (!detect_stereo_content_)
|
||||
return false;
|
||||
|
||||
const bool previous_proper_multichannel_content_detected =
|
||||
proper_multichannel_content_detected_;
|
||||
if (detect_stereo_content_ && !proper_multichannel_content_detected_) {
|
||||
proper_multichannel_content_detected_ =
|
||||
IsProperStereo(frame, detection_threshold_);
|
||||
|
||||
// A proper stereo frame (re)enters the multichannel state and restarts the
// timeout counter.
if (IsProperStereo(frame, detection_threshold_)) {
|
||||
proper_multichannel_content_detected_ = true;
|
||||
frames_since_stereo_detected_ = 0;
|
||||
} else {
|
||||
// NOTE(review): incremented on every non-stereo frame with no upper bound,
// also when no timeout is configured; consider saturating the counter to rule
// out int overflow in very long sessions.
++frames_since_stereo_detected_;
|
||||
// Leave the multichannel state once the configured number of consecutive
// non-stereo frames is reached. Without a timeout the state is never cleared.
if (detection_timeout_threshold_frames_ &&
|
||||
frames_since_stereo_detected_ >= *detection_timeout_threshold_frames_) {
|
||||
proper_multichannel_content_detected_ = false;
|
||||
}
|
||||
}
|
||||
return previous_proper_multichannel_content_detected_ !=
|
||||
return previous_proper_multichannel_content_detected !=
|
||||
proper_multichannel_content_detected_;
|
||||
}
|
||||
|
||||
|
||||
@ -15,6 +15,8 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Analyzes audio content to determine whether the contained audio is proper
|
||||
@ -23,9 +25,13 @@ namespace webrtc {
|
||||
// detection.
|
||||
class MultiChannelContentDetector {
|
||||
public:
|
||||
// If `stereo_detection_timeout_threshold_seconds` <= 0, no timeout is
|
||||
// applied: Once multichannel is detected, the detector remains in that state
|
||||
// for its lifetime.
|
||||
MultiChannelContentDetector(bool detect_stereo_content,
|
||||
int num_render_input_channels,
|
||||
float detection_threshold);
|
||||
float detection_threshold,
|
||||
int stereo_detection_timeout_threshold_seconds);
|
||||
|
||||
// Compares the left and right channels in the render `frame` to determine
|
||||
// whether the signal is a proper multichannel signal. Returns a bool
|
||||
@ -40,7 +46,9 @@ class MultiChannelContentDetector {
|
||||
private:
|
||||
const bool detect_stereo_content_;
|
||||
const float detection_threshold_;
|
||||
const absl::optional<int> detection_timeout_threshold_frames_;
|
||||
bool proper_multichannel_content_detected_;
|
||||
int frames_since_stereo_detected_ = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -15,23 +15,29 @@
|
||||
namespace webrtc {
|
||||
|
||||
// With stereo detection enabled and a single render channel, no multichannel
// content is reported right after construction.
TEST(MultiChannelContentDetector, HandlingOfMono) {
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/1,
|
||||
/*detection_threshold=*/0.0f);
|
||||
// A timeout of 0 seconds means that no timeout is applied.
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/1,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
// With stereo detection disabled and a single render channel, no multichannel
// content is reported right after construction.
TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/false,
|
||||
/*num_render_input_channels=*/1,
|
||||
/*detection_threshold=*/0.0f);
|
||||
// A timeout of 0 seconds means that no timeout is applied.
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/false,
|
||||
/*num_render_input_channels=*/1,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/false,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f);
|
||||
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/false,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
@ -46,16 +52,20 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
|
||||
}
|
||||
|
||||
// With stereo detection enabled and two render channels, no multichannel
// content is reported before any frame has been analyzed.
TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f);
|
||||
// A timeout of 0 seconds means that no timeout is applied.
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f);
|
||||
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
@ -67,9 +77,11 @@ TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
|
||||
}
|
||||
|
||||
TEST(MultiChannelContentDetector, DetectionWhenStereo) {
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f);
|
||||
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
@ -81,9 +93,11 @@ TEST(MultiChannelContentDetector, DetectionWhenStereo) {
|
||||
}
|
||||
|
||||
TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f);
|
||||
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
|
||||
@ -105,9 +119,11 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
|
||||
|
||||
TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
|
||||
constexpr float kThreshold = 1.0f;
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/kThreshold);
|
||||
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/kThreshold,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
@ -121,9 +137,11 @@ TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
|
||||
|
||||
TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
|
||||
constexpr float kThreshold = 1.0f;
|
||||
MultiChannelContentDetector mc(/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/kThreshold);
|
||||
MultiChannelContentDetector mc(
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/kThreshold,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
@ -135,4 +153,78 @@ TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
}
|
||||
|
||||
// Parameterized fixture for the timeout tests. The parameter tuple holds
// (detect_stereo_content, stereo detection timeout in seconds).
class MultiChannelContentDetectorTimeoutBehavior
|
||||
: public ::testing::Test,
|
||||
public ::testing::WithParamInterface<std::tuple<bool, int>> {};
|
||||
|
||||
// Runs the timeout tests for every combination of stereo detection off/on and
// timeouts of 0 (no timeout applied), 1 and 10 seconds.
INSTANTIATE_TEST_SUITE_P(MultiChannelContentDetector,
|
||||
MultiChannelContentDetectorTimeoutBehavior,
|
||||
::testing::Combine(::testing::Values(false, true),
|
||||
::testing::Values(0, 1, 10)));
|
||||
|
||||
// Verifies the detector state before stereo is seen, when stereo is seen, just
// before the timeout, at the timeout and after it, for each combination of
// detection on/off and timeout length.
TEST_P(MultiChannelContentDetectorTimeoutBehavior,
|
||||
TimeOutBehaviorForNonTrueStereo) {
|
||||
// Same value as the frame rate the detector uses to convert seconds to frames.
constexpr int kNumFramesPerSecond = 100;
|
||||
const bool detect_stereo_content = std::get<0>(GetParam());
|
||||
// NOTE(review): the "stereo_stereo_" prefix looks like an accidental
// duplication; consider renaming in a follow-up.
const int stereo_stereo_detection_timeout_threshold_seconds =
|
||||
std::get<1>(GetParam());
|
||||
// A value of 0 here corresponds to "no timeout".
const int stereo_detection_timeout_threshold_frames =
|
||||
stereo_stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
|
||||
|
||||
MultiChannelContentDetector mc(
|
||||
detect_stereo_content,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
stereo_stereo_detection_timeout_threshold_seconds);
|
||||
// Two channels of 160 samples whose values differ (100 vs 101).
// NOTE(review): the outer dimension of size 1 is presumably the band index -
// confirm.
std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
|
||||
{std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
|
||||
|
||||
// Two channels of 160 samples with identical content.
std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
|
||||
{std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};
|
||||
|
||||
// Pass fake stereo frames and verify the content detection.
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
|
||||
if (detect_stereo_content) {
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
} else {
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
}
|
||||
}
|
||||
|
||||
// Pass a true stereo frame and verify that it is properly detected.
|
||||
// A state change is only reported when detection is enabled; with detection
// off the detector already reports multichannel content.
if (detect_stereo_content) {
|
||||
EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
|
||||
} else {
|
||||
EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
|
||||
}
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
|
||||
// Pass fake stereo frames until any timeouts are about to occur.
|
||||
// Stops one frame short of the threshold so that the next fake frame is the
// one that triggers the timeout. With a threshold of 0 the loop does not run.
for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) {
|
||||
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
// Pass a fake stereo frame and verify that any timeouts properly occur.
|
||||
if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) {
|
||||
EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame));
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
} else {
|
||||
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
// Pass fake stereo frames and verify the behavior after any timeout.
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
|
||||
if (detect_stereo_content &&
|
||||
stereo_detection_timeout_threshold_frames > 0) {
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
} else {
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user