AEC3: Add hysteresis period before entering stereo processing

Previously, the AEC entered stereo processing mode even if the playout audio was stereo only very briefly. To save CPU and improve AEC performance, this CL adds a hysteresis period: stereo content must be detected continuously for that period before the playout is treated as stereo.

The feature is enabled by default in the AEC3 config.

Bug: chromium:1295710
Change-Id: I29116ab2e7823e25a02aa3b66a1c619f1d966d9e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258479
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36503}
This commit is contained in:
Sam Zackrisson 2022-04-08 16:14:06 +02:00 committed by WebRTC LUCI CQ
parent fa07b43074
commit cf7f7f9fa0
9 changed files with 295 additions and 67 deletions

View File

@ -241,6 +241,7 @@ struct RTC_EXPORT EchoCanceller3Config {
bool detect_stereo_content = true;
float stereo_detection_threshold = 0.0f;
int stereo_detection_timeout_threshold_seconds = 300;
float stereo_detection_hysteresis_seconds = 2.0f;
} multi_channel;
};
} // namespace webrtc

View File

@ -423,6 +423,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
&cfg.multi_channel.stereo_detection_threshold);
ReadParam(section, "stereo_detection_timeout_threshold_seconds",
&cfg.multi_channel.stereo_detection_timeout_threshold_seconds);
ReadParam(section, "stereo_detection_hysteresis_seconds",
&cfg.multi_channel.stereo_detection_hysteresis_seconds);
}
}
@ -754,7 +756,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
ost << "\"stereo_detection_threshold\": "
<< config.multi_channel.stereo_detection_threshold << ",";
ost << "\"stereo_detection_timeout_threshold_seconds\": "
<< config.multi_channel.stereo_detection_timeout_threshold_seconds;
<< config.multi_channel.stereo_detection_timeout_threshold_seconds << ",";
ost << "\"stereo_detection_hysteresis_seconds\": "
<< config.multi_channel.stereo_detection_hysteresis_seconds;
ost << "}";
ost << "}";

View File

@ -35,6 +35,7 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
!cfg.multi_channel.detect_stereo_content;
cfg.multi_channel.stereo_detection_threshold += 1.0f;
cfg.multi_channel.stereo_detection_timeout_threshold_seconds += 1;
cfg.multi_channel.stereo_detection_hysteresis_seconds += 1;
std::string json_string = Aec3ConfigToJsonString(cfg);
EchoCanceller3Config cfg_transformed = Aec3ConfigFromJsonString(json_string);
@ -86,5 +87,7 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
EXPECT_EQ(
cfg.multi_channel.stereo_detection_timeout_threshold_seconds,
cfg_transformed.multi_channel.stereo_detection_timeout_threshold_seconds);
EXPECT_EQ(cfg.multi_channel.stereo_detection_hysteresis_seconds,
cfg_transformed.multi_channel.stereo_detection_hysteresis_seconds);
}
} // namespace webrtc

View File

@ -704,7 +704,9 @@ EchoCanceller3::EchoCanceller3(
config_selector_.active_config()
.multi_channel.stereo_detection_threshold,
config_selector_.active_config()
.multi_channel.stereo_detection_timeout_threshold_seconds),
.multi_channel.stereo_detection_timeout_threshold_seconds,
config_selector_.active_config()
.multi_channel.stereo_detection_hysteresis_seconds),
output_framer_(num_bands_, num_capture_channels_),
capture_blocker_(num_bands_, num_capture_channels_),
render_transfer_queue_(
@ -772,12 +774,12 @@ void EchoCanceller3::Initialize() {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
num_render_channels_to_aec_ =
multichannel_content_detector_.IsMultiChannelContentDetected()
multichannel_content_detector_.IsProperMultiChannelContentDetected()
? num_render_input_channels_
: 1;
config_selector_.Update(
multichannel_content_detector_.IsMultiChannelContentDetected());
multichannel_content_detector_.IsProperMultiChannelContentDetected());
for (std::vector<std::vector<float>>& block_band : render_block_) {
block_band.resize(num_render_channels_to_aec_);

View File

@ -143,6 +143,8 @@ class EchoCanceller3 : public EchoControl {
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
DetectionOfProperStereoUsingThreshold);
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
DetectionOfProperStereoUsingHysteresis);
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
StereoContentDetectionForMonoSignals);
@ -158,7 +160,7 @@ class EchoCanceller3 : public EchoControl {
// Only for testing. Returns whether stereo processing is active.
bool StereoRenderProcessingActiveForTesting() const {
return multichannel_content_detector_.IsMultiChannelContentDetected();
return multichannel_content_detector_.IsProperMultiChannelContentDetected();
}
// Only for testing.

View File

@ -947,6 +947,7 @@ TEST(EchoCanceller3, DetectionOfProperStereo) {
mono_config.multi_channel.detect_stereo_content = true;
mono_config.multi_channel.stereo_detection_threshold = 0.0f;
mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
multichannel_config = mono_config;
mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
multichannel_config->filter.coarse_initial.length_blocks =
@ -994,6 +995,7 @@ TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
mono_config.multi_channel.detect_stereo_content = true;
mono_config.multi_channel.stereo_detection_threshold =
kStereoDetectionThreshold;
mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
multichannel_config = mono_config;
mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
multichannel_config->filter.coarse_initial.length_blocks =
@ -1024,6 +1026,64 @@ TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
kNumBlocksForSurroundConfig);
}
// Verifies that EchoCanceller3 keeps using the mono configuration while stereo
// content has been present for no longer than the configured hysteresis
// period, and switches to stereo render processing (and the surround
// configuration) on the first frame after that period.
TEST(EchoCanceller3, DetectionOfProperStereoUsingHysteresis) {
  constexpr int kSampleRateHz = 16000;
  constexpr int kNumChannels = 2;
  constexpr int kNumFramesPerSecond = 100;
  // The two configs differ only in this filter length, which makes it possible
  // to tell which of them is currently active.
  constexpr size_t kNumBlocksForMonoConfig = 1;
  constexpr size_t kNumBlocksForSurroundConfig = 2;

  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
                     /*input_num_channels=*/kNumChannels,
                     /*buffer_rate=*/kSampleRateHz,
                     /*buffer_num_channels=*/kNumChannels,
                     /*output_rate=*/kSampleRateHz,
                     /*output_num_channels=*/kNumChannels);

  EchoCanceller3Config mono_config;
  mono_config.multi_channel.detect_stereo_content = true;
  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.5f;
  // The surround config inherits the detection settings set above.
  absl::optional<EchoCanceller3Config> surround_config = mono_config;
  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
  surround_config->filter.coarse_initial.length_blocks =
      kNumBlocksForSurroundConfig;

  EchoCanceller3 aec3(mono_config, surround_config,
                      /*sample_rate_hz=*/kSampleRateHz,
                      /*num_render_channels=*/kNumChannels,
                      /*num_capture_input_channels=*/kNumChannels);

  // Checks both the reported stereo processing state and the active config.
  const auto expect_processing_state = [&aec3](bool stereo_active,
                                               size_t coarse_length_blocks) {
    EXPECT_EQ(aec3.StereoRenderProcessingActiveForTesting(), stereo_active);
    EXPECT_EQ(
        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
        coarse_length_blocks);
  };

  // Mono processing is expected right after construction.
  expect_processing_state(/*stereo_active=*/false, kNumBlocksForMonoConfig);

  // Equal values for both arguments (presumably identical left/right channel
  // content) must not trigger stereo processing.
  RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
  expect_processing_state(/*stereo_active=*/false, kNumBlocksForMonoConfig);

  // Differing values are fed for exactly the hysteresis period; stereo
  // processing must stay off for every one of those frames.
  const int num_hysteresis_frames = static_cast<int>(
      kNumFramesPerSecond *
      mono_config.multi_channel.stereo_detection_hysteresis_seconds);
  for (int frame = 0; frame < num_hysteresis_frames; ++frame) {
    RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
    expect_processing_state(/*stereo_active=*/false, kNumBlocksForMonoConfig);
  }

  // One more differing frame exceeds the hysteresis period and must activate
  // stereo processing together with the surround config.
  RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
  expect_processing_state(/*stereo_active=*/true, kNumBlocksForSurroundConfig);
}
TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) {
constexpr int kSampleRateHz = 16000;
constexpr int kNumChannels = 2;

View File

@ -13,6 +13,8 @@
#include <cmath>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
@ -23,7 +25,7 @@ constexpr int kNumFramesPerSecond = 100;
// whether the signal is a proper stereo signal. To allow for differences
// introduced by hardware drivers, a threshold `detection_threshold` is used for
// the detection.
bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame,
bool HasStereoContent(const std::vector<std::vector<std::vector<float>>>& frame,
float detection_threshold) {
if (frame[0].size() < 2) {
return false;
@ -46,7 +48,8 @@ MultiChannelContentDetector::MultiChannelContentDetector(
bool detect_stereo_content,
int num_render_input_channels,
float detection_threshold,
int stereo_detection_timeout_threshold_seconds)
int stereo_detection_timeout_threshold_seconds,
float stereo_detection_hysteresis_seconds)
: detect_stereo_content_(detect_stereo_content),
detection_threshold_(detection_threshold),
detection_timeout_threshold_frames_(
@ -54,29 +57,46 @@ MultiChannelContentDetector::MultiChannelContentDetector(
? absl::make_optional(stereo_detection_timeout_threshold_seconds *
kNumFramesPerSecond)
: absl::nullopt),
proper_multichannel_content_detected_(!detect_stereo_content &&
num_render_input_channels > 1) {}
stereo_detection_hysteresis_frames_(static_cast<int>(
stereo_detection_hysteresis_seconds * kNumFramesPerSecond)),
persistent_multichannel_content_detected_(
!detect_stereo_content && num_render_input_channels > 1) {}
bool MultiChannelContentDetector::UpdateDetection(
const std::vector<std::vector<std::vector<float>>>& frame) {
if (!detect_stereo_content_)
if (!detect_stereo_content_) {
RTC_DCHECK_EQ(frame[0].size() > 1,
persistent_multichannel_content_detected_);
return false;
const bool previous_proper_multichannel_content_detected =
proper_multichannel_content_detected_;
if (IsProperStereo(frame, detection_threshold_)) {
proper_multichannel_content_detected_ = true;
frames_since_stereo_detected_ = 0;
} else {
++frames_since_stereo_detected_;
if (detection_timeout_threshold_frames_ &&
frames_since_stereo_detected_ >= *detection_timeout_threshold_frames_) {
proper_multichannel_content_detected_ = false;
}
const bool previous_persistent_multichannel_content_detected =
persistent_multichannel_content_detected_;
const bool stereo_detected_in_frame =
HasStereoContent(frame, detection_threshold_);
consecutive_frames_with_stereo_ =
stereo_detected_in_frame ? consecutive_frames_with_stereo_ + 1 : 0;
frames_since_stereo_detected_last_ =
stereo_detected_in_frame ? 0 : frames_since_stereo_detected_last_ + 1;
// Detect persistent multichannel content.
if (consecutive_frames_with_stereo_ > stereo_detection_hysteresis_frames_) {
persistent_multichannel_content_detected_ = true;
}
return previous_proper_multichannel_content_detected !=
proper_multichannel_content_detected_;
if (detection_timeout_threshold_frames_.has_value() &&
frames_since_stereo_detected_last_ >=
*detection_timeout_threshold_frames_) {
persistent_multichannel_content_detected_ = false;
}
// Detect temporary multichannel content.
temporary_multichannel_content_detected_ =
persistent_multichannel_content_detected_ ? false
: stereo_detected_in_frame;
return previous_persistent_multichannel_content_detected !=
persistent_multichannel_content_detected_;
}
} // namespace webrtc

View File

@ -31,24 +31,33 @@ class MultiChannelContentDetector {
MultiChannelContentDetector(bool detect_stereo_content,
int num_render_input_channels,
float detection_threshold,
int stereo_detection_timeout_threshold_seconds);
int stereo_detection_timeout_threshold_seconds,
float stereo_detection_hysteresis_seconds);
// Compares the left and right channels in the render `frame` to determine
// whether the signal is a proper multichannel signal. Returns a bool
// indicating whether a change in the multichannel was detected.
// indicating whether a change in the proper multichannel content was
// detected.
bool UpdateDetection(
const std::vector<std::vector<std::vector<float>>>& frame);
bool IsMultiChannelContentDetected() const {
return proper_multichannel_content_detected_;
bool IsProperMultiChannelContentDetected() const {
return persistent_multichannel_content_detected_;
}
bool IsTemporaryMultiChannelContentDetectedForTesting() const {
return temporary_multichannel_content_detected_;
}
private:
const bool detect_stereo_content_;
const float detection_threshold_;
const absl::optional<int> detection_timeout_threshold_frames_;
bool proper_multichannel_content_detected_;
int frames_since_stereo_detected_ = 0;
const int stereo_detection_hysteresis_frames_;
bool persistent_multichannel_content_detected_;
bool temporary_multichannel_content_detected_ = false;
int64_t frames_since_stereo_detected_last_ = 0;
int64_t consecutive_frames_with_stereo_ = 0;
};
} // namespace webrtc

View File

@ -19,8 +19,9 @@ TEST(MultiChannelContentDetector, HandlingOfMono) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/1,
/*detection_threshold=*/0.0f,
/*stereo_detection_timeout_threshold_seconds=*/0);
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
@ -28,8 +29,9 @@ TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
/*detect_stereo_content=*/false,
/*num_render_input_channels=*/1,
/*detection_threshold=*/0.0f,
/*stereo_detection_timeout_threshold_seconds=*/0);
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
@ -37,8 +39,9 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
/*detect_stereo_content=*/false,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
/*stereo_detection_timeout_threshold_seconds=*/0);
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
@ -46,7 +49,7 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
EXPECT_FALSE(mc.UpdateDetection(frame));
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@ -56,8 +59,9 @@ TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
/*stereo_detection_timeout_threshold_seconds=*/0);
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
}
TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
@ -65,13 +69,14 @@ TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
/*stereo_detection_timeout_threshold_seconds=*/0);
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
EXPECT_FALSE(mc.UpdateDetection(frame));
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@ -81,13 +86,14 @@ TEST(MultiChannelContentDetector, DetectionWhenStereo) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
/*stereo_detection_timeout_threshold_seconds=*/0);
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
EXPECT_TRUE(mc.UpdateDetection(frame));
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@ -97,14 +103,15 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
/*stereo_detection_timeout_threshold_seconds=*/0);
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
EXPECT_FALSE(mc.UpdateDetection(frame));
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
@ -112,7 +119,7 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
EXPECT_TRUE(mc.UpdateDetection(frame));
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@ -123,14 +130,15 @@ TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/kThreshold,
/*stereo_detection_timeout_threshold_seconds=*/0);
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold);
EXPECT_FALSE(mc.UpdateDetection(frame));
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@ -141,14 +149,15 @@ TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
/*detect_stereo_content=*/true,
/*num_render_input_channels=*/2,
/*detection_threshold=*/kThreshold,
/*stereo_detection_timeout_threshold_seconds=*/0);
/*stereo_detection_timeout_threshold_seconds=*/0,
/*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> frame(
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold + 0.1f);
EXPECT_TRUE(mc.UpdateDetection(frame));
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
EXPECT_FALSE(mc.UpdateDetection(frame));
}
@ -166,16 +175,16 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
TimeOutBehaviorForNonTrueStereo) {
constexpr int kNumFramesPerSecond = 100;
const bool detect_stereo_content = std::get<0>(GetParam());
const int stereo_stereo_detection_timeout_threshold_seconds =
const int stereo_detection_timeout_threshold_seconds =
std::get<1>(GetParam());
const int stereo_detection_timeout_threshold_frames =
stereo_stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
MultiChannelContentDetector mc(
detect_stereo_content,
MultiChannelContentDetector mc(detect_stereo_content,
/*num_render_input_channels=*/2,
/*detection_threshold=*/0.0f,
stereo_stereo_detection_timeout_threshold_seconds);
stereo_detection_timeout_threshold_seconds,
/*stereo_detection_hysteresis_seconds=*/0.0f);
std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
{std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
@ -186,9 +195,9 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
for (int k = 0; k < 10; ++k) {
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
if (detect_stereo_content) {
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
} else {
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
}
}
@ -198,21 +207,21 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
} else {
EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
}
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
// Pass fake stereo frames until any timeouts are about to occur.
for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) {
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
}
// Pass a fake stereo frame and verify that any timeouts properly occur.
if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) {
EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame));
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
} else {
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
}
// Pass fake stereo frames and verify the behavior after any timeout.
@ -220,11 +229,129 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
if (detect_stereo_content &&
stereo_detection_timeout_threshold_frames > 0) {
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
} else {
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
}
}
}
class MultiChannelContentDetectorHysteresisBehavior
: public ::testing::Test,
public ::testing::WithParamInterface<std::tuple<bool, float>> {};
INSTANTIATE_TEST_SUITE_P(
MultiChannelContentDetector,
MultiChannelContentDetectorHysteresisBehavior,
::testing::Combine(::testing::Values(false, true),
::testing::Values(0.0f, 0.1f, 0.2f)));
// Verifies how MultiChannelContentDetector reports stereo content before,
// during and after the hysteresis period.
//
// With detection enabled, stereo frames seen during the hysteresis period are
// only reported as temporary multichannel content; proper (persistent)
// multichannel content is reported once the number of consecutive stereo
// frames exceeds the hysteresis length. With detection disabled, the detector
// is constructed in the proper-multichannel state for 2 render channels and
// UpdateDetection() never reports a change.
TEST_P(MultiChannelContentDetectorHysteresisBehavior,
       PeriodBeforeStereoDetectionIsTriggered) {
  using Frame = std::vector<std::vector<std::vector<float>>>;

  constexpr int kNumFramesPerSecond = 100;
  const bool detect_stereo_content = std::get<0>(GetParam());
  // The parameter must stay a float: reading it into an int truncates 0.1 and
  // 0.2 to 0, which would silently turn every instantiation into the
  // zero-hysteresis case.
  const float stereo_detection_hysteresis_seconds = std::get<1>(GetParam());
  // Same conversion as the one performed by the detector's constructor.
  const int stereo_detection_hysteresis_frames = static_cast<int>(
      stereo_detection_hysteresis_seconds * kNumFramesPerSecond);

  MultiChannelContentDetector mc(
      detect_stereo_content,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      stereo_detection_hysteresis_seconds);
  const Frame true_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
  const Frame fake_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};

  // Feeds `frame` to the detector and checks its three outputs. The expected
  // values passed in describe the detection-enabled case; with detection
  // disabled the outputs are always (no change, proper content, no temporary
  // content), regardless of the frame.
  const auto update_and_verify = [&mc, detect_stereo_content](
                                     const Frame& frame, bool expect_change,
                                     bool expect_proper,
                                     bool expect_temporary) {
    if (!detect_stereo_content) {
      expect_change = false;
      expect_proper = true;
      expect_temporary = false;
    }
    EXPECT_EQ(mc.UpdateDetection(frame), expect_change);
    EXPECT_EQ(mc.IsProperMultiChannelContentDetected(), expect_proper);
    EXPECT_EQ(mc.IsTemporaryMultiChannelContentDetectedForTesting(),
              expect_temporary);
  };

  // Pass fake stereo frames and verify the content detection.
  for (int k = 0; k < 10; ++k) {
    update_and_verify(fake_stereo_frame, /*expect_change=*/false,
                      /*expect_proper=*/false, /*expect_temporary=*/false);
  }

  // Pass two true stereo frames and verify that they are properly detected.
  // The remainder of the test assumes that two frames are fewer than the
  // hysteresis length, unless there is no hysteresis at all.
  ASSERT_TRUE(stereo_detection_hysteresis_frames > 2 ||
              stereo_detection_hysteresis_frames == 0);
  for (int k = 0; k < 2; ++k) {
    if (stereo_detection_hysteresis_frames == 0) {
      // Without hysteresis the very first stereo frame triggers the switch.
      update_and_verify(true_stereo_frame, /*expect_change=*/k == 0,
                        /*expect_proper=*/true, /*expect_temporary=*/false);
    } else {
      update_and_verify(true_stereo_frame, /*expect_change=*/false,
                        /*expect_proper=*/false, /*expect_temporary=*/true);
    }
  }

  if (stereo_detection_hysteresis_frames == 0) {
    return;
  }

  // Pass true stereo frames until the hysteresis period is about to be
  // exceeded. Two stereo frames were already passed above, so this brings the
  // number of consecutive stereo frames to exactly the hysteresis length,
  // which must not yet trigger the switch.
  for (int k = 0; k < stereo_detection_hysteresis_frames - 2; ++k) {
    update_and_verify(true_stereo_frame, /*expect_change=*/false,
                      /*expect_proper=*/false, /*expect_temporary=*/true);
  }

  // Pass a true stereo frame that exceeds the hysteresis period and verify
  // that proper multichannel content is now detected.
  update_and_verify(true_stereo_frame, /*expect_change=*/true,
                    /*expect_proper=*/true, /*expect_temporary=*/false);

  // Pass an additional true stereo frame and verify that the state is kept
  // without a further change being reported.
  update_and_verify(true_stereo_frame, /*expect_change=*/false,
                    /*expect_proper=*/true, /*expect_temporary=*/false);

  // Pass a fake stereo frame and verify that the detection is retained (no
  // timeout is configured in this test).
  update_and_verify(fake_stereo_frame, /*expect_change=*/false,
                    /*expect_proper=*/true, /*expect_temporary=*/false);
}
} // namespace webrtc