AEC3: Add hysteresis period before entering stereo processing
Even if playout audio is only very briefly stereo, the AEC will enter stereo processing mode. To save CPU and improve AEC performance, this CL adds a hysteresis period before treating playout as stereo. The feature is enabled by default in the AEC3 config. Bug: chromium:1295710 Change-Id: I29116ab2e7823e25a02aa3b66a1c619f1d966d9e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258479 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/main@{#36503}
This commit is contained in:
parent
fa07b43074
commit
cf7f7f9fa0
@ -241,6 +241,7 @@ struct RTC_EXPORT EchoCanceller3Config {
|
||||
bool detect_stereo_content = true;
|
||||
float stereo_detection_threshold = 0.0f;
|
||||
int stereo_detection_timeout_threshold_seconds = 300;
|
||||
float stereo_detection_hysteresis_seconds = 2.0f;
|
||||
} multi_channel;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
@ -423,6 +423,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
|
||||
&cfg.multi_channel.stereo_detection_threshold);
|
||||
ReadParam(section, "stereo_detection_timeout_threshold_seconds",
|
||||
&cfg.multi_channel.stereo_detection_timeout_threshold_seconds);
|
||||
ReadParam(section, "stereo_detection_hysteresis_seconds",
|
||||
&cfg.multi_channel.stereo_detection_hysteresis_seconds);
|
||||
}
|
||||
}
|
||||
|
||||
@ -754,7 +756,9 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
|
||||
ost << "\"stereo_detection_threshold\": "
|
||||
<< config.multi_channel.stereo_detection_threshold << ",";
|
||||
ost << "\"stereo_detection_timeout_threshold_seconds\": "
|
||||
<< config.multi_channel.stereo_detection_timeout_threshold_seconds;
|
||||
<< config.multi_channel.stereo_detection_timeout_threshold_seconds << ",";
|
||||
ost << "\"stereo_detection_hysteresis_seconds\": "
|
||||
<< config.multi_channel.stereo_detection_hysteresis_seconds;
|
||||
ost << "}";
|
||||
|
||||
ost << "}";
|
||||
|
||||
@ -35,6 +35,7 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
|
||||
!cfg.multi_channel.detect_stereo_content;
|
||||
cfg.multi_channel.stereo_detection_threshold += 1.0f;
|
||||
cfg.multi_channel.stereo_detection_timeout_threshold_seconds += 1;
|
||||
cfg.multi_channel.stereo_detection_hysteresis_seconds += 1;
|
||||
std::string json_string = Aec3ConfigToJsonString(cfg);
|
||||
EchoCanceller3Config cfg_transformed = Aec3ConfigFromJsonString(json_string);
|
||||
|
||||
@ -86,5 +87,7 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
|
||||
EXPECT_EQ(
|
||||
cfg.multi_channel.stereo_detection_timeout_threshold_seconds,
|
||||
cfg_transformed.multi_channel.stereo_detection_timeout_threshold_seconds);
|
||||
EXPECT_EQ(cfg.multi_channel.stereo_detection_hysteresis_seconds,
|
||||
cfg_transformed.multi_channel.stereo_detection_hysteresis_seconds);
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
||||
@ -704,7 +704,9 @@ EchoCanceller3::EchoCanceller3(
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_threshold,
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_timeout_threshold_seconds),
|
||||
.multi_channel.stereo_detection_timeout_threshold_seconds,
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_hysteresis_seconds),
|
||||
output_framer_(num_bands_, num_capture_channels_),
|
||||
capture_blocker_(num_bands_, num_capture_channels_),
|
||||
render_transfer_queue_(
|
||||
@ -772,12 +774,12 @@ void EchoCanceller3::Initialize() {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
|
||||
num_render_channels_to_aec_ =
|
||||
multichannel_content_detector_.IsMultiChannelContentDetected()
|
||||
multichannel_content_detector_.IsProperMultiChannelContentDetected()
|
||||
? num_render_input_channels_
|
||||
: 1;
|
||||
|
||||
config_selector_.Update(
|
||||
multichannel_content_detector_.IsMultiChannelContentDetected());
|
||||
multichannel_content_detector_.IsProperMultiChannelContentDetected());
|
||||
|
||||
for (std::vector<std::vector<float>>& block_band : render_block_) {
|
||||
block_band.resize(num_render_channels_to_aec_);
|
||||
|
||||
@ -143,6 +143,8 @@ class EchoCanceller3 : public EchoControl {
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
|
||||
DetectionOfProperStereoUsingThreshold);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
|
||||
DetectionOfProperStereoUsingHysteresis);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
|
||||
StereoContentDetectionForMonoSignals);
|
||||
|
||||
@ -158,7 +160,7 @@ class EchoCanceller3 : public EchoControl {
|
||||
|
||||
// Only for testing. Returns whether stereo processing is active.
|
||||
bool StereoRenderProcessingActiveForTesting() const {
|
||||
return multichannel_content_detector_.IsMultiChannelContentDetected();
|
||||
return multichannel_content_detector_.IsProperMultiChannelContentDetected();
|
||||
}
|
||||
|
||||
// Only for testing.
|
||||
|
||||
@ -947,6 +947,7 @@ TEST(EchoCanceller3, DetectionOfProperStereo) {
|
||||
|
||||
mono_config.multi_channel.detect_stereo_content = true;
|
||||
mono_config.multi_channel.stereo_detection_threshold = 0.0f;
|
||||
mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
|
||||
multichannel_config = mono_config;
|
||||
mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
|
||||
multichannel_config->filter.coarse_initial.length_blocks =
|
||||
@ -994,6 +995,7 @@ TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
|
||||
mono_config.multi_channel.detect_stereo_content = true;
|
||||
mono_config.multi_channel.stereo_detection_threshold =
|
||||
kStereoDetectionThreshold;
|
||||
mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
|
||||
multichannel_config = mono_config;
|
||||
mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
|
||||
multichannel_config->filter.coarse_initial.length_blocks =
|
||||
@ -1024,6 +1026,64 @@ TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
|
||||
kNumBlocksForSurroundConfig);
|
||||
}
|
||||
|
||||
// Verifies that EchoCanceller3 keeps using the mono configuration while true
// stereo render content has been present for no longer than the configured
// hysteresis period, and switches to the surround configuration afterwards.
TEST(EchoCanceller3, DetectionOfProperStereoUsingHysteresis) {
  constexpr int kSampleRateHz = 16000;
  constexpr int kNumChannels = 2;
  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
                     /*input_num_channels=*/kNumChannels,
                     /*buffer_rate=*/kSampleRateHz,
                     /*buffer_num_channels=*/kNumChannels,
                     /*output_rate=*/kSampleRateHz,
                     /*output_num_channels=*/kNumChannels);

  // The two configs differ only in the coarse filter length, which lets the
  // test tell which of them is currently active.
  constexpr size_t kNumBlocksForMonoConfig = 1;
  constexpr size_t kNumBlocksForSurroundConfig = 2;
  EchoCanceller3Config mono_config;
  absl::optional<EchoCanceller3Config> surround_config;

  mono_config.multi_channel.detect_stereo_content = true;
  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.5f;
  surround_config = mono_config;
  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
  surround_config->filter.coarse_initial.length_blocks =
      kNumBlocksForSurroundConfig;

  EchoCanceller3 aec3(mono_config, surround_config,
                      /*sample_rate_hz=*/kSampleRateHz,
                      /*num_render_channels=*/kNumChannels,
                      /*num_capture_input_channels=*/kNumChannels);

  // Nothing has been rendered yet: mono processing is expected.
  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForMonoConfig);

  // Identical left/right content must not trigger stereo processing.
  RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForMonoConfig);

  // Number of frames spanned by the hysteresis period.
  // NOTE(review): assumes each RunAecInStereo() call feeds exactly one 10 ms
  // frame - confirm against the helper.
  constexpr int kNumFramesPerSecond = 100;
  const int num_hysteresis_frames = static_cast<int>(
      kNumFramesPerSecond *
      mono_config.multi_channel.stereo_detection_hysteresis_seconds);

  // Differing left/right content during the hysteresis period must still be
  // processed as mono.
  for (int k = 0; k < num_hysteresis_frames; ++k) {
    RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
    EXPECT_EQ(
        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
        kNumBlocksForMonoConfig);
  }

  // One more stereo frame exceeds the hysteresis period and activates stereo
  // processing together with the surround configuration.
  RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForSurroundConfig);
}
|
||||
|
||||
TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) {
|
||||
constexpr int kSampleRateHz = 16000;
|
||||
constexpr int kNumChannels = 2;
|
||||
|
||||
@ -13,6 +13,8 @@
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
@ -23,8 +25,8 @@ constexpr int kNumFramesPerSecond = 100;
|
||||
// whether the signal is a proper stereo signal. To allow for differences
|
||||
// introduced by hardware drivers, a threshold `detection_threshold` is used for
|
||||
// the detection.
|
||||
bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame,
|
||||
float detection_threshold) {
|
||||
bool HasStereoContent(const std::vector<std::vector<std::vector<float>>>& frame,
|
||||
float detection_threshold) {
|
||||
if (frame[0].size() < 2) {
|
||||
return false;
|
||||
}
|
||||
@ -46,7 +48,8 @@ MultiChannelContentDetector::MultiChannelContentDetector(
|
||||
bool detect_stereo_content,
|
||||
int num_render_input_channels,
|
||||
float detection_threshold,
|
||||
int stereo_detection_timeout_threshold_seconds)
|
||||
int stereo_detection_timeout_threshold_seconds,
|
||||
float stereo_detection_hysteresis_seconds)
|
||||
: detect_stereo_content_(detect_stereo_content),
|
||||
detection_threshold_(detection_threshold),
|
||||
detection_timeout_threshold_frames_(
|
||||
@ -54,29 +57,46 @@ MultiChannelContentDetector::MultiChannelContentDetector(
|
||||
? absl::make_optional(stereo_detection_timeout_threshold_seconds *
|
||||
kNumFramesPerSecond)
|
||||
: absl::nullopt),
|
||||
proper_multichannel_content_detected_(!detect_stereo_content &&
|
||||
num_render_input_channels > 1) {}
|
||||
stereo_detection_hysteresis_frames_(static_cast<int>(
|
||||
stereo_detection_hysteresis_seconds * kNumFramesPerSecond)),
|
||||
persistent_multichannel_content_detected_(
|
||||
!detect_stereo_content && num_render_input_channels > 1) {}
|
||||
|
||||
bool MultiChannelContentDetector::UpdateDetection(
|
||||
const std::vector<std::vector<std::vector<float>>>& frame) {
|
||||
if (!detect_stereo_content_)
|
||||
if (!detect_stereo_content_) {
|
||||
RTC_DCHECK_EQ(frame[0].size() > 1,
|
||||
persistent_multichannel_content_detected_);
|
||||
return false;
|
||||
|
||||
const bool previous_proper_multichannel_content_detected =
|
||||
proper_multichannel_content_detected_;
|
||||
|
||||
if (IsProperStereo(frame, detection_threshold_)) {
|
||||
proper_multichannel_content_detected_ = true;
|
||||
frames_since_stereo_detected_ = 0;
|
||||
} else {
|
||||
++frames_since_stereo_detected_;
|
||||
if (detection_timeout_threshold_frames_ &&
|
||||
frames_since_stereo_detected_ >= *detection_timeout_threshold_frames_) {
|
||||
proper_multichannel_content_detected_ = false;
|
||||
}
|
||||
}
|
||||
return previous_proper_multichannel_content_detected !=
|
||||
proper_multichannel_content_detected_;
|
||||
|
||||
const bool previous_persistent_multichannel_content_detected =
|
||||
persistent_multichannel_content_detected_;
|
||||
const bool stereo_detected_in_frame =
|
||||
HasStereoContent(frame, detection_threshold_);
|
||||
|
||||
consecutive_frames_with_stereo_ =
|
||||
stereo_detected_in_frame ? consecutive_frames_with_stereo_ + 1 : 0;
|
||||
frames_since_stereo_detected_last_ =
|
||||
stereo_detected_in_frame ? 0 : frames_since_stereo_detected_last_ + 1;
|
||||
|
||||
// Detect persistent multichannel content.
|
||||
if (consecutive_frames_with_stereo_ > stereo_detection_hysteresis_frames_) {
|
||||
persistent_multichannel_content_detected_ = true;
|
||||
}
|
||||
if (detection_timeout_threshold_frames_.has_value() &&
|
||||
frames_since_stereo_detected_last_ >=
|
||||
*detection_timeout_threshold_frames_) {
|
||||
persistent_multichannel_content_detected_ = false;
|
||||
}
|
||||
|
||||
// Detect temporary multichannel content.
|
||||
temporary_multichannel_content_detected_ =
|
||||
persistent_multichannel_content_detected_ ? false
|
||||
: stereo_detected_in_frame;
|
||||
|
||||
return previous_persistent_multichannel_content_detected !=
|
||||
persistent_multichannel_content_detected_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -31,24 +31,33 @@ class MultiChannelContentDetector {
|
||||
MultiChannelContentDetector(bool detect_stereo_content,
|
||||
int num_render_input_channels,
|
||||
float detection_threshold,
|
||||
int stereo_detection_timeout_threshold_seconds);
|
||||
int stereo_detection_timeout_threshold_seconds,
|
||||
float stereo_detection_hysteresis_seconds);
|
||||
|
||||
// Compares the left and right channels in the render `frame` to determine
|
||||
// whether the signal is a proper multichannel signal. Returns a bool
|
||||
// indicating whether a change in the multichannel was detected.
|
||||
// indicating whether a change in the proper multichannel content was
|
||||
// detected.
|
||||
bool UpdateDetection(
|
||||
const std::vector<std::vector<std::vector<float>>>& frame);
|
||||
|
||||
bool IsMultiChannelContentDetected() const {
|
||||
return proper_multichannel_content_detected_;
|
||||
bool IsProperMultiChannelContentDetected() const {
|
||||
return persistent_multichannel_content_detected_;
|
||||
}
|
||||
|
||||
bool IsTemporaryMultiChannelContentDetectedForTesting() const {
|
||||
return temporary_multichannel_content_detected_;
|
||||
}
|
||||
|
||||
private:
|
||||
const bool detect_stereo_content_;
|
||||
const float detection_threshold_;
|
||||
const absl::optional<int> detection_timeout_threshold_frames_;
|
||||
bool proper_multichannel_content_detected_;
|
||||
int frames_since_stereo_detected_ = 0;
|
||||
const int stereo_detection_hysteresis_frames_;
|
||||
bool persistent_multichannel_content_detected_;
|
||||
bool temporary_multichannel_content_detected_ = false;
|
||||
int64_t frames_since_stereo_detected_last_ = 0;
|
||||
int64_t consecutive_frames_with_stereo_ = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -19,8 +19,9 @@ TEST(MultiChannelContentDetector, HandlingOfMono) {
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/1,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
|
||||
@ -28,8 +29,9 @@ TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
|
||||
/*detect_stereo_content=*/false,
|
||||
/*num_render_input_channels=*/1,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
|
||||
@ -37,8 +39,9 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
|
||||
/*detect_stereo_content=*/false,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
@ -46,7 +49,7 @@ TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
|
||||
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
}
|
||||
@ -56,8 +59,9 @@ TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
|
||||
@ -65,13 +69,14 @@ TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
}
|
||||
@ -81,13 +86,14 @@ TEST(MultiChannelContentDetector, DetectionWhenStereo) {
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
|
||||
EXPECT_TRUE(mc.UpdateDetection(frame));
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
}
|
||||
@ -97,14 +103,15 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f);
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
|
||||
@ -112,7 +119,7 @@ TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
|
||||
std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f);
|
||||
|
||||
EXPECT_TRUE(mc.UpdateDetection(frame));
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
}
|
||||
@ -123,14 +130,15 @@ TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/kThreshold,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold);
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
}
|
||||
@ -141,14 +149,15 @@ TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
|
||||
/*detect_stereo_content=*/true,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/kThreshold,
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0);
|
||||
/*stereo_detection_timeout_threshold_seconds=*/0,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
std::vector<std::vector<std::vector<float>>> frame(
|
||||
1, std::vector<std::vector<float>>(2, std::vector<float>(160, 0.0f)));
|
||||
std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f);
|
||||
std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold + 0.1f);
|
||||
|
||||
EXPECT_TRUE(mc.UpdateDetection(frame));
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
EXPECT_FALSE(mc.UpdateDetection(frame));
|
||||
}
|
||||
@ -166,16 +175,16 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
|
||||
TimeOutBehaviorForNonTrueStereo) {
|
||||
constexpr int kNumFramesPerSecond = 100;
|
||||
const bool detect_stereo_content = std::get<0>(GetParam());
|
||||
const int stereo_stereo_detection_timeout_threshold_seconds =
|
||||
const int stereo_detection_timeout_threshold_seconds =
|
||||
std::get<1>(GetParam());
|
||||
const int stereo_detection_timeout_threshold_frames =
|
||||
stereo_stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
|
||||
stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond;
|
||||
|
||||
MultiChannelContentDetector mc(
|
||||
detect_stereo_content,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
stereo_stereo_detection_timeout_threshold_seconds);
|
||||
MultiChannelContentDetector mc(detect_stereo_content,
|
||||
/*num_render_input_channels=*/2,
|
||||
/*detection_threshold=*/0.0f,
|
||||
stereo_detection_timeout_threshold_seconds,
|
||||
/*stereo_detection_hysteresis_seconds=*/0.0f);
|
||||
std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
|
||||
{std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
|
||||
|
||||
@ -186,9 +195,9 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
|
||||
if (detect_stereo_content) {
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
} else {
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
}
|
||||
}
|
||||
|
||||
@ -198,21 +207,21 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
|
||||
} else {
|
||||
EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
|
||||
}
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
|
||||
// Pass fake stereo frames until any timeouts are about to occur.
|
||||
for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) {
|
||||
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
// Pass a fake stereo frame and verify that any timeouts properly occur.
|
||||
if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) {
|
||||
EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame));
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
} else {
|
||||
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
}
|
||||
|
||||
// Pass fake stereo frames and verify the behavior after any timeout.
|
||||
@ -220,11 +229,129 @@ TEST_P(MultiChannelContentDetectorTimeoutBehavior,
|
||||
EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
|
||||
if (detect_stereo_content &&
|
||||
stereo_detection_timeout_threshold_frames > 0) {
|
||||
EXPECT_FALSE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
|
||||
} else {
|
||||
EXPECT_TRUE(mc.IsMultiChannelContentDetected());
|
||||
EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class MultiChannelContentDetectorHysteresisBehavior
|
||||
: public ::testing::Test,
|
||||
public ::testing::WithParamInterface<std::tuple<bool, float>> {};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
MultiChannelContentDetector,
|
||||
MultiChannelContentDetectorHysteresisBehavior,
|
||||
::testing::Combine(::testing::Values(false, true),
|
||||
::testing::Values(0.0f, 0.1f, 0.2f)));
|
||||
|
||||
// Verifies the hysteresis before persistent ("proper") multichannel content is
// reported: true-stereo frames are only flagged as temporary multichannel
// content until more than `stereo_detection_hysteresis_seconds` worth of
// consecutive stereo frames has been observed. With detection disabled, the
// two-channel input is always reported as proper multichannel content.
TEST_P(MultiChannelContentDetectorHysteresisBehavior,
       PeriodBeforeStereoDetectionIsTriggered) {
  constexpr int kNumFramesPerSecond = 100;
  const bool detect_stereo_content = std::get<0>(GetParam());
  // The parameter must stay a float: storing it in an int truncates the
  // sub-second values to 0, so only the zero-hysteresis path would be tested.
  const float stereo_detection_hysteresis_seconds = std::get<1>(GetParam());
  // Same seconds-to-frames conversion as the detector performs.
  const int stereo_detection_hysteresis_frames = static_cast<int>(
      stereo_detection_hysteresis_seconds * kNumFramesPerSecond);

  MultiChannelContentDetector mc(
      detect_stereo_content,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      stereo_detection_hysteresis_seconds);
  const std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};

  const std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};

  // Pass fake stereo frames and verify the content detection.
  for (int k = 0; k < 10; ++k) {
    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
    if (detect_stereo_content) {
      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
    } else {
      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
    }
    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
  }

  // Pass two true stereo frames and verify that they are properly detected.
  // The frame counting below requires a hysteresis that is either disabled or
  // longer than these two frames.
  ASSERT_TRUE(stereo_detection_hysteresis_frames > 2 ||
              stereo_detection_hysteresis_frames == 0);
  for (int k = 0; k < 2; ++k) {
    if (detect_stereo_content) {
      if (stereo_detection_hysteresis_frames == 0) {
        // Without hysteresis the very first stereo frame flips the state.
        if (k == 0) {
          EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
        } else {
          EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
        }
        EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
        EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
      } else {
        EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
        EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
        EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
      }
    } else {
      EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
      EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
    }
  }

  if (stereo_detection_hysteresis_frames == 0) {
    return;
  }

  // Pass true stereo frames until the hysteresis period is about to be
  // exceeded. Two stereo frames were already passed above, and the detector
  // only triggers once the number of consecutive stereo frames is strictly
  // greater than the hysteresis, so after this loop exactly
  // `stereo_detection_hysteresis_frames` consecutive stereo frames were seen.
  for (int k = 0; k < stereo_detection_hysteresis_frames - 2; ++k) {
    EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
    if (detect_stereo_content) {
      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
      EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
    } else {
      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
      EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
    }
  }

  // Pass a true stereo frame that exceeds the hysteresis period and verify
  // that persistent stereo is now detected (a state change is only reported
  // when detection is enabled).
  if (detect_stereo_content) {
    EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
  } else {
    EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
  }
  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
  EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());

  // Pass an additional true stereo frame and verify that the detection state
  // is retained without reporting a change.
  EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
  EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());

  // Pass a fake stereo frame and verify that the persistent detection is
  // retained (no timeout is configured in this test).
  EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
  EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetectedForTesting());
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user