From 4d12174ca5470f7b75d1c36f05bd460d11def8db Mon Sep 17 00:00:00 2001 From: Sam Zackrisson Date: Wed, 13 Apr 2022 11:04:49 +0200 Subject: [PATCH] AEC3: Add metrics for stereo detection code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two metrics for stereo detection: - An enum indicating whether the last 10 seconds contained persistent stereo content or not, logged every 10 seconds. - An enum indicating whether any persistent stereo content at all has been detected, logged at the end of the AEC lifetime. These metrics allow us to assess: - What proportion of all audio is treated as stereo. - What proportion of sessions encounter any significant stereo content. If this is unexpectedly high, the stereo detection code may need fine tuning. Metrics are only logged for component lifetimes exceeding 5 seconds: This is to avoid brief AEC lifetimes due to internal resets etc within APM. Corresponding Chrome CL for XML histogram declarations: https://crrev.com/c/3579317 Bug: chromium:1295710 Change-Id: I93e2bf74588cf4bb2a8922dbfad079bccab01456 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258760 Reviewed-by: Per Ã…hgren Commit-Queue: Sam Zackrisson Cr-Commit-Position: refs/heads/main@{#36537} --- modules/audio_processing/aec3/BUILD.gn | 1 + .../aec3/multi_channel_content_detector.cc | 48 +++++++- .../aec3/multi_channel_content_detector.h | 29 +++++ ...multi_channel_content_detector_unittest.cc | 113 ++++++++++++++++++ 4 files changed, 190 insertions(+), 1 deletion(-) diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn index 928afef13f..fee776fb39 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn @@ -319,6 +319,7 @@ if (rtc_include_tests) { "../../../rtc_base:stringutils", "../../../rtc_base/system:arch", "../../../system_wrappers", + "../../../system_wrappers:metrics", "../../../test:field_trial", "../../../test:test_support", "../utility:cascaded_biquad_filter", diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.cc b/modules/audio_processing/aec3/multi_channel_content_detector.cc index 8d1bd9108c..98068964d9 100644 --- a/modules/audio_processing/aec3/multi_channel_content_detector.cc +++ b/modules/audio_processing/aec3/multi_channel_content_detector.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. * @@ -14,6 +13,7 @@ #include #include "rtc_base/checks.h" +#include "system_wrappers/include/metrics.h" namespace webrtc { @@ -42,8 +42,48 @@ bool HasStereoContent(const std::vector>>& frame, return false; } +// In order to avoid logging metrics for very short lifetimes that are unlikely +// to reflect real calls and that may dilute the "real" data, logging is limited +// to lifetimes of at leats 5 seconds. +constexpr int kMinNumberOfFramesRequiredToLogMetrics = 500; + +// Continuous metrics are logged every 10 seconds. +constexpr int kFramesPer10Seconds = 1000; + } // namespace +MultiChannelContentDetector::MetricsLogger::MetricsLogger() {} + +MultiChannelContentDetector::MetricsLogger::~MetricsLogger() { + if (frame_counter_ < kMinNumberOfFramesRequiredToLogMetrics) + return; + + RTC_HISTOGRAM_BOOLEAN( + "WebRTC.Audio.EchoCanceller.PersistentMultichannelContentEverDetected", + any_multichannel_content_detected_ ? 1 : 0); +} + +void MultiChannelContentDetector::MetricsLogger::Update( + bool persistent_multichannel_content_detected) { + ++frame_counter_; + if (persistent_multichannel_content_detected) { + any_multichannel_content_detected_ = true; + ++persistent_multichannel_frame_counter_; + } + + if (frame_counter_ < kMinNumberOfFramesRequiredToLogMetrics) + return; + if (frame_counter_ % kFramesPer10Seconds != 0) + return; + const bool mostly_multichannel_last_10_seconds = + (persistent_multichannel_frame_counter_ >= kFramesPer10Seconds / 2); + RTC_HISTOGRAM_BOOLEAN( + "WebRTC.Audio.EchoCanceller.ProcessingPersistentMultichannelContent", + mostly_multichannel_last_10_seconds ? 1 : 0); + + persistent_multichannel_frame_counter_ = 0; +} + MultiChannelContentDetector::MultiChannelContentDetector( bool detect_stereo_content, int num_render_input_channels, @@ -59,6 +99,9 @@ MultiChannelContentDetector::MultiChannelContentDetector( : absl::nullopt), stereo_detection_hysteresis_frames_(static_cast( stereo_detection_hysteresis_seconds * kNumFramesPerSecond)), + metrics_logger_((detect_stereo_content && num_render_input_channels > 1) + ? std::make_unique() + : nullptr), persistent_multichannel_content_detected_( !detect_stereo_content && num_render_input_channels > 1) {} @@ -95,6 +138,9 @@ bool MultiChannelContentDetector::UpdateDetection( persistent_multichannel_content_detected_ ? false : stereo_detected_in_frame; + if (metrics_logger_) + metrics_logger_->Update(persistent_multichannel_content_detected_); + return previous_persistent_multichannel_content_detected != persistent_multichannel_content_detected_; } diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.h b/modules/audio_processing/aec3/multi_channel_content_detector.h index ad5f4f2886..1c6cc1e7a8 100644 --- a/modules/audio_processing/aec3/multi_channel_content_detector.h +++ b/modules/audio_processing/aec3/multi_channel_content_detector.h @@ -23,6 +23,7 @@ namespace webrtc { // multichannel, or only upmixed mono. To allow for differences introduced by // hardware drivers, a threshold `detection_threshold` is used for the // detection. +// Logs metrics continously and upon destruction. class MultiChannelContentDetector { public: // If |stereo_detection_timeout_threshold_seconds| <= 0, no timeout is @@ -50,10 +51,38 @@ class MultiChannelContentDetector { } private: + // Tracks and logs metrics for the amount of multichannel content detected. + class MetricsLogger { + public: + MetricsLogger(); + + // The destructor logs call summary statistics. + ~MetricsLogger(); + + // Updates and logs metrics. + void Update(bool persistent_multichannel_content_detected); + + private: + int frame_counter_ = 0; + + // Counts the number of frames of persistent multichannel audio observed + // during the current metrics collection interval. + int persistent_multichannel_frame_counter_ = 0; + + // Indicates whether persistent multichannel content has ever been detected. + bool any_multichannel_content_detected_ = false; + }; + const bool detect_stereo_content_; const float detection_threshold_; const absl::optional detection_timeout_threshold_frames_; const int stereo_detection_hysteresis_frames_; + + // Collects and reports metrics on the amount of multichannel content + // detected. Only created if |num_render_input_channels| > 1 and + // |detect_stereo_content_| is true. + const std::unique_ptr metrics_logger_; + bool persistent_multichannel_content_detected_; bool temporary_multichannel_content_detected_ = false; int64_t frames_since_stereo_detected_last_ = 0; diff --git a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc index 86817da017..8d38dd0991 100644 --- a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc +++ b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc @@ -10,6 +10,7 @@ #include "modules/audio_processing/aec3/multi_channel_content_detector.h" +#include "system_wrappers/include/metrics.h" #include "test/gtest.h" namespace webrtc { @@ -354,4 +355,116 @@ TEST_P(MultiChannelContentDetectorHysteresisBehavior, } } +class MultiChannelContentDetectorMetricsDisabled + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P( + /*no prefix*/, + MultiChannelContentDetectorMetricsDisabled, + ::testing::Values(std::tuple(false, 2), + std::tuple(true, 1))); + +// Test that no metrics are logged when they are clearly uninteresting and would +// dilute relevant data: when the reference audio is single channel, or when +// dynamic detection is disabled. +TEST_P(MultiChannelContentDetectorMetricsDisabled, ReportsNoMetrics) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + const bool detect_stereo_content = std::get<0>(GetParam()); + const int channel_count = std::get<1>(GetParam()); + std::vector>> audio_frame = { + std::vector>(channel_count, + std::vector(160, 100.0f))}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/detect_stereo_content, + /*num_render_input_channels=*/channel_count, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < 20 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(audio_frame); + } + } + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); +} + +// Tests that after 3 seconds, no metrics are reported. +TEST(MultiChannelContentDetectorMetrics, ReportsNoMetricsForShortLifetime) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + constexpr int kTooFewFramesToLogMetrics = 3 * kNumFramesPerSecond; + std::vector>> audio_frame = { + std::vector>(2, std::vector(160, 100.0f))}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < kTooFewFramesToLogMetrics; ++k) { + mc.UpdateDetection(audio_frame); + } + } + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); +} + +// Tests that after 25 seconds, metrics are reported. +TEST(MultiChannelContentDetectorMetrics, ReportsMetrics) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + std::vector>> true_stereo_frame = { + {std::vector(160, 100.0f), std::vector(160, 101.0f)}}; + std::vector>> fake_stereo_frame = { + {std::vector(160, 100.0f), std::vector(160, 100.0f)}}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < 10 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(true_stereo_frame); + } + for (int k = 0; k < 15 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(fake_stereo_frame); + } + } + // After 10 seconds of true stereo and the remainder fake stereo, we expect + // one lifetime metric sample (multichannel detected) and two periodic samples + // (one multichannel, one mono). + + // Check lifetime metric. + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected", 1)); + + // Check periodic metric. + EXPECT_METRIC_EQ( + 2, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent", 0)); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent", 1)); +} + } // namespace webrtc