Added a new echo likelihood stat that reports the maximum value from a previous time period.

BUG=webrtc:6797
Review-Url: https://codereview.webrtc.org/2629563003
Cr-Commit-Position: refs/heads/master@{#16079}
2017-01-15 08:29:46 -08:00 · 2017-01-15 08:29:46 -08:00 · 4e477a1d7b
commit 4e477a1d7b
parent cc1e1aa424
19 changed files with 231 additions and 23 deletions
--- a/webrtc/api/mediastreaminterface.h
+++ b/webrtc/api/mediastreaminterface.h
@ -203,14 +203,16 @@ class AudioSourceInterface : public MediaSourceInterface {
 class AudioProcessorInterface : public rtc::RefCountInterface {
 public:
  struct AudioProcessorStats {
-    AudioProcessorStats() : typing_noise_detected(false),
-                            echo_return_loss(0),
-                            echo_return_loss_enhancement(0),
-                            echo_delay_median_ms(0),
-                            echo_delay_std_ms(0),
-                            aec_quality_min(0.0),
-                            residual_echo_likelihood(0.0f),
-                            aec_divergent_filter_fraction(0.0) {}
+    AudioProcessorStats()
+        : typing_noise_detected(false),
+          echo_return_loss(0),
+          echo_return_loss_enhancement(0),
+          echo_delay_median_ms(0),
+          echo_delay_std_ms(0),
+          aec_quality_min(0.0),
+          residual_echo_likelihood(0.0f),
+          residual_echo_likelihood_recent_max(0.0f),
+          aec_divergent_filter_fraction(0.0) {}
    ~AudioProcessorStats() {}

    bool typing_noise_detected;
@ -220,6 +222,7 @@ class AudioProcessorInterface : public rtc::RefCountInterface {
    int echo_delay_std_ms;
    float aec_quality_min;
    float residual_echo_likelihood;
+    float residual_echo_likelihood_recent_max;
    float aec_divergent_filter_fraction;
  };

--- a/webrtc/api/statscollector.cc
+++ b/webrtc/api/statscollector.cc
@ -104,7 +104,8 @@ void SetAudioProcessingStats(StatsReport* report,
                             int echo_delay_median_ms,
                             float aec_quality_min,
                             int echo_delay_std_ms,
-                             float residual_echo_likelihood) {
+                             float residual_echo_likelihood,
+                             float residual_echo_likelihood_recent_max) {
  report->AddBoolean(StatsReport::kStatsValueNameTypingNoiseState,
                     typing_noise_detected);
  if (aec_quality_min >= 0.0f) {
@ -127,6 +128,9 @@ void SetAudioProcessingStats(StatsReport* report,
  if (residual_echo_likelihood >= 0.0f) {
    report->AddFloat(StatsReport::kStatsValueNameResidualEchoLikelihood,
                     residual_echo_likelihood);
+    report->AddFloat(
+        StatsReport::kStatsValueNameResidualEchoLikelihoodRecentMax,
+        residual_echo_likelihood_recent_max);
  }
 }

@ -187,7 +191,7 @@ void ExtractStats(const cricket::VoiceSenderInfo& info, StatsReport* report) {
      report, info.typing_noise_detected, info.echo_return_loss,
      info.echo_return_loss_enhancement, info.echo_delay_median_ms,
      info.aec_quality_min, info.echo_delay_std_ms,
-      info.residual_echo_likelihood);
+      info.residual_echo_likelihood, info.residual_echo_likelihood_recent_max);

  RTC_DCHECK_GE(info.audio_level, 0);
  const IntForAdd ints[] = {
@ -940,7 +944,8 @@ void StatsCollector::UpdateReportFromAudioTrack(AudioTrackInterface* track,
        report, stats.typing_noise_detected, stats.echo_return_loss,
        stats.echo_return_loss_enhancement, stats.echo_delay_median_ms,
        stats.aec_quality_min, stats.echo_delay_std_ms,
-        stats.residual_echo_likelihood);
+        stats.residual_echo_likelihood,
+        stats.residual_echo_likelihood_recent_max);

    report->AddFloat(StatsReport::kStatsValueNameAecDivergentFilterFraction,
                     stats.aec_divergent_filter_fraction);
--- a/webrtc/api/statscollector_unittest.cc
+++ b/webrtc/api/statscollector_unittest.cc
@ -414,6 +414,11 @@ void VerifyVoiceSenderInfoReport(const StatsReport* report,
                       &value_in_report));
  EXPECT_EQ(rtc::ToString<float>(sinfo.residual_echo_likelihood),
            value_in_report);
+  EXPECT_TRUE(GetValue(
+      report, StatsReport::kStatsValueNameResidualEchoLikelihoodRecentMax,
+      &value_in_report));
+  EXPECT_EQ(rtc::ToString<float>(sinfo.residual_echo_likelihood_recent_max),
+            value_in_report);
  EXPECT_TRUE(GetValue(report, StatsReport::kStatsValueNameAudioInputLevel,
                       &value_in_report));
  EXPECT_EQ(rtc::ToString<int>(sinfo.audio_level), value_in_report);
--- a/webrtc/api/statstypes.cc
+++ b/webrtc/api/statstypes.cc
@ -576,6 +576,8 @@ const char* StatsReport::Value::display_name() const {
      return "remoteCertificateId";
    case kStatsValueNameResidualEchoLikelihood:
      return "googResidualEchoLikelihood";
+    case kStatsValueNameResidualEchoLikelihoodRecentMax:
+      return "googResidualEchoLikelihoodRecentMax";
    case kStatsValueNameRetransmitBitrate:
      return "googRetransmitBitrate";
    case kStatsValueNameRtt:
--- a/webrtc/api/statstypes.h
+++ b/webrtc/api/statstypes.h
@ -202,6 +202,7 @@ class StatsReport {
    kStatsValueNameRemoteCertificateId,
    kStatsValueNameRenderDelayMs,
    kStatsValueNameResidualEchoLikelihood,
+    kStatsValueNameResidualEchoLikelihoodRecentMax,
    kStatsValueNameRetransmitBitrate,
    kStatsValueNameRtt,
    kStatsValueNameSecondaryDecodedRate,
--- a/webrtc/audio/audio_send_stream.cc
+++ b/webrtc/audio/audio_send_stream.cc
@ -210,6 +210,8 @@ webrtc::AudioSendStream::Stats AudioSendStream::GetStats() const {
      audio_processing_stats.echo_return_loss_enhancement.instant();
  stats.residual_echo_likelihood =
      audio_processing_stats.residual_echo_likelihood;
+  stats.residual_echo_likelihood_recent_max =
+      audio_processing_stats.residual_echo_likelihood_recent_max;

  internal::AudioState* audio_state =
      static_cast<internal::AudioState*>(audio_state_.get());
--- a/webrtc/call/audio_send_stream.h
+++ b/webrtc/call/audio_send_stream.h
@ -52,6 +52,7 @@ class AudioSendStream {
    int32_t echo_return_loss = -100;
    int32_t echo_return_loss_enhancement = -100;
    float residual_echo_likelihood = -1.0f;
+    float residual_echo_likelihood_recent_max = -1.0f;
    bool typing_noise_detected = false;
  };

--- a/webrtc/media/base/mediachannel.h
+++ b/webrtc/media/base/mediachannel.h
@ -632,8 +632,8 @@ struct VoiceSenderInfo : public MediaSenderInfo {
        echo_return_loss(0),
        echo_return_loss_enhancement(0),
        residual_echo_likelihood(0.0f),
-        typing_noise_detected(false) {
-  }
+        residual_echo_likelihood_recent_max(0.0f),
+        typing_noise_detected(false) {}

  int ext_seqnum;
  int jitter_ms;
@ -644,6 +644,7 @@ struct VoiceSenderInfo : public MediaSenderInfo {
  int echo_return_loss;
  int echo_return_loss_enhancement;
  float residual_echo_likelihood;
+  float residual_echo_likelihood_recent_max;
  bool typing_noise_detected;
 };

--- a/webrtc/media/engine/webrtcvoiceengine.cc
+++ b/webrtc/media/engine/webrtcvoiceengine.cc
@ -2615,6 +2615,8 @@ bool WebRtcVoiceMediaChannel::GetStats(VoiceMediaInfo* info) {
    sinfo.echo_return_loss = stats.echo_return_loss;
    sinfo.echo_return_loss_enhancement = stats.echo_return_loss_enhancement;
    sinfo.residual_echo_likelihood = stats.residual_echo_likelihood;
+    sinfo.residual_echo_likelihood_recent_max =
+        stats.residual_echo_likelihood_recent_max;
    sinfo.typing_noise_detected = (send_ ? stats.typing_noise_detected : false);
    info->senders.push_back(sinfo);
  }
--- a/webrtc/media/engine/webrtcvoiceengine_unittest.cc
+++ b/webrtc/media/engine/webrtcvoiceengine_unittest.cc
@ -480,6 +480,7 @@ class WebRtcVoiceEngineTestFake : public testing::Test {
    stats.echo_return_loss = 890;
    stats.echo_return_loss_enhancement = 1234;
    stats.residual_echo_likelihood = 0.432f;
+    stats.residual_echo_likelihood_recent_max = 0.6f;
    stats.typing_noise_detected = true;
    return stats;
  }
@ -509,6 +510,8 @@ class WebRtcVoiceEngineTestFake : public testing::Test {
    EXPECT_EQ(info.echo_return_loss_enhancement,
              stats.echo_return_loss_enhancement);
    EXPECT_EQ(info.residual_echo_likelihood, stats.residual_echo_likelihood);
+    EXPECT_EQ(info.residual_echo_likelihood_recent_max,
+              stats.residual_echo_likelihood_recent_max);
    EXPECT_EQ(info.typing_noise_detected,
              stats.typing_noise_detected && is_sending);
  }
--- a/webrtc/modules/BUILD.gn
+++ b/webrtc/modules/BUILD.gn
@ -606,6 +606,7 @@ if (rtc_include_tests) {
        "audio_processing/echo_control_mobile_unittest.cc",
        "audio_processing/echo_detector/circular_buffer_unittest.cc",
        "audio_processing/echo_detector/mean_variance_estimator_unittest.cc",
+        "audio_processing/echo_detector/moving_max_unittest.cc",
        "audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc",
        "audio_processing/gain_control_unittest.cc",
        "audio_processing/level_controller/level_controller_unittest.cc",
--- a/webrtc/modules/audio_processing/BUILD.gn
+++ b/webrtc/modules/audio_processing/BUILD.gn
@ -71,6 +71,8 @@ rtc_static_library("audio_processing") {
    "echo_detector/circular_buffer.h",
    "echo_detector/mean_variance_estimator.cc",
    "echo_detector/mean_variance_estimator.h",
+    "echo_detector/moving_max.cc",
+    "echo_detector/moving_max.h",
    "echo_detector/normalized_covariance_estimator.cc",
    "echo_detector/normalized_covariance_estimator.h",
    "gain_control_for_experimental_agc.cc",
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@ -1585,6 +1585,19 @@ int AudioProcessingImpl::StopDebugRecording() {
 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 }

+// Default constructor. Passes -100.0f for all four arguments of Set() on each
+// of the AEC statistics below; members not touched here keep their in-class
+// initializers.
+// NOTE(review): -100 looks like a "not available" sentinel (the stats struct
+// in webrtc/call/audio_send_stream.h defaults its echo return loss fields to
+// -100 as well) - confirm.
+AudioProcessing::AudioProcessingStatistics::AudioProcessingStatistics() {
+  residual_echo_return_loss.Set(-100.0f, -100.0f, -100.0f, -100.0f);
+  echo_return_loss.Set(-100.0f, -100.0f, -100.0f, -100.0f);
+  echo_return_loss_enhancement.Set(-100.0f, -100.0f, -100.0f, -100.0f);
+  a_nlp.Set(-100.0f, -100.0f, -100.0f, -100.0f);
+}
+
+// Copy constructor: declared in audio_processing.h and defaulted here, out of
+// line, so it performs an ordinary member-wise copy.
+AudioProcessing::AudioProcessingStatistics::AudioProcessingStatistics(
+    const AudioProcessingStatistics& other) = default;
+
+// Destructor: declared in audio_processing.h and defaulted here, out of line.
+AudioProcessing::AudioProcessingStatistics::~AudioProcessingStatistics() =
+    default;
+
 // TODO(ivoc): Remove this when GetStatistics() becomes pure virtual.
 AudioProcessing::AudioProcessingStatistics AudioProcessing::GetStatistics()
    const {
@ -1606,6 +1619,8 @@ AudioProcessing::AudioProcessingStatistics AudioProcessingImpl::GetStatistics()
  }
  stats.residual_echo_likelihood =
      private_submodules_->residual_echo_detector->echo_likelihood();
+  stats.residual_echo_likelihood_recent_max =
+      private_submodules_->residual_echo_detector->echo_likelihood_recent_max();
  public_submodules_->echo_cancellation->GetDelayMetrics(
      &stats.delay_median, &stats.delay_standard_deviation,
      &stats.fraction_poor_delays);
--- a/webrtc/modules/audio_processing/echo_detector/moving_max.cc
+++ b/webrtc/modules/audio_processing/echo_detector/moving_max.cc
@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/echo_detector/moving_max.h"
+
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// Multiplicative factor applied to the tracked maximum on every update once
+// the previous maximum is no longer valid (see MovingMax::Update). With a
+// value of 0.99, the maximum decays to roughly 1% of its former value after
+// about 460 such updates (0.99^460 ~= 0.0098).
+constexpr float kDecayFactor = 0.99f;
+
+}  // namespace
+
+// Stores |window_size|, which Update() uses to decide when the current
+// maximum has become too old and should start decaying. |window_size| is
+// DCHECKed to be greater than zero.
+MovingMax::MovingMax(size_t window_size) : window_size_(window_size) {
+  RTC_DCHECK_GT(window_size, 0);
+}
+
+MovingMax::~MovingMax() {}  // Empty: the destructor body does no work.
+
+// Feeds one new sample into the tracker and updates the maximum estimate.
+void MovingMax::Update(float value) {
+  // |counter_| is the age (in updates) of the current maximum. Once it has
+  // reached window_size_ - 1 the maximum is treated as expired: the counter
+  // stops advancing and the maximum is scaled by kDecayFactor on this and
+  // every following update. Otherwise the maximum just gets one update older.
+  if (counter_ >= window_size_ - 1) {
+    max_value_ *= kDecayFactor;
+  } else {
+    ++counter_;
+  }
+  // A sample strictly larger than the (possibly decayed) maximum replaces it
+  // and restarts the age counter.
+  if (value > max_value_) {
+    max_value_ = value;
+    counter_ = 0;
+  }
+}
+
+// Returns the current maximum estimate. This is either the largest sample
+// seen since it was last replaced, or that value after decay has been applied
+// by Update(). It is 0 if no sample above 0 has been seen since construction
+// or the last Clear().
+float MovingMax::max() const {
+  return max_value_;
+}
+
+// Resets the maximum estimate to zero and restarts the age counter. The
+// window size given at construction is left unchanged.
+void MovingMax::Clear() {
+  max_value_ = 0.f;
+  counter_ = 0;
+}
+
+}  // namespace webrtc
--- a/webrtc/modules/audio_processing/echo_detector/moving_max.h
+++ b/webrtc/modules/audio_processing/echo_detector/moving_max.h
@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+
+// Keeps an approximate running maximum of a stream of float samples without
+// storing the samples themselves. The largest value seen is held for about
+// |window_size| updates; after that it decays by a constant factor on every
+// update until a larger sample replaces it. The estimate starts at 0, so
+// samples that are not greater than 0 never become the maximum.
+class MovingMax {
+ public:
+  // |window_size| is the number of updates a maximum is held before it starts
+  // to decay; it must be greater than zero.
+  explicit MovingMax(size_t window_size);
+  ~MovingMax();
+
+  // Adds a new sample and updates the maximum estimate.
+  void Update(float value);
+  // Returns the current (possibly decayed) maximum estimate.
+  float max() const;
+  // Resets the maximum estimate and the age counter. The window size is kept.
+  void Clear();
+
+ private:
+  // Current maximum estimate.
+  float max_value_ = 0.f;
+  // Number of updates since |max_value_| was last replaced, capped at
+  // window_size_ - 1.
+  size_t counter_ = 0;
+  // Number of updates a maximum is held before decay sets in. Overwritten by
+  // the constructor.
+  size_t window_size_ = 1;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_
--- a/webrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc
+++ b/webrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc
@ -0,0 +1,67 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/echo_detector/moving_max.h"
+#include "webrtc/test/gtest.h"
+
+namespace webrtc {
+
+// Test if the maximum is correctly found.
+TEST(MovingMaxTests, SimpleTest) {
+  MovingMax test_moving_max(5);
+  // The largest sample (1.9) is followed by only two smaller ones, which is
+  // fewer than the window of 5, so no decay has been applied yet and the exact
+  // value is expected back.
+  test_moving_max.Update(1.0f);
+  test_moving_max.Update(1.1f);
+  test_moving_max.Update(1.9f);
+  test_moving_max.Update(1.87f);
+  test_moving_max.Update(1.89f);
+  EXPECT_EQ(1.9f, test_moving_max.max());
+}
+
+// Test if values fall out of the window when expected.
+TEST(MovingMaxTests, SlidingWindowTest) {
+  MovingMax test_moving_max(5);
+  // The largest sample (1.9) is followed by five smaller ones. With a window
+  // of 5 the fifth of them triggers the first decay step, so the reported
+  // maximum must have dropped below 1.9.
+  test_moving_max.Update(1.0f);
+  test_moving_max.Update(1.9f);
+  test_moving_max.Update(1.7f);
+  test_moving_max.Update(1.87f);
+  test_moving_max.Update(1.89f);
+  test_moving_max.Update(1.3f);
+  test_moving_max.Update(1.2f);
+  EXPECT_LT(test_moving_max.max(), 1.9f);
+}
+
+// Test if Clear() works as expected.
+TEST(MovingMaxTests, ClearTest) {
+  MovingMax test_moving_max(5);
+  // Same sequence as SimpleTest, so the maximum is 1.9 before clearing.
+  test_moving_max.Update(1.0f);
+  test_moving_max.Update(1.1f);
+  test_moving_max.Update(1.9f);
+  test_moving_max.Update(1.87f);
+  test_moving_max.Update(1.89f);
+  EXPECT_EQ(1.9f, test_moving_max.max());
+  // After Clear() the estimate must be back at its initial value of zero.
+  test_moving_max.Clear();
+  EXPECT_EQ(0.f, test_moving_max.max());
+}
+
+// Test the decay of the estimated maximum.
+TEST(MovingMaxTests, DecayTest) {
+  // With a window of 1 the maximum is expired immediately, so every update
+  // that does not raise it applies one decay step.
+  MovingMax test_moving_max(1);
+  test_moving_max.Update(1.0f);
+  float previous_value = 1.0f;
+  for (int i = 0; i < 500; i++) {
+    test_moving_max.Update(0.0f);
+    // The estimate must shrink on every step but never reach zero.
+    EXPECT_LT(test_moving_max.max(), previous_value);
+    EXPECT_GT(test_moving_max.max(), 0.0f);
+    previous_value = test_moving_max.max();
+  }
+  // After 500 decay steps the estimate must be below 1% of the initial 1.0.
+  EXPECT_LT(test_moving_max.max(), 0.01f);
+}
+
+}  // namespace webrtc
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@ -510,12 +510,9 @@ class AudioProcessing {
  };

  struct AudioProcessingStatistics {
-    AudioProcessingStatistics() {
-      residual_echo_return_loss.Set(-100.0f, -100.0f, -100.0f, -100.0f);
-      echo_return_loss.Set(-100.0f, -100.0f, -100.0f, -100.0f);
-      echo_return_loss_enhancement.Set(-100.0f, -100.0f, -100.0f, -100.0f);
-      a_nlp.Set(-100.0f, -100.0f, -100.0f, -100.0f);
-    }
+    AudioProcessingStatistics();
+    AudioProcessingStatistics(const AudioProcessingStatistics& other);
+    ~AudioProcessingStatistics();

    // AEC Statistics.
    // RERL = ERL + ERLE
@ -541,10 +538,10 @@ class AudioProcessing {
    int delay_standard_deviation = -1;
    float fraction_poor_delays = -1.0f;

-    // Residual echo detector likelihood. This value is not yet calculated and
-    // is currently always set to zero.
-    // TODO(ivoc): Implement this stat.
+    // Residual echo detector likelihood.
    float residual_echo_likelihood = -1.0f;
+    // Maximum residual echo likelihood from the last time period.
+    float residual_echo_likelihood_recent_max = -1.0f;
  };

  // TODO(ivoc): Make this pure virtual when all subclasses have been updated.
--- a/webrtc/modules/audio_processing/residual_echo_detector.cc
+++ b/webrtc/modules/audio_processing/residual_echo_detector.cc
@ -26,6 +26,8 @@ constexpr size_t kLookbackFrames = 650;
 // TODO(ivoc): Verify the size of this buffer.
 constexpr size_t kRenderBufferSize = 30;
 constexpr float kAlpha = 0.001f;
+// 10 seconds of data, updated every 10 ms.
+constexpr size_t kAggregationBufferSize = 10 * 100;

 }  // namespace

@ -36,7 +38,8 @@ ResidualEchoDetector::ResidualEchoDetector()
      render_power_(kLookbackFrames),
      render_power_mean_(kLookbackFrames),
      render_power_std_dev_(kLookbackFrames),
-      covariances_(kLookbackFrames){};
+      covariances_(kLookbackFrames),
+      recent_likelihood_max_(kAggregationBufferSize) {}

 ResidualEchoDetector::~ResidualEchoDetector() = default;

@ -107,6 +110,9 @@ void ResidualEchoDetector::AnalyzeCaptureAudio(
  RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood",
                       echo_percentage, 0, 100, 100 /* number of bins */);

+  // Update the buffer of recent likelihood values.
+  recent_likelihood_max_.Update(echo_likelihood_);
+
  // Update the next insertion index.
  ++next_insertion_index_;
  next_insertion_index_ %= kLookbackFrames;
@ -119,6 +125,7 @@ void ResidualEchoDetector::Initialize() {
  std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f);
  render_statistics_.Clear();
  capture_statistics_.Clear();
+  recent_likelihood_max_.Clear();
  for (auto& cov : covariances_) {
    cov.Clear();
  }
--- a/webrtc/modules/audio_processing/residual_echo_detector.h
+++ b/webrtc/modules/audio_processing/residual_echo_detector.h
@ -16,6 +16,7 @@
 #include "webrtc/base/array_view.h"
 #include "webrtc/modules/audio_processing/echo_detector/circular_buffer.h"
 #include "webrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h"
+#include "webrtc/modules/audio_processing/echo_detector/moving_max.h"
 #include "webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h"

 namespace webrtc {
@ -46,6 +47,10 @@ class ResidualEchoDetector {
  // This function should be called while holding the capture lock.
  float echo_likelihood() const { return echo_likelihood_; }

+  // Returns the maximum echo likelihood currently tracked by
+  // |recent_likelihood_max_|.
+  // NOTE(review): echo_likelihood() above must be called while holding the
+  // capture lock; presumably the same applies here - confirm.
+  float echo_likelihood_recent_max() const {
+    return recent_likelihood_max_.max();
+  }
+
 private:
  // Keep track if the |Process| function has been previously called.
  bool first_process_call_ = true;
@ -76,6 +81,7 @@ class ResidualEchoDetector {
  float echo_likelihood_ = 0.f;
  // Reliability of the current likelihood.
  float reliability_ = 0.f;
+  MovingMax recent_likelihood_max_;
 };

 }  // namespace webrtc