Refactor RMSLevel and give it new functionality

This change rewrites RMSLevel, making it accept an ArrayView as input, and modifies the implementation somewhat. It also makes the class keep track of the peak RMS in addition to the average RMS over the measurement period. New tests are added to cover the new functionality. BUG=webrtc:6622 Review-Url: https://codereview.webrtc.org/2535523002 Cr-Commit-Position: refs/heads/master@{#15294}
2016-11-29 04:26:24 -08:00 · 2016-11-29 04:26:24 -08:00 · 5049942219
commit 5049942219
parent 1308c69466
7 changed files with 196 additions and 74 deletions
--- a/webrtc/modules/audio_processing/level_estimator_impl.cc
+++ b/webrtc/modules/audio_processing/level_estimator_impl.cc
@ -10,6 +10,7 @@

 #include "webrtc/modules/audio_processing/level_estimator_impl.h"

+#include "webrtc/base/array_view.h"
 #include "webrtc/modules/audio_processing/audio_buffer.h"
 #include "webrtc/modules/audio_processing/rms_level.h"
 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"
@ -17,7 +18,7 @@
 namespace webrtc {

 LevelEstimatorImpl::LevelEstimatorImpl(rtc::CriticalSection* crit)
-    : crit_(crit), rms_(new RMSLevel()) {
+    : crit_(crit), rms_(new RmsLevel()) {
  RTC_DCHECK(crit);
 }

@ -36,7 +37,8 @@ void LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) {
  }

  for (size_t i = 0; i < audio->num_channels(); i++) {
-    rms_->Process(audio->channels_const()[i], audio->num_frames());
+    rms_->Analyze(rtc::ArrayView<const int16_t>(audio->channels_const()[i],
+                                                audio->num_frames()));
  }
 }

@ -60,6 +62,6 @@ int LevelEstimatorImpl::RMS() {
    return AudioProcessing::kNotEnabledError;
  }

-  return rms_->RMS();
+  return rms_->Average();
 }
 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/level_estimator_impl.h
+++ b/webrtc/modules/audio_processing/level_estimator_impl.h
@ -20,7 +20,7 @@
 namespace webrtc {

 class AudioBuffer;
-class RMSLevel;
+class RmsLevel;

 class LevelEstimatorImpl : public LevelEstimator {
 public:
@ -39,7 +39,7 @@ class LevelEstimatorImpl : public LevelEstimator {
 private:
  rtc::CriticalSection* const crit_ = nullptr;
  bool enabled_ GUARDED_BY(crit_) = false;
-  std::unique_ptr<RMSLevel> rms_ GUARDED_BY(crit_);
+  std::unique_ptr<RmsLevel> rms_ GUARDED_BY(crit_);
  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(LevelEstimatorImpl);
 };
 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/rms_level.cc
+++ b/webrtc/modules/audio_processing/rms_level.cc
@ -11,52 +11,98 @@
 #include "webrtc/modules/audio_processing/rms_level.h"

 #include <math.h>
+#include <algorithm>
+#include <numeric>

 #include "webrtc/base/checks.h"

 namespace webrtc {
+namespace {
+static constexpr float kMaxSquaredLevel = 32768 * 32768;
+static constexpr int kMinLevelDb = 127;
+// kMinLevel is the level corresponding to kMinLevelDb, that is 10^(-127/10).
+static constexpr float kMinLevel = 1.995262314968883e-13f;

-static const float kMaxSquaredLevel = 32768 * 32768;
-
-RMSLevel::RMSLevel()
-    : sum_square_(0),
-      sample_count_(0) {}
-
-RMSLevel::~RMSLevel() {}
-
-void RMSLevel::Reset() {
-  sum_square_ = 0;
-  sample_count_ = 0;
-}
-
-void RMSLevel::Process(const int16_t* data, size_t length) {
-  for (size_t i = 0; i < length; ++i) {
-    sum_square_ += data[i] * data[i];
+// Calculates the normalized RMS value from a mean square value. The input
+// should be the sum of squared samples divided by the number of samples. The
+// value will be normalized to full range before computing the RMS, which is
+// returned as a negated dBFS. That is, 0 is full amplitude while 127 is very
+// faint.
+int ComputeRms(float mean_square) {
+  if (mean_square <= kMinLevel * kMaxSquaredLevel) {
+    // Very faint; simply return the minimum value.
+    return kMinLevelDb;
  }
-  sample_count_ += length;
-}
-
-void RMSLevel::ProcessMuted(size_t length) {
-  sample_count_ += length;
-}
-
-int RMSLevel::RMS() {
-  if (sample_count_ == 0 || sum_square_ == 0) {
-    Reset();
-    return kMinLevel;
-  }
-
  // Normalize by the max level.
-  float rms = sum_square_ / (sample_count_ * kMaxSquaredLevel);
+  const float mean_square_norm = mean_square / kMaxSquaredLevel;
+  RTC_DCHECK_GT(mean_square_norm, kMinLevel);
  // 20log_10(x^0.5) = 10log_10(x)
-  rms = 10 * log10(rms);
-  RTC_DCHECK_LE(rms, 0);
-  if (rms < -kMinLevel)
-    rms = -kMinLevel;
+  const float rms = 10.f * log10(mean_square_norm);
+  RTC_DCHECK_LE(rms, 0.f);
+  RTC_DCHECK_GT(rms, -kMinLevelDb);
+  // Return the negated value.
+  return static_cast<int>(-rms + 0.5f);
+}
+}  // namespace

-  rms = -rms;
+RmsLevel::RmsLevel() {
  Reset();
-  return static_cast<int>(rms + 0.5);
 }

+RmsLevel::~RmsLevel() = default;
+
+void RmsLevel::Reset() {
+  sum_square_ = 0.f;
+  sample_count_ = 0;
+  max_sum_square_ = 0.f;
+  block_size_ = rtc::Optional<size_t>();
+}
+
+void RmsLevel::Analyze(rtc::ArrayView<const int16_t> data) {
+  if (data.empty()) {
+    return;
+  }
+
+  CheckBlockSize(data.size());
+
+  const float sum_square =
+      std::accumulate(data.begin(), data.end(), 0.f,
+                      [](float a, int16_t b) { return a + b * b; });
+  RTC_DCHECK_GE(sum_square, 0.f);
+  sum_square_ += sum_square;
+  sample_count_ += data.size();
+
+  max_sum_square_ = std::max(max_sum_square_, sum_square);
+}
+
+void RmsLevel::AnalyzeMuted(size_t length) {
+  CheckBlockSize(length);
+  sample_count_ += length;
+}
+
+int RmsLevel::Average() {
+  int rms = (sample_count_ == 0) ? kMinLevelDb
+                                 : ComputeRms(sum_square_ / sample_count_);
+  Reset();
+  return rms;
+}
+
+RmsLevel::Levels RmsLevel::AverageAndPeak() {
+  // Note that block_size_ should by design always be non-empty when
+  // sample_count_ != 0. Also, the * operator of rtc::Optional enforces this
+  // with a DCHECK.
+  Levels levels = (sample_count_ == 0)
+                      ? Levels{kMinLevelDb, kMinLevelDb}
+                      : Levels{ComputeRms(sum_square_ / sample_count_),
+                               ComputeRms(max_sum_square_ / *block_size_)};
+  Reset();
+  return levels;
+}
+
+void RmsLevel::CheckBlockSize(size_t block_size) {
+  if (block_size_ != rtc::Optional<size_t>(block_size)) {
+    Reset();
+    block_size_ = rtc::Optional<size_t>(block_size);
+  }
+}
 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/rms_level.h
+++ b/webrtc/modules/audio_processing/rms_level.h
@ -11,8 +11,8 @@
 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
 #define WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_

-#include <cstddef>
-
+#include "webrtc/base/array_view.h"
+#include "webrtc/base/optional.h"
 #include "webrtc/typedefs.h"

 namespace webrtc {
@ -23,34 +23,48 @@ namespace webrtc {
 // with the intent that it can provide the RTP audio level indication.
 //
 // The expected approach is to provide constant-sized chunks of audio to
-// Process(). When enough chunks have been accumulated to form a packet, call
-// RMS() to get the audio level indicator for the RTP header.
-class RMSLevel {
+// Analyze(). When enough chunks have been accumulated to form a packet, call
+// Average() to get the audio level indicator for the RTP header.
+class RmsLevel {
 public:
-  static const int kMinLevel = 127;
+  struct Levels {
+    int average;
+    int peak;
+  };

-  RMSLevel();
-  ~RMSLevel();
+  RmsLevel();
+  ~RmsLevel();

  // Can be called to reset internal states, but is not required during normal
  // operation.
  void Reset();

-  // Pass each chunk of audio to Process() to accumulate the level.
-  void Process(const int16_t* data, size_t length);
+  // Pass each chunk of audio to Analyze() to accumulate the level.
+  void Analyze(rtc::ArrayView<const int16_t> data);

  // If all samples with the given |length| have a magnitude of zero, this is
  // a shortcut to avoid some computation.
-  void ProcessMuted(size_t length);
+  void AnalyzeMuted(size_t length);

-  // Computes the RMS level over all data passed to Process() since the last
-  // call to RMS(). The returned value is positive but should be interpreted as
-  // negative as per the RFC. It is constrained to [0, 127].
-  int RMS();
+  // Computes the RMS level over all data passed to Analyze() since the last
+  // call to Average(). The returned value is positive but should be interpreted
+  // as negative as per the RFC. It is constrained to [0, 127]. Resets the
+  // internal state to start a new measurement period.
+  int Average();
+
+  // Like Average() above, but also returns the RMS peak value. Resets the
+  // internal state to start a new measurement period.
+  Levels AverageAndPeak();

 private:
+  // Compares |block_size| with |block_size_|. If they are different, calls
+  // Reset() and stores the new size.
+  void CheckBlockSize(size_t block_size);
+
  float sum_square_;
  size_t sample_count_;
+  float max_sum_square_;
+  rtc::Optional<size_t> block_size_;
 };

 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/rms_level_unittest.cc
+++ b/webrtc/modules/audio_processing/rms_level_unittest.cc
@ -12,6 +12,7 @@
 #include <vector>

 #include "webrtc/base/array_view.h"
+#include "webrtc/base/checks.h"
 #include "webrtc/base/mathutils.h"
 #include "webrtc/base/safe_conversions.h"
 #include "webrtc/modules/audio_processing/rms_level.h"
@ -22,12 +23,11 @@ namespace {
 constexpr int kSampleRateHz = 48000;
 constexpr size_t kBlockSizeSamples = kSampleRateHz / 100;

-std::unique_ptr<RMSLevel> RunTest(rtc::ArrayView<const int16_t> input) {
-  std::unique_ptr<RMSLevel> level(new RMSLevel);
+std::unique_ptr<RmsLevel> RunTest(rtc::ArrayView<const int16_t> input) {
+  std::unique_ptr<RmsLevel> level(new RmsLevel);
  for (size_t n = 0; n + kBlockSizeSamples <= input.size();
       n += kBlockSizeSamples) {
-    level->Process(input.subview(n, kBlockSizeSamples).data(),
-                   kBlockSizeSamples);
+    level->Analyze(input.subview(n, kBlockSizeSamples));
  }
  return level;
 }
@ -47,46 +47,104 @@ std::vector<int16_t> CreateSinusoid(int frequency_hz,
 TEST(RmsLevelTest, Run1000HzFullScale) {
  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
  auto level = RunTest(x);
-  EXPECT_EQ(3, level->RMS());  // -3 dBFS
+  EXPECT_EQ(3, level->Average());  // -3 dBFS
+}
+
+TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) {
+  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  auto stats = level->AverageAndPeak();
+  EXPECT_EQ(3, stats.average);  // -3 dBFS
+  EXPECT_EQ(3, stats.peak);
 }

 TEST(RmsLevelTest, Run1000HzHalfScale) {
  auto x = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz);
  auto level = RunTest(x);
-  EXPECT_EQ(9, level->RMS());  // -9 dBFS
+  EXPECT_EQ(9, level->Average());  // -9 dBFS
 }

 TEST(RmsLevelTest, RunZeros) {
  std::vector<int16_t> x(kSampleRateHz, 0);  // 1 second of pure silence.
  auto level = RunTest(x);
-  EXPECT_EQ(127, level->RMS());
+  EXPECT_EQ(127, level->Average());
+}
+
+TEST(RmsLevelTest, RunZerosAverageAndPeak) {
+  std::vector<int16_t> x(kSampleRateHz, 0);  // 1 second of pure silence.
+  auto level = RunTest(x);
+  auto stats = level->AverageAndPeak();
+  EXPECT_EQ(127, stats.average);
+  EXPECT_EQ(127, stats.peak);
 }

 TEST(RmsLevelTest, NoSamples) {
-  RMSLevel level;
-  EXPECT_EQ(127, level.RMS());  // Return minimum if no samples are given.
+  RmsLevel level;
+  EXPECT_EQ(127, level.Average());  // Return minimum if no samples are given.
+}
+
+TEST(RmsLevelTest, NoSamplesAverageAndPeak) {
+  RmsLevel level;
+  auto stats = level.AverageAndPeak();
+  EXPECT_EQ(127, stats.average);
+  EXPECT_EQ(127, stats.peak);
 }

 TEST(RmsLevelTest, PollTwice) {
  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
  auto level = RunTest(x);
-  level->RMS();
-  EXPECT_EQ(127, level->RMS());  // Stats should be reset at this point.
+  level->Average();
+  EXPECT_EQ(127, level->Average());  // Stats should be reset at this point.
 }

 TEST(RmsLevelTest, Reset) {
  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
  auto level = RunTest(x);
  level->Reset();
-  EXPECT_EQ(127, level->RMS());  // Stats should be reset at this point.
+  EXPECT_EQ(127, level->Average());  // Stats should be reset at this point.
 }

 // Inserts 1 second of full-scale sinusoid, followed by 1 second of muted.
 TEST(RmsLevelTest, ProcessMuted) {
  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
  auto level = RunTest(x);
-  level->ProcessMuted(kSampleRateHz);
-  EXPECT_EQ(6, level->RMS());  // Average RMS halved due to the silence.
+  const size_t kBlocksPerSecond = rtc::CheckedDivExact(
+      static_cast<size_t>(kSampleRateHz), kBlockSizeSamples);
+  for (size_t i = 0; i < kBlocksPerSecond; ++i) {
+    level->AnalyzeMuted(kBlockSizeSamples);
+  }
+  EXPECT_EQ(6, level->Average());  // Average RMS halved due to the silence.
+}
+
+// Inserts 1 second of half-scale sinusoid, followed by 10 ms of full-scale, and
+// finally 1 second of half-scale again. Expect the average to be -9 dBFS due
+// to the vast majority of the signal being half-scale, and the peak to be
+// -3 dBFS.
+TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) {
+  auto half_scale = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz);
+  auto full_scale = CreateSinusoid(1000, INT16_MAX, kSampleRateHz / 100);
+  auto x = half_scale;
+  x.insert(x.end(), full_scale.begin(), full_scale.end());
+  x.insert(x.end(), half_scale.begin(), half_scale.end());
+  ASSERT_EQ(static_cast<size_t>(2 * kSampleRateHz + kSampleRateHz / 100),
+            x.size());
+  auto level = RunTest(x);
+  auto stats = level->AverageAndPeak();
+  EXPECT_EQ(9, stats.average);
+  EXPECT_EQ(3, stats.peak);
+}
+
+TEST(RmsLevelTest, ResetOnBlockSizeChange) {
+  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  // Create a new signal with half amplitude, but double block length.
+  auto y = CreateSinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2);
+  level->Analyze(y);
+  auto stats = level->AverageAndPeak();
+  // Expect all stats to only be influenced by the last signal (y), since the
+  // changed block size should reset the stats.
+  EXPECT_EQ(9, stats.average);
+  EXPECT_EQ(9, stats.peak);
 }

 }  // namespace webrtc
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@ -13,6 +13,7 @@
 #include <algorithm>
 #include <utility>

+#include "webrtc/base/array_view.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/criticalsection.h"
 #include "webrtc/base/format_macros.h"
@ -364,7 +365,7 @@ int32_t Channel::SendData(FrameType frameType,
    // Store current audio level in the RTP/RTCP module.
    // The level will be used in combination with voice-activity state
    // (frameType) to add an RTP header extension
-    _rtpRtcpModule->SetAudioLevel(rms_level_.RMS());
+    _rtpRtcpModule->SetAudioLevel(rms_level_.Average());
  }

  // Push data from ACM to RTP/RTCP-module to deliver audio frame for
@ -2780,9 +2781,10 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) {
        _audioFrame.samples_per_channel_ * _audioFrame.num_channels_;
    RTC_CHECK_LE(length, sizeof(_audioFrame.data_));
    if (is_muted && previous_frame_muted_) {
-      rms_level_.ProcessMuted(length);
+      rms_level_.AnalyzeMuted(length);
    } else {
-      rms_level_.Process(_audioFrame.data_, length);
+      rms_level_.Analyze(
+          rtc::ArrayView<const int16_t>(_audioFrame.data_, length));
    }
  }
  previous_frame_muted_ = is_muted;
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@ -508,7 +508,7 @@ class Channel
  VoiceEngineObserver* _voiceEngineObserverPtr;  // owned by base
  rtc::CriticalSection* _callbackCritSectPtr;    // owned by base
  Transport* _transportPtr;  // WebRtc socket or external transport
-  RMSLevel rms_level_;
+  RmsLevel rms_level_;
  int32_t _sendFrameType;  // Send data is voice, 1-voice, 0-otherwise
  // VoEBase
  bool _externalMixing;