From 95611837087e57f59ece87f02b9479e58c6904e8 Mon Sep 17 00:00:00 2001
From: aleloi <aleloi@webrtc.org>
Date: Tue, 8 Nov 2016 06:39:50 -0800
Subject: [PATCH] Changed mixing to be done at the minimal possible frequency.

This change changes mixing to be done at the lowest possible
APM-native rate that does not lead to quality loss. An Audio
Processing-native rate is one of 8, 16, 32, or 48 kHz. Mixing at a
lower sampling rate and avoiding resampling can in many cases lead to
big efficiency improvements, as reported by experiments.

This CL also fixes a design issue with the AudioMixer: audio at
non-native rates is no longer fed to the APM instance which is the
limiter.

NOTRY=True
BUG=webrtc:6346

Review-Url: https://codereview.webrtc.org/2458703002
Cr-Commit-Position: refs/heads/master@{#14980}
---
 webrtc/api/audio/audio_mixer.h                |  12 +-
 .../modules/audio_mixer/audio_mixer_impl.cc   |  34 +++++-
 webrtc/modules/audio_mixer/audio_mixer_impl.h |   3 +-
 .../audio_mixer/audio_mixer_impl_unittest.cc  | 112 ++++++++++++++----
 4 files changed, 132 insertions(+), 29 deletions(-)
diff --git a/webrtc/api/audio/audio_mixer.h b/webrtc/api/audio/audio_mixer.h
index f786d6c99b..dbb58ca74a 100644
--- a/webrtc/api/audio/audio_mixer.h
+++ b/webrtc/api/audio/audio_mixer.h
@@ -60,11 +60,13 @@ class AudioMixer : public rtc::RefCountInterface {
 
   // Performs mixing by asking registered audio sources for audio. The
   // mixed result is placed in the provided AudioFrame. This method
-  // will only be called from a single thread. The rate and channels
-  // arguments specify the rate and number of channels of the mix
-  // result. All fields in |audio_frame_for_mixing| must be updated.
-  virtual void Mix(int sample_rate_hz,
-                   size_t number_of_channels,
+  // will only be called from a single thread. The channels argument
+  // specifies the number of channels of the mix result. The mixer
+  // should mix at a rate that doesn't cause quality loss of the
+  // sources' audio. The mixing rate is one of the rates listed in
+  // AudioProcessing::NativeRate. All fields in
+  // |audio_frame_for_mixing| must be updated.
+  virtual void Mix(size_t number_of_channels,
                    AudioFrame* audio_frame_for_mixing) = 0;
 
  protected:
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.cc b/webrtc/modules/audio_mixer/audio_mixer_impl.cc
index a7af8c33d7..d7025f9e67 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl.cc
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl.cc
@@ -137,6 +137,32 @@ AudioMixerImpl::SourceStatusList::iterator FindSourceInList(
       });
 }
 
+// Rounds the maximal audio source frequency up to an APM-native
+// frequency.
+int CalculateMixingFrequency(
+    const AudioMixerImpl::SourceStatusList& audio_source_list) {
+  if (audio_source_list.empty()) {
+    return AudioMixerImpl::kDefaultFrequency;
+  }
+  using NativeRate = AudioProcessing::NativeRate;
+  int maximal_frequency = 0;
+  for (const auto& source_status : audio_source_list) {
+    const int source_needed_frequency =
+        source_status->audio_source->PreferredSampleRate();
+    RTC_DCHECK_LE(NativeRate::kSampleRate8kHz, source_needed_frequency);
+    RTC_DCHECK_LE(source_needed_frequency, NativeRate::kSampleRate48kHz);
+    maximal_frequency = std::max(maximal_frequency, source_needed_frequency);
+  }
+
+  static constexpr NativeRate native_rates[] = {
+      NativeRate::kSampleRate8kHz, NativeRate::kSampleRate16kHz,
+      NativeRate::kSampleRate32kHz, NativeRate::kSampleRate48kHz};
+  const auto rounded_up_index = std::lower_bound(
+      std::begin(native_rates), std::end(native_rates), maximal_frequency);
+  RTC_DCHECK(rounded_up_index != std::end(native_rates));
+  return *rounded_up_index;
+}
+
 }  // namespace
 
 AudioMixerImpl::AudioMixerImpl(std::unique_ptr<AudioProcessing> limiter)
@@ -186,12 +212,16 @@ rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create() {
       new rtc::RefCountedObject<AudioMixerImpl>(std::move(limiter)));
 }
 
-void AudioMixerImpl::Mix(int sample_rate,
-                         size_t number_of_channels,
+void AudioMixerImpl::Mix(size_t number_of_channels,
                          AudioFrame* audio_frame_for_mixing) {
   RTC_DCHECK(number_of_channels == 1 || number_of_channels == 2);
   RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
 
+  const int sample_rate = [&]() {
+    rtc::CritScope lock(&crit_);
+    return CalculateMixingFrequency(audio_source_list_);
+  }();
+
   if (OutputFrequency() != sample_rate) {
     SetOutputFrequency(sample_rate);
   }
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.h b/webrtc/modules/audio_mixer/audio_mixer_impl.h
index e2f55fce40..ec2989e554 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl.h
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl.h
@@ -55,8 +55,7 @@ class AudioMixerImpl : public AudioMixer {
   bool AddSource(Source* audio_source) override;
   bool RemoveSource(Source* audio_source) override;
 
-  void Mix(int sample_rate_hz,
-           size_t number_of_channels,
+  void Mix(size_t number_of_channels,
            AudioFrame* audio_frame_for_mixing) override LOCKS_EXCLUDED(crit_);
 
   // Returns true if the source was mixed last round. Returns
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
index e743192d02..517d9e5219 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
@@ -56,6 +56,8 @@ class MockMixerAudioSource : public AudioMixer::Source {
     ON_CALL(*this, GetAudioFrameWithInfo(_, _))
         .WillByDefault(
             Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo));
+    ON_CALL(*this, PreferredSampleRate())
+        .WillByDefault(Return(kDefaultSampleRateHz));
   }
 
   MOCK_METHOD2(GetAudioFrameWithInfo,
@@ -71,13 +73,16 @@ class MockMixerAudioSource : public AudioMixer::Source {
   }
 
  private:
-  AudioFrame fake_frame_;
-  AudioFrameInfo fake_audio_frame_info_;
   AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
                                         AudioFrame* audio_frame) {
     audio_frame->CopyFrom(fake_frame_);
+    audio_frame->sample_rate_hz_ = sample_rate_hz;
+    audio_frame->samples_per_channel_ = sample_rate_hz / 100;
     return fake_info();
   }
+
+  AudioFrame fake_frame_;
+  AudioFrameInfo fake_audio_frame_info_;
 };
 
 // Creates participants from |frames| and |frame_info| and adds them
@@ -104,7 +109,7 @@ void MixAndCompare(
         .Times(Exactly(1));
   }
 
-  mixer->Mix(kDefaultSampleRateHz, 1, &frame_for_mixing);
+  mixer->Mix(1, &frame_for_mixing);
 
   for (int i = 0; i < num_audio_sources; i++) {
     EXPECT_EQ(expected_status[i],
@@ -113,6 +118,18 @@ void MixAndCompare(
   }
 }
 
+void MixMonoAtGivenNativeRate(int native_sample_rate,
+                              AudioFrame* mix_frame,
+                              rtc::scoped_refptr<AudioMixer> mixer,
+                              MockMixerAudioSource* audio_source) {
+  ON_CALL(*audio_source, PreferredSampleRate())
+      .WillByDefault(Return(native_sample_rate));
+  audio_source->fake_frame()->sample_rate_hz_ = native_sample_rate;
+  audio_source->fake_frame()->samples_per_channel_ = native_sample_rate / 100;
+
+  mixer->Mix(1, mix_frame);
+}
+
 TEST(AudioMixer, LargestEnergyVadActiveMixed) {
   constexpr int kAudioSources =
       AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 3;
@@ -138,8 +155,7 @@ TEST(AudioMixer, LargestEnergyVadActiveMixed) {
       AudioFrame::kVadPassive;
 
   AudioFrame audio_frame;
-  mixer->Mix(kDefaultSampleRateHz,
-             1,  // number of channels
+  mixer->Mix(1,  // number of channels
              &audio_frame);
 
   for (int i = 0; i < kAudioSources; ++i) {
@@ -176,8 +192,7 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
   AudioFrame audio_frame;
   // Two mix iteration to compare after the ramp-up step.
   for (int i = 0; i < 2; i++) {
-    mixer->Mix(kDefaultSampleRateHz,
-               1,  // number of channels
+    mixer->Mix(1,  // number of channels
                &audio_frame);
   }
 
@@ -185,21 +200,78 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
       0, memcmp(participant.fake_frame()->data_, audio_frame.data_, n_samples));
 }
 
-TEST(AudioMixer, ParticipantSampleRate) {
+TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) {
+  const auto mixer = AudioMixerImpl::Create();
+
+  MockMixerAudioSource audio_source;
+  ResetFrame(audio_source.fake_frame());
+
+  mixer->AddSource(&audio_source);
+
+  for (auto frequency : {8000, 16000, 32000, 48000}) {
+    EXPECT_CALL(audio_source, GetAudioFrameWithInfo(frequency, _))
+        .Times(Exactly(1));
+
+    MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer,
+                             &audio_source);
+  }
+}
+
+TEST(AudioMixer, MixerShouldMixAtNativeSourceRate) {
+  const auto mixer = AudioMixerImpl::Create();
+
+  MockMixerAudioSource audio_source;
+  ResetFrame(audio_source.fake_frame());
+
+  mixer->AddSource(&audio_source);
+
+  for (auto frequency : {8000, 16000, 32000, 48000}) {
+    MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer,
+                             &audio_source);
+
+    EXPECT_EQ(frequency, frame_for_mixing.sample_rate_hz_);
+  }
+}
+
+TEST(AudioMixer, MixerShouldAlwaysMixAtNativeRate) {
   const auto mixer = AudioMixerImpl::Create();
 
   MockMixerAudioSource participant;
   ResetFrame(participant.fake_frame());
+  mixer->AddSource(&participant);
 
-  EXPECT_TRUE(mixer->AddSource(&participant));
-  for (auto frequency : {8000, 16000, 32000, 48000}) {
-    EXPECT_CALL(participant, GetAudioFrameWithInfo(frequency, _))
-        .Times(Exactly(1));
-    participant.fake_frame()->sample_rate_hz_ = frequency;
-    participant.fake_frame()->samples_per_channel_ = frequency / 100;
-    mixer->Mix(frequency, 1, &frame_for_mixing);
-    EXPECT_EQ(frequency, frame_for_mixing.sample_rate_hz_);
+  const int needed_frequency = 44100;
+  ON_CALL(participant, PreferredSampleRate())
+      .WillByDefault(Return(needed_frequency));
+
+  // We expect mixing frequency to be native and >= needed_frequency.
+  const int expected_mix_frequency = 48000;
+  EXPECT_CALL(participant, GetAudioFrameWithInfo(expected_mix_frequency, _))
+      .Times(Exactly(1));
+  participant.fake_frame()->sample_rate_hz_ = expected_mix_frequency;
+  participant.fake_frame()->samples_per_channel_ = expected_mix_frequency / 100;
+
+  mixer->Mix(1, &frame_for_mixing);
+
+  EXPECT_EQ(48000, frame_for_mixing.sample_rate_hz_);
+}
+
+// Check that the mixing rate is always >= participants preferred rate.
+TEST(AudioMixer, ShouldNotCauseQualityLossForMultipleSources) {
+  const auto mixer = AudioMixerImpl::Create();
+
+  std::vector<MockMixerAudioSource> audio_sources(2);
+  const std::vector<int> source_sample_rates = {8000, 16000};
+  for (int i = 0; i < 2; ++i) {
+    auto& source = audio_sources[i];
+    ResetFrame(source.fake_frame());
+    mixer->AddSource(&source);
+    const auto sample_rate = source_sample_rates[i];
+    EXPECT_CALL(source, PreferredSampleRate()).WillOnce(Return(sample_rate));
+
+    EXPECT_CALL(source, GetAudioFrameWithInfo(testing::Ge(sample_rate), _));
   }
+  mixer->Mix(1, &frame_for_mixing);
 }
 
 TEST(AudioMixer, ParticipantNumberOfChannels) {
@@ -212,7 +284,7 @@ TEST(AudioMixer, ParticipantNumberOfChannels) {
   for (size_t number_of_channels : {1, 2}) {
     EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
-    mixer->Mix(kDefaultSampleRateHz, number_of_channels, &frame_for_mixing);
+    mixer->Mix(number_of_channels, &frame_for_mixing);
     EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
   }
 }
@@ -241,7 +313,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
   }
 
   // First mixer iteration
-  mixer->Mix(kDefaultSampleRateHz, 1, &frame_for_mixing);
+  mixer->Mix(1, &frame_for_mixing);
 
   // All participants but the loudest should have been mixed.
   for (int i = 0; i < kAudioSources - 1; i++) {
@@ -256,7 +328,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
         .Times(Exactly(1));
   }
 
-  mixer->Mix(kDefaultSampleRateHz, 1, &frame_for_mixing);
+  mixer->Mix(1, &frame_for_mixing);
 
   // The most quiet participant should not have been mixed.
   EXPECT_FALSE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[0]))
@@ -291,7 +363,7 @@ TEST(AudioMixer, ConstructFromOtherThread) {
       .Times(Exactly(1));
 
   // Do one mixer iteration
-  mixer->Mix(kDefaultSampleRateHz, 1, &frame_for_mixing);
+  mixer->Mix(1, &frame_for_mixing);
 }
 
 TEST(AudioMixer, MutedShouldMixAfterUnmuted) {