From 652ac89c09adba4be85a27b0be8c2fc2395d16f1 Mon Sep 17 00:00:00 2001
From: aleloi <aleloi@webrtc.org>
Date: Wed, 7 Sep 2016 07:42:14 -0700
Subject: [PATCH] Simplifications of the mixing algorithm.

Methods are named more consistently and have more consistent
signatures. The call structure of mixing is slightly
simplified. Anonymous participants are also ramped up.

NOTRY=True

Review-Url: https://codereview.webrtc.org/2298163002
Cr-Commit-Position: refs/heads/master@{#14110}
---
 .../modules/audio_mixer/audio_mixer_impl.cc   | 147 +++++++++---------
 webrtc/modules/audio_mixer/audio_mixer_impl.h |  32 ++--
 .../audio_mixer/test/audio_mixer_unittest.cc  |   2 +
 3 files changed, 83 insertions(+), 98 deletions(-)
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.cc b/webrtc/modules/audio_mixer/audio_mixer_impl.cc
index 93b78bd4c3..ee78eef2a1 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl.cc
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl.cc
@@ -37,6 +37,17 @@ class SourceFrame {
     }
   }
 
+  SourceFrame(MixerAudioSource* p,
+              AudioFrame* a,
+              bool m,
+              bool was_mixed_before,
+              uint32_t energy)
+      : audio_source_(p),
+        audio_frame_(a),
+        muted_(m),
+        energy_(energy),
+        was_mixed_before_(was_mixed_before) {}
+
   // a.shouldMixBefore(b) is used to select mixer participants.
   bool shouldMixBefore(const SourceFrame& other) const {
     if (muted_ != other.muted_) {
@@ -70,20 +81,19 @@ void RemixFrame(AudioFrame* frame, size_t number_of_channels) {
   }
 }
 
-// Mix |frame| into |mixed_frame|, with saturation protection and upmixing.
-// These effects are applied to |frame| itself prior to mixing. Assumes that
-// |mixed_frame| always has at least as many channels as |frame|. Supports
-// stereo at most.
-//
-void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame, bool use_limiter) {
-  RTC_DCHECK_GE(mixed_frame->num_channels_, frame->num_channels_);
-  if (use_limiter) {
-    // Divide by two to avoid saturation in the mixing.
-    // This is only meaningful if the limiter will be used.
-    *frame >>= 1;
+void Ramp(const std::vector<SourceFrame>& mixed_sources_and_frames) {
+  for (const auto& source_frame : mixed_sources_and_frames) {
+    // Ramp in previously unmixed.
+    if (!source_frame.was_mixed_before_) {
+      NewMixerRampIn(source_frame.audio_frame_);
+    }
+
+    const bool is_mixed = source_frame.audio_source_->_mixHistory->IsMixed();
+    // Ramp out currently unmixed.
+    if (source_frame.was_mixed_before_ && !is_mixed) {
+      NewMixerRampOut(source_frame.audio_frame_);
+    }
   }
-  RTC_DCHECK_EQ(frame->num_channels_, mixed_frame->num_channels_);
-  *mixed_frame += *frame;
 }
 
 }  // namespace
@@ -188,21 +198,21 @@ void AudioMixerImpl::Mix(int sample_rate,
     SetOutputFrequency(static_cast<Frequency>(sample_rate));
   }
 
-  AudioFrameList mixList;
-  AudioFrameList additionalFramesList;
+  AudioFrameList mix_list;
+  AudioFrameList anonymous_mix_list;
   int num_mixed_audio_sources;
   {
     rtc::CritScope lock(&crit_);
-    mixList = UpdateToMix(kMaximumAmountOfMixedAudioSources);
-    GetAdditionalAudio(&additionalFramesList);
+    mix_list = GetNonAnonymousAudio();
+    anonymous_mix_list = GetAnonymousAudio();
     num_mixed_audio_sources = static_cast<int>(num_mixed_audio_sources_);
   }
 
-  for (FrameAndMuteInfo& frame_and_mute : mixList) {
-    RemixFrame(frame_and_mute.frame, number_of_channels);
-  }
-  for (FrameAndMuteInfo& frame_and_mute : additionalFramesList) {
-    RemixFrame(frame_and_mute.frame, number_of_channels);
+  mix_list.insert(mix_list.begin(), anonymous_mix_list.begin(),
+                  anonymous_mix_list.end());
+
+  for (const auto& frame : mix_list) {
+    RemixFrame(frame, number_of_channels);
   }
 
   audio_frame_for_mixing->UpdateFrame(
@@ -213,10 +223,9 @@ void AudioMixerImpl::Mix(int sample_rate,
 
   use_limiter_ = num_mixed_audio_sources > 1;
 
-  // We only use the limiter if it supports the output sample rate and
-  // we're actually mixing multiple streams.
-  MixFromList(audio_frame_for_mixing, mixList, id_, use_limiter_);
-  MixAnonomouslyFromList(audio_frame_for_mixing, additionalFramesList);
+  // We only use the limiter if we're actually mixing multiple streams.
+  MixFromList(audio_frame_for_mixing, mix_list, id_, use_limiter_);
+
   if (audio_frame_for_mixing->samples_per_channel_ == 0) {
     // Nothing was mixed, set the audio samples to silence.
     audio_frame_for_mixing->samples_per_channel_ = sample_size_;
@@ -332,10 +341,13 @@ bool AudioMixerImpl::AnonymousMixabilityStatus(
   return IsAudioSourceInList(audio_source, additional_audio_source_list_);
 }
 
-AudioFrameList AudioMixerImpl::UpdateToMix(size_t maxAudioFrameCounter) const {
+AudioFrameList AudioMixerImpl::GetNonAnonymousAudio() const {
   RTC_DCHECK_RUN_ON(&thread_checker_);
+  WEBRTC_TRACE(kTraceStream, kTraceAudioMixerServer, id_,
+               "GetNonAnonymousAudio()");
   AudioFrameList result;
   std::vector<SourceFrame> audioSourceMixingDataList;
+  std::vector<SourceFrame> ramp_list;
 
   // Get audio source audio and put it in the struct vector.
   for (MixerAudioSource* audio_source : audio_source_list_) {
@@ -360,6 +372,7 @@ AudioFrameList AudioMixerImpl::UpdateToMix(size_t maxAudioFrameCounter) const {
   std::sort(audioSourceMixingDataList.begin(), audioSourceMixingDataList.end(),
             std::mem_fn(&SourceFrame::shouldMixBefore));
 
+  int maxAudioFrameCounter = kMaximumAmountOfMixedAudioSources;
   // Go through list in order and put things in mixList.
   for (SourceFrame& p : audioSourceMixingDataList) {
     // Filter muted.
@@ -372,34 +385,28 @@ AudioFrameList AudioMixerImpl::UpdateToMix(size_t maxAudioFrameCounter) const {
     bool is_mixed = false;
     if (maxAudioFrameCounter > 0) {
       --maxAudioFrameCounter;
-      if (!p.was_mixed_before_) {
-        NewMixerRampIn(p.audio_frame_);
-      }
-      result.emplace_back(p.audio_frame_, false);
+      result.push_back(p.audio_frame_);
+      ramp_list.emplace_back(p.audio_source_, p.audio_frame_, false,
+                             p.was_mixed_before_, -1);
       is_mixed = true;
     }
-
-    // Ramp out unmuted.
-    if (p.was_mixed_before_ && !is_mixed) {
-      NewMixerRampOut(p.audio_frame_);
-      result.emplace_back(p.audio_frame_, false);
-    }
-
     p.audio_source_->_mixHistory->SetIsMixed(is_mixed);
   }
+  Ramp(ramp_list);
   return result;
 }
 
-void AudioMixerImpl::GetAdditionalAudio(
-    AudioFrameList* additionalFramesList) const {
+AudioFrameList AudioMixerImpl::GetAnonymousAudio() const {
   RTC_DCHECK_RUN_ON(&thread_checker_);
   WEBRTC_TRACE(kTraceStream, kTraceAudioMixerServer, id_,
-               "GetAdditionalAudio(additionalFramesList)");
+               "GetAnonymousAudio()");
   // The GetAudioFrameWithMuted() callback may result in the audio source being
   // removed from additionalAudioFramesList_. If that happens it will
   // invalidate any iterators. Create a copy of the audio sources list such
   // that the list of participants can be traversed safely.
+  std::vector<SourceFrame> ramp_list;
   MixerAudioSourceList additionalAudioSourceList;
+  AudioFrameList result;
   additionalAudioSourceList.insert(additionalAudioSourceList.begin(),
                                    additional_audio_source_list_.begin(),
                                    additional_audio_source_list_.end());
@@ -416,13 +423,15 @@ void AudioMixerImpl::GetAdditionalAudio(
                    "failed to GetAudioFrameWithMuted() from audio_source");
       continue;
     }
-    if (audio_frame->samples_per_channel_ == 0) {
-      // Empty frame. Don't use it.
-      continue;
+    if (ret != MixerAudioSource::AudioFrameInfo::kMuted) {
+      result.push_back(audio_frame);
+      ramp_list.emplace_back(*audio_source, audio_frame, false,
+                             (*audio_source)->_mixHistory->IsMixed(), -1);
+      (*audio_source)->_mixHistory->SetIsMixed(true);
     }
-    additionalFramesList->push_back(FrameAndMuteInfo(
-        audio_frame, ret == MixerAudioSource::AudioFrameInfo::kMuted));
   }
+  Ramp(ramp_list);
+  return result;
 }
 
 bool AudioMixerImpl::IsAudioSourceInList(
@@ -474,9 +483,8 @@ int32_t AudioMixerImpl::MixFromList(AudioFrame* mixedAudio,
   uint32_t position = 0;
 
   if (audioFrameList.size() == 1) {
-    mixedAudio->timestamp_ = audioFrameList.front().frame->timestamp_;
-    mixedAudio->elapsed_time_ms_ =
-        audioFrameList.front().frame->elapsed_time_ms_;
+    mixedAudio->timestamp_ = audioFrameList.front()->timestamp_;
+    mixedAudio->elapsed_time_ms_ = audioFrameList.front()->elapsed_time_ms_;
   } else {
     // TODO(wu): Issue 3390.
     // Audio frame timestamp is only supported in one channel case.
@@ -484,35 +492,24 @@ int32_t AudioMixerImpl::MixFromList(AudioFrame* mixedAudio,
     mixedAudio->elapsed_time_ms_ = -1;
   }
 
-  for (AudioFrameList::const_iterator iter = audioFrameList.begin();
-       iter != audioFrameList.end(); ++iter) {
-    if (!iter->muted) {
-      MixFrames(mixedAudio, iter->frame, use_limiter);
-    }
+  for (const auto& frame : audioFrameList) {
+    RTC_DCHECK_EQ(mixedAudio->sample_rate_hz_, frame->sample_rate_hz_);
+    RTC_DCHECK_EQ(
+        frame->samples_per_channel_,
+        static_cast<size_t>((mixedAudio->sample_rate_hz_ * kFrameDurationInMs) /
+                            1000));
 
+    // Mix |frame| into |mixedAudio|, with saturation protection.
+    // This effect is applied to |frame| itself prior to mixing.
+    if (use_limiter) {
+      // Divide by two to avoid saturation in the mixing.
+      // This is only meaningful if the limiter will be used.
+      *frame >>= 1;
+    }
+    RTC_DCHECK_EQ(frame->num_channels_, mixedAudio->num_channels_);
+    *mixedAudio += *frame;
     position++;
   }
-
-  return 0;
-}
-
-// TODO(andrew): consolidate this function with MixFromList.
-int32_t AudioMixerImpl::MixAnonomouslyFromList(
-    AudioFrame* mixedAudio,
-    const AudioFrameList& audioFrameList) const {
-  RTC_DCHECK_RUN_ON(&thread_checker_);
-  WEBRTC_TRACE(kTraceStream, kTraceAudioMixerServer, id_,
-               "MixAnonomouslyFromList(mixedAudio, audioFrameList)");
-
-  if (audioFrameList.empty())
-    return 0;
-
-  for (AudioFrameList::const_iterator iter = audioFrameList.begin();
-       iter != audioFrameList.end(); ++iter) {
-    if (!iter->muted) {
-      MixFrames(mixedAudio, iter->frame, use_limiter_);
-    }
-  }
   return 0;
 }
 
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.h b/webrtc/modules/audio_mixer/audio_mixer_impl.h
index 66cf982242..864ecc2064 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl.h
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl.h
@@ -11,7 +11,6 @@
 #ifndef WEBRTC_MODULES_AUDIO_MIXER_AUDIO_MIXER_IMPL_H_
 #define WEBRTC_MODULES_AUDIO_MIXER_AUDIO_MIXER_IMPL_H_
 
-#include <list>
 #include <map>
 #include <memory>
 #include <vector>
@@ -26,14 +25,8 @@ namespace webrtc {
 class AudioProcessing;
 class CriticalSectionWrapper;
 
-struct FrameAndMuteInfo {
-  FrameAndMuteInfo(AudioFrame* f, bool m) : frame(f), muted(m) {}
-  AudioFrame* frame;
-  bool muted;
-};
-
-typedef std::list<FrameAndMuteInfo> AudioFrameList;
-typedef std::list<MixerAudioSource*> MixerAudioSourceList;
+typedef std::vector<AudioFrame*> AudioFrameList;
+typedef std::vector<MixerAudioSource*> MixerAudioSourceList;
 
 // Cheshire cat implementation of MixerAudioSource's non virtual functions.
 class NewMixHistory {
@@ -85,11 +78,10 @@ class AudioMixerImpl : public AudioMixer {
   int32_t SetOutputFrequency(const Frequency& frequency);
   Frequency OutputFrequency() const;
 
-  // Compute what audio sources to mix from audio_source_list_. Ramp in
-  // and out. Update mixed status. maxAudioFrameCounter specifies how
-  // many participants are allowed to be mixed.
-  AudioFrameList UpdateToMix(size_t maxAudioFrameCounter) const
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  // Compute what audio sources to mix from audio_source_list_. Ramp
+  // in and out. Update mixed status. Mixes up to
+  // kMaximumAmountOfMixedAudioSources audio sources.
+  AudioFrameList GetNonAnonymousAudio() const EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
   // Return the lowest mixing frequency that can be used without having to
   // downsample any audio.
@@ -97,9 +89,9 @@ class AudioMixerImpl : public AudioMixer {
   int32_t GetLowestMixingFrequencyFromList(
       const MixerAudioSourceList& mixList) const;
 
-  // Return the AudioFrames that should be mixed anonymously.
-  void GetAdditionalAudio(AudioFrameList* additionalFramesList) const
-      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  // Return the AudioFrames that should be mixed anonymously. Ramp in
+  // and out. Update mixed status.
+  AudioFrameList GetAnonymousAudio() const EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
   // This function returns true if it finds the MixerAudioSource in the
   // specified list of MixerAudioSources.
@@ -119,12 +111,6 @@ class AudioMixerImpl : public AudioMixer {
                              int32_t id,
                              bool use_limiter);
 
-  // Mix the AudioFrames stored in audioFrameList into mixedAudio. No
-  // record will be kept of this mix (e.g. the corresponding MixerAudioSources
-  // will not be marked as IsMixed()
-  int32_t MixAnonomouslyFromList(AudioFrame* mixedAudio,
-                                 const AudioFrameList& audioFrameList) const;
-
   bool LimitMixedAudio(AudioFrame* mixedAudio) const;
 
   // Output level functions for VoEVolumeControl.
diff --git a/webrtc/modules/audio_mixer/test/audio_mixer_unittest.cc b/webrtc/modules/audio_mixer/test/audio_mixer_unittest.cc
index 895535fdb2..5258fdce9c 100644
--- a/webrtc/modules/audio_mixer/test/audio_mixer_unittest.cc
+++ b/webrtc/modules/audio_mixer/test/audio_mixer_unittest.cc
@@ -273,6 +273,8 @@ TEST(AudioMixer, ParticipantSampleRate) {
   for (auto frequency : {8000, 16000, 32000, 48000}) {
     EXPECT_CALL(participant, GetAudioFrameWithMuted(_, frequency))
         .Times(Exactly(1));
+    participant.fake_frame()->sample_rate_hz_ = frequency;
+    participant.fake_frame()->samples_per_channel_ = frequency / 100;
     mixer->Mix(frequency, 1, &frame_for_mixing);
     EXPECT_EQ(frequency, frame_for_mixing.sample_rate_hz_);
   }