From 6c278491adae678cdd95c71c444a02103012df56 Mon Sep 17 00:00:00 2001
From: aleloi <aleloi@webrtc.org>
Date: Thu, 20 Oct 2016 14:24:39 -0700
Subject: [PATCH] Move audio frame memory handling inside AudioMixer.

Simplify the AudioMixer::Source interface and update the mixer
implementation to the new interface.

Instead of asking a mixer source to provide a pointer to an AudioFrame
during each mixing iteration, a mixer should supply a pointer to its
own AudioFrame.

This simplifies lifetime issues as sources do not give away an
internal pointer.

Implementation: when an audio source is added, the mixer allocates a
new AudioFrame. The audio frame is kept together in the internal class
SourceStatus together with the audio source pointer until the source
is removed.

NOTRY=True
BUG=webrtc:6346

Review-Url: https://codereview.webrtc.org/2420913002
Cr-Commit-Position: refs/heads/master@{#14713}
---
 webrtc/api/audio/audio_mixer.h                | 37 +++++++----------
 webrtc/audio/audio_receive_stream.cc          |  7 ++--
 webrtc/audio/audio_receive_stream.h           |  3 +-
 .../modules/audio_mixer/audio_mixer_impl.cc   | 40 +++++++++----------
 webrtc/modules/audio_mixer/audio_mixer_impl.h |  5 ++-
 .../audio_mixer/audio_mixer_impl_unittest.cc  | 33 ++++++++-------
 webrtc/voice_engine/channel.cc                | 11 ++---
 webrtc/voice_engine/channel.h                 |  6 +--
 webrtc/voice_engine/channel_proxy.cc          |  7 ++--
 webrtc/voice_engine/channel_proxy.h           |  5 ++-
 10 files changed, 75 insertions(+), 79 deletions(-)

diff --git a/webrtc/api/audio/audio_mixer.h b/webrtc/api/audio/audio_mixer.h
index 960adbbd43..ee39daaf10 100644
--- a/webrtc/api/audio/audio_mixer.h
+++ b/webrtc/api/audio/audio_mixer.h
@@ -35,33 +35,18 @@ class AudioMixer : public rtc::RefCountInterface {
       kError,   // The audio_frame will not be used.
     };
 
-    struct AudioFrameWithInfo {
-      AudioFrame* audio_frame;
-      AudioFrameInfo audio_frame_info;
-    };
-
-    // The implementation of GetAudioFrameWithInfo should update
-    // audio_frame with new audio every time it's called. Implementing
-    // classes are allowed to return the same AudioFrame pointer on
-    // different calls. The pointer must stay valid until the next
-    // mixing call or until this audio source is disconnected from the
-    // mixer. The mixer may modify the contents of the passed
-    // AudioFrame pointer at any time until the next call to
-    // GetAudioFrameWithInfo, or until the source is removed from the
-    // mixer.
-    virtual AudioFrameWithInfo GetAudioFrameWithInfo(int sample_rate_hz) = 0;
+    // Overwrites |audio_frame|. The data_ field is overwritten with
+    // 10 ms of new audio (either 1 or 2 interleaved channels) at
+    // |sample_rate_hz|. All fields in |audio_frame| must be updated.
+    virtual AudioFrameInfo GetAudioFrameWithInfo(int sample_rate_hz,
+                                                 AudioFrame* audio_frame) = 0;
 
     // A way for a mixer implementation to distinguish participants.
     virtual int Ssrc() = 0;
 
-   protected:
     virtual ~Source() {}
   };
 
-  // Since the mixer is reference counted, the destructor may be
-  // called from any thread.
-  ~AudioMixer() override {}
-
   // Returns true if adding/removing was successful. A source is never
   // added twice and removal is never attempted if a source has not
   // been successfully added to the mixer. Addition and removal can
@@ -70,12 +55,18 @@ class AudioMixer : public rtc::RefCountInterface {
   virtual bool RemoveSource(Source* audio_source) = 0;
 
   // Performs mixing by asking registered audio sources for audio. The
-  // mixed result is placed in the provided AudioFrame. Will only be
-  // called from a single thread. The rate and channels arguments
-  // specify the rate and number of channels of the mix result.
+  // mixed result is placed in the provided AudioFrame. This method
+  // will only be called from a single thread. The rate and channels
+  // arguments specify the rate and number of channels of the mix
+  // result. All fields in |audio_frame_for_mixing| must be updated.
   virtual void Mix(int sample_rate_hz,
                    size_t number_of_channels,
                    AudioFrame* audio_frame_for_mixing) = 0;
+
+ protected:
+  // Since the mixer is reference counted, the destructor may be
+  // called from any thread.
+  ~AudioMixer() override {}
 };
 }  // namespace webrtc
 
diff --git a/webrtc/audio/audio_receive_stream.cc b/webrtc/audio/audio_receive_stream.cc
index d05934297c..12b8b3f7ad 100644
--- a/webrtc/audio/audio_receive_stream.cc
+++ b/webrtc/audio/audio_receive_stream.cc
@@ -272,9 +272,10 @@ bool AudioReceiveStream::DeliverRtp(const uint8_t* packet,
   return channel_proxy_->ReceivedRTPPacket(packet, length, packet_time);
 }
 
-AudioMixer::Source::AudioFrameWithInfo
-AudioReceiveStream::GetAudioFrameWithInfo(int sample_rate_hz) {
-  return channel_proxy_->GetAudioFrameWithInfo(sample_rate_hz);
+AudioMixer::Source::AudioFrameInfo AudioReceiveStream::GetAudioFrameWithInfo(
+    int sample_rate_hz,
+    AudioFrame* audio_frame) {
+  return channel_proxy_->GetAudioFrameWithInfo(sample_rate_hz, audio_frame);
 }
 
 int AudioReceiveStream::Ssrc() {
diff --git a/webrtc/audio/audio_receive_stream.h b/webrtc/audio/audio_receive_stream.h
index 82c642a137..e6416cd9a3 100644
--- a/webrtc/audio/audio_receive_stream.h
+++ b/webrtc/audio/audio_receive_stream.h
@@ -55,7 +55,8 @@ class AudioReceiveStream final : public webrtc::AudioReceiveStream,
   const webrtc::AudioReceiveStream::Config& config() const;
 
   // AudioMixer::Source
-  AudioFrameWithInfo GetAudioFrameWithInfo(int sample_rate_hz) override;
+  AudioFrameInfo GetAudioFrameWithInfo(int sample_rate_hz,
+                                       AudioFrame* audio_frame) override;
   int Ssrc() override;
 
  private:
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.cc b/webrtc/modules/audio_mixer/audio_mixer_impl.cc
index 3fcd3e3c66..c646fcb56c 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl.cc
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl.cc
@@ -119,20 +119,22 @@ int32_t MixFromList(AudioFrame* mixed_audio,
 AudioMixerImpl::SourceStatusList::const_iterator FindSourceInList(
     AudioMixerImpl::Source const* audio_source,
     AudioMixerImpl::SourceStatusList const* audio_source_list) {
-  return std::find_if(audio_source_list->begin(), audio_source_list->end(),
-                      [audio_source](const AudioMixerImpl::SourceStatus& p) {
-                        return p.audio_source == audio_source;
-                      });
+  return std::find_if(
+      audio_source_list->begin(), audio_source_list->end(),
+      [audio_source](const std::unique_ptr<AudioMixerImpl::SourceStatus>& p) {
+        return p->audio_source == audio_source;
+      });
 }
 
 // TODO(aleloi): remove non-const version when WEBRTC only supports modern STL.
 AudioMixerImpl::SourceStatusList::iterator FindSourceInList(
     AudioMixerImpl::Source const* audio_source,
     AudioMixerImpl::SourceStatusList* audio_source_list) {
-  return std::find_if(audio_source_list->begin(), audio_source_list->end(),
-                      [audio_source](const AudioMixerImpl::SourceStatus& p) {
-                        return p.audio_source == audio_source;
-                      });
+  return std::find_if(
+      audio_source_list->begin(), audio_source_list->end(),
+      [audio_source](const std::unique_ptr<AudioMixerImpl::SourceStatus>& p) {
+        return p->audio_source == audio_source;
+      });
 }
 
 }  // namespace
@@ -244,7 +246,7 @@ bool AudioMixerImpl::AddSource(Source* audio_source) {
   RTC_DCHECK(FindSourceInList(audio_source, &audio_source_list_) ==
              audio_source_list_.end())
       << "Source already added to mixer";
-  audio_source_list_.emplace_back(audio_source, false, 0);
+  audio_source_list_.emplace_back(new SourceStatus(audio_source, false, 0));
   return true;
 }
 
@@ -263,21 +265,18 @@ AudioFrameList AudioMixerImpl::GetAudioFromSources() {
   std::vector<SourceFrame> audio_source_mixing_data_list;
   std::vector<SourceFrame> ramp_list;
 
-  // Get audio source audio and put it in the struct vector.
+  // Get audio from the audio sources and put it in the SourceFrame vector.
   for (auto& source_and_status : audio_source_list_) {
-    auto audio_frame_with_info =
-        source_and_status.audio_source->GetAudioFrameWithInfo(
-            static_cast<int>(OutputFrequency()));
-
-    const auto audio_frame_info = audio_frame_with_info.audio_frame_info;
-    AudioFrame* audio_source_audio_frame = audio_frame_with_info.audio_frame;
+    const auto audio_frame_info =
+        source_and_status->audio_source->GetAudioFrameWithInfo(
+            OutputFrequency(), &source_and_status->audio_frame);
 
     if (audio_frame_info == Source::AudioFrameInfo::kError) {
       LOG_F(LS_WARNING) << "failed to GetAudioFrameWithInfo() from source";
       continue;
     }
     audio_source_mixing_data_list.emplace_back(
-        &source_and_status, audio_source_audio_frame,
+        source_and_status.get(), &source_and_status->audio_frame,
         audio_frame_info == Source::AudioFrameInfo::kMuted);
   }
 
@@ -344,10 +343,9 @@ bool AudioMixerImpl::GetAudioSourceMixabilityStatusForTest(
   RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
   rtc::CritScope lock(&crit_);
 
-  const auto non_anonymous_iter =
-      FindSourceInList(audio_source, &audio_source_list_);
-  if (non_anonymous_iter != audio_source_list_.end()) {
-    return non_anonymous_iter->is_mixed;
+  const auto iter = FindSourceInList(audio_source, &audio_source_list_);
+  if (iter != audio_source_list_.end()) {
+    return (*iter)->is_mixed;
   }
 
   LOG(LS_ERROR) << "Audio source unknown";
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.h b/webrtc/modules/audio_mixer/audio_mixer_impl.h
index e011e8a340..876284fac6 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl.h
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl.h
@@ -35,9 +35,12 @@ class AudioMixerImpl : public AudioMixer {
     Source* audio_source = nullptr;
     bool is_mixed = false;
     float gain = 0.0f;
+
+    // A frame that will be passed to audio_source->GetAudioFrameWithInfo.
+    AudioFrame audio_frame;
   };
 
-  typedef std::vector<SourceStatus> SourceStatusList;
+  using SourceStatusList = std::vector<std::unique_ptr<SourceStatus>>;
 
   // AudioProcessing only accepts 10 ms frames.
   static const int kFrameDurationInMs = 10;
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
index 9147a15000..28976fae88 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
@@ -53,12 +53,13 @@ class MockMixerAudioSource : public AudioMixer::Source {
  public:
   MockMixerAudioSource()
       : fake_audio_frame_info_(AudioMixer::Source::AudioFrameInfo::kNormal) {
-    ON_CALL(*this, GetAudioFrameWithInfo(_))
+    ON_CALL(*this, GetAudioFrameWithInfo(_, _))
         .WillByDefault(
             Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo));
   }
 
-  MOCK_METHOD1(GetAudioFrameWithInfo, AudioFrameWithInfo(int sample_rate_hz));
+  MOCK_METHOD2(GetAudioFrameWithInfo,
+               AudioFrameInfo(int sample_rate_hz, AudioFrame* audio_frame));
 
   MOCK_METHOD0(Ssrc, int());
 
@@ -69,14 +70,12 @@ class MockMixerAudioSource : public AudioMixer::Source {
   }
 
  private:
-  AudioFrame fake_frame_, fake_output_frame_;
+  AudioFrame fake_frame_;
   AudioFrameInfo fake_audio_frame_info_;
-  AudioFrameWithInfo FakeAudioFrameWithInfo(int sample_rate_hz) {
-    fake_output_frame_.CopyFrom(fake_frame_);
-    return {
-        &fake_output_frame_,  // audio_frame_pointer
-        fake_info(),          // audio_frame_info
-    };
+  AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
+                                        AudioFrame* audio_frame) {
+    audio_frame->CopyFrom(fake_frame_);
+    return fake_info();
   }
 };
 
@@ -100,7 +99,7 @@ void MixAndCompare(
 
   for (int i = 0; i < num_audio_sources; i++) {
     EXPECT_TRUE(mixer->AddSource(&participants[i]));
-    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz))
+    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
   }
 
@@ -129,7 +128,7 @@ TEST(AudioMixer, LargestEnergyVadActiveMixed) {
     participants[i].fake_frame()->data_[80] = i;
 
     EXPECT_TRUE(mixer->AddSource(&participants[i]));
-    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_)).Times(Exactly(1));
+    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
   }
 
   // Last participant gives audio frame with passive VAD, although it has the
@@ -171,7 +170,7 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
   }
 
   EXPECT_TRUE(mixer->AddSource(&participant));
-  EXPECT_CALL(participant, GetAudioFrameWithInfo(_)).Times(Exactly(2));
+  EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(2));
 
   AudioFrame audio_frame;
   // Two mix iteration to compare after the ramp-up step.
@@ -193,7 +192,7 @@ TEST(AudioMixer, ParticipantSampleRate) {
 
   EXPECT_TRUE(mixer->AddSource(&participant));
   for (auto frequency : {8000, 16000, 32000, 48000}) {
-    EXPECT_CALL(participant, GetAudioFrameWithInfo(frequency))
+    EXPECT_CALL(participant, GetAudioFrameWithInfo(frequency, _))
         .Times(Exactly(1));
     participant.fake_frame()->sample_rate_hz_ = frequency;
     participant.fake_frame()->samples_per_channel_ = frequency / 100;
@@ -210,7 +209,7 @@ TEST(AudioMixer, ParticipantNumberOfChannels) {
 
   EXPECT_TRUE(mixer->AddSource(&participant));
   for (size_t number_of_channels : {1, 2}) {
-    EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz))
+    EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
     mixer->Mix(kDefaultSampleRateHz, number_of_channels, &frame_for_mixing);
     EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
@@ -236,7 +235,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
   // Add all participants but the loudest for mixing.
   for (int i = 0; i < kAudioSources - 1; i++) {
     EXPECT_TRUE(mixer->AddSource(&participants[i]));
-    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz))
+    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
   }
 
@@ -252,7 +251,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
   // Add new participant with higher energy.
   EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1]));
   for (int i = 0; i < kAudioSources; i++) {
-    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz))
+    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
         .Times(Exactly(1));
   }
 
@@ -287,7 +286,7 @@ TEST(AudioMixer, ConstructFromOtherThread) {
       RTC_FROM_HERE,
       rtc::Bind(&AudioMixer::AddSource, mixer.get(), &participant)));
 
-  EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz))
+  EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
       .Times(Exactly(1));
 
   // Do one mixer iteration
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index 44a1c73b08..6d053323da 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -712,11 +712,12 @@ MixerParticipant::AudioFrameInfo Channel::GetAudioFrameWithMuted(
                : MixerParticipant::AudioFrameInfo::kNormal;
 }
 
-AudioMixer::Source::AudioFrameWithInfo Channel::GetAudioFrameWithInfo(
-    int sample_rate_hz) {
-  mix_audio_frame_.sample_rate_hz_ = sample_rate_hz;
+AudioMixer::Source::AudioFrameInfo Channel::GetAudioFrameWithInfo(
+    int sample_rate_hz,
+    AudioFrame* audio_frame) {
+  audio_frame->sample_rate_hz_ = sample_rate_hz;
 
-  const auto frame_info = GetAudioFrameWithMuted(-1, &mix_audio_frame_);
+  const auto frame_info = GetAudioFrameWithMuted(-1, audio_frame);
 
   using FrameInfo = AudioMixer::Source::AudioFrameInfo;
   FrameInfo new_audio_frame_info = FrameInfo::kError;
@@ -731,7 +732,7 @@ AudioMixer::Source::AudioFrameWithInfo Channel::GetAudioFrameWithInfo(
       new_audio_frame_info = FrameInfo::kError;
       break;
   }
-  return {&mix_audio_frame_, new_audio_frame_info};
+  return new_audio_frame_info;
 }
 
 int32_t Channel::NeededFrequency(int32_t id) const {
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index 97340beb4a..3b478cafb9 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -379,8 +379,9 @@ class Channel
   int32_t NeededFrequency(int32_t id) const override;
 
   // From AudioMixer::Source.
-  AudioMixer::Source::AudioFrameWithInfo GetAudioFrameWithInfo(
-      int sample_rate_hz);
+  AudioMixer::Source::AudioFrameInfo GetAudioFrameWithInfo(
+      int sample_rate_hz,
+      AudioFrame* audio_frame);
 
   // From FileCallback
   void PlayNotification(int32_t id, uint32_t durationMs) override;
@@ -475,7 +476,6 @@ class Channel
   AudioLevel _outputAudioLevel;
   bool _externalTransport;
   AudioFrame _audioFrame;
-  AudioFrame mix_audio_frame_;
   // Downsamples to the codec rate if necessary.
   PushResampler<int16_t> input_resampler_;
   std::unique_ptr<FilePlayer> input_file_player_;
diff --git a/webrtc/voice_engine/channel_proxy.cc b/webrtc/voice_engine/channel_proxy.cc
index dc01c9b627..ba58502e95 100644
--- a/webrtc/voice_engine/channel_proxy.cc
+++ b/webrtc/voice_engine/channel_proxy.cc
@@ -214,10 +214,11 @@ void ChannelProxy::SetRtcEventLog(RtcEventLog* event_log) {
   channel()->SetRtcEventLog(event_log);
 }
 
-AudioMixer::Source::AudioFrameWithInfo ChannelProxy::GetAudioFrameWithInfo(
-    int sample_rate_hz) {
+AudioMixer::Source::AudioFrameInfo ChannelProxy::GetAudioFrameWithInfo(
+    int sample_rate_hz,
+    AudioFrame* audio_frame) {
   RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
-  return channel()->GetAudioFrameWithInfo(sample_rate_hz);
+  return channel()->GetAudioFrameWithInfo(sample_rate_hz, audio_frame);
 }
 
 Channel* ChannelProxy::channel() const {
diff --git a/webrtc/voice_engine/channel_proxy.h b/webrtc/voice_engine/channel_proxy.h
index b09a56c302..d19c0092a7 100644
--- a/webrtc/voice_engine/channel_proxy.h
+++ b/webrtc/voice_engine/channel_proxy.h
@@ -93,8 +93,9 @@ class ChannelProxy {
 
   virtual void SetRtcEventLog(RtcEventLog* event_log);
 
-  virtual AudioMixer::Source::AudioFrameWithInfo GetAudioFrameWithInfo(
-      int sample_rate_hz);
+  virtual AudioMixer::Source::AudioFrameInfo GetAudioFrameWithInfo(
+      int sample_rate_hz,
+      AudioFrame* audio_frame);
 
  private:
   Channel* channel() const;