From 27d106bcf7eaf864e8433f1fc303475b953498b3 Mon Sep 17 00:00:00 2001
From: "aluebs@webrtc.org" <aluebs@webrtc.org>
Date: Thu, 11 Dec 2014 17:09:21 +0000
Subject: [PATCH] Move the downmixing out of AudioBuffer

This provides more flexibility if some component in AudioProcessing wants to operate before downmixing.
AudioProcessing now tracks only the processing rate, not the number of processing channels. The channel count is tracked by the AudioBuffer itself and can be changed at any time to a value less than or equal to the number of input channels. For each chunk it is reset to the number of input channels, and at the end it should be equal to the number of output channels.

R=andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/28169004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7879 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../modules/audio_processing/audio_buffer.cc  | 27 ++++++++++++-------
 .../modules/audio_processing/audio_buffer.h   | 10 +++++++
 .../audio_processing/audio_processing_impl.cc | 24 ++++++++---------
 .../audio_processing/audio_processing_impl.h  |  6 +++--
 4 files changed, 43 insertions(+), 24 deletions(-)
diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index 079de39ffd..8f164efaa3 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -65,6 +65,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
     proc_samples_per_channel_(process_samples_per_channel),
     num_proc_channels_(num_process_channels),
     output_samples_per_channel_(output_samples_per_channel),
+    num_channels_(num_process_channels),
     num_bands_(1),
     samples_per_split_channel_(proc_samples_per_channel_),
     mixed_low_pass_valid_(false),
@@ -77,7 +78,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
   assert(proc_samples_per_channel_ > 0);
   assert(output_samples_per_channel_ > 0);
   assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
-  assert(num_proc_channels_ <= num_input_channels);
+  assert(num_proc_channels_ <= num_input_channels_);
 
   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
     input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
@@ -172,7 +173,7 @@ void AudioBuffer::CopyTo(int samples_per_channel,
                          AudioProcessing::ChannelLayout layout,
                          float* const* data) {
   assert(samples_per_channel == output_samples_per_channel_);
-  assert(ChannelsFromLayout(layout) == num_proc_channels_);
+  assert(ChannelsFromLayout(layout) == num_channels_);
 
   // Convert to the float range.
   float* const* data_ptr = data;
@@ -180,14 +181,14 @@ void AudioBuffer::CopyTo(int samples_per_channel,
     // Convert to an intermediate buffer for subsequent resampling.
     data_ptr = process_buffer_->channels();
   }
-  for (int i = 0; i < num_proc_channels_; ++i) {
+  for (int i = 0; i < num_channels_; ++i) {
     FloatS16ToFloat(channels_->fbuf()->channel(i), proc_samples_per_channel_,
                     data_ptr[i]);
   }
 
   // Resample.
   if (output_samples_per_channel_ != proc_samples_per_channel_) {
-    for (int i = 0; i < num_proc_channels_; ++i) {
+    for (int i = 0; i < num_channels_; ++i) {
       output_resamplers_[i]->Resample(data_ptr[i],
                                       proc_samples_per_channel_,
                                       data[i],
@@ -201,6 +202,7 @@ void AudioBuffer::InitForNewData() {
   mixed_low_pass_valid_ = false;
   reference_copied_ = false;
   activity_ = AudioFrame::kVadUnknown;
+  num_channels_ = num_proc_channels_;
 }
 
 const int16_t* AudioBuffer::data_const(int channel) const {
@@ -362,7 +364,11 @@ AudioFrame::VADActivity AudioBuffer::activity() const {
 }
 
 int AudioBuffer::num_channels() const {
-  return num_proc_channels_;
+  return num_channels_;
+}
+
+void AudioBuffer::set_num_channels(int num_channels) {
+  num_channels_ = num_channels;
 }
 
 int AudioBuffer::samples_per_channel() const {
@@ -412,8 +418,8 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
 
 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
   assert(proc_samples_per_channel_ == output_samples_per_channel_);
-  assert(num_proc_channels_ == num_input_channels_);
-  assert(frame->num_channels_ == num_proc_channels_);
+  assert(num_channels_ == num_input_channels_);
+  assert(frame->num_channels_ == num_channels_);
   assert(frame->samples_per_channel_ == proc_samples_per_channel_);
   frame->vad_activity_ = activity_;
 
@@ -422,19 +428,20 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
   }
 
   int16_t* interleaved = frame->data_;
-  for (int i = 0; i < num_proc_channels_; i++) {
+  for (int i = 0; i < num_channels_; i++) {
     int16_t* deinterleaved = channels_->ibuf()->channel(i);
     int interleaved_idx = i;
     for (int j = 0; j < proc_samples_per_channel_; j++) {
       interleaved[interleaved_idx] = deinterleaved[j];
-      interleaved_idx += num_proc_channels_;
+      interleaved_idx += num_channels_;
     }
   }
 }
 
 void AudioBuffer::CopyLowPassToReference() {
   reference_copied_ = true;
-  if (!low_pass_reference_channels_.get()) {
+  if (!low_pass_reference_channels_.get() ||
+      low_pass_reference_channels_->num_channels() != num_channels_) {
     low_pass_reference_channels_.reset(
         new ChannelBuffer<int16_t>(samples_per_split_channel_,
                                    num_proc_channels_));
diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h
index 65d7cad2c0..a526ca07c8 100644
--- a/webrtc/modules/audio_processing/audio_buffer.h
+++ b/webrtc/modules/audio_processing/audio_buffer.h
@@ -45,6 +45,7 @@ class AudioBuffer {
   virtual ~AudioBuffer();
 
   int num_channels() const;
+  void set_num_channels(int num_channels);
   int samples_per_channel() const;
   int samples_per_split_channel() const;
   int samples_per_keyboard_channel() const;
@@ -107,11 +108,20 @@ class AudioBuffer {
   // Called from DeinterleaveFrom() and CopyFrom().
   void InitForNewData();
 
+  // The audio is passed into DeinterleaveFrom() or CopyFrom() with input
+  // format (samples per channel and number of channels).
   const int input_samples_per_channel_;
   const int num_input_channels_;
+  // The audio is stored by DeinterleaveFrom() or CopyFrom() with processing
+  // format.
   const int proc_samples_per_channel_;
   const int num_proc_channels_;
+  // The audio is returned by InterleaveTo() and CopyTo() with the output
+  // samples per channel and the current number of channels. The latter can be
+  // changed at any time using set_num_channels().
   const int output_samples_per_channel_;
+  int num_channels_;
+
   int num_bands_;
   int samples_per_split_channel_;
   bool mixed_low_pass_valid_;
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 0c50ecb060..ba22f33536 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -87,8 +87,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
       event_msg_(new audioproc::Event()),
 #endif
       fwd_in_format_(kSampleRate16kHz, 1),
-      fwd_proc_format_(kSampleRate16kHz, 1),
-      fwd_out_format_(kSampleRate16kHz),
+      fwd_proc_format_(kSampleRate16kHz),
+      fwd_out_format_(kSampleRate16kHz, 1),
       rev_in_format_(kSampleRate16kHz, 1),
       rev_proc_format_(kSampleRate16kHz, 1),
       split_rate_(kSampleRate16kHz),
@@ -152,7 +152,7 @@ int AudioProcessingImpl::set_sample_rate_hz(int rate) {
                           rate,
                           rev_in_format_.rate(),
                           fwd_in_format_.num_channels(),
-                          fwd_proc_format_.num_channels(),
+                          fwd_out_format_.num_channels(),
                           rev_in_format_.num_channels());
 }
 
@@ -180,7 +180,7 @@ int AudioProcessingImpl::InitializeLocked() {
   capture_audio_.reset(new AudioBuffer(fwd_in_format_.samples_per_channel(),
                                        fwd_in_format_.num_channels(),
                                        fwd_proc_format_.samples_per_channel(),
-                                       fwd_proc_format_.num_channels(),
+                                       fwd_out_format_.num_channels(),
                                        fwd_out_format_.samples_per_channel()));
 
   // Initialize all components.
@@ -226,7 +226,7 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
   }
 
   fwd_in_format_.set(input_sample_rate_hz, num_input_channels);
-  fwd_out_format_.set(output_sample_rate_hz);
+  fwd_out_format_.set(output_sample_rate_hz, num_output_channels);
   rev_in_format_.set(reverse_sample_rate_hz, num_reverse_channels);
 
   // We process at the closest native rate >= min(input rate, output rate)...
@@ -244,7 +244,7 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
     fwd_proc_rate = kSampleRate16kHz;
   }
 
-  fwd_proc_format_.set(fwd_proc_rate, num_output_channels);
+  fwd_proc_format_.set(fwd_proc_rate);
 
   // We normally process the reverse stream at 16 kHz. Unless...
   int rev_proc_rate = kSampleRate16kHz;
@@ -285,7 +285,7 @@ int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz,
       output_sample_rate_hz == fwd_out_format_.rate() &&
       reverse_sample_rate_hz == rev_in_format_.rate() &&
       num_input_channels == fwd_in_format_.num_channels() &&
-      num_output_channels == fwd_proc_format_.num_channels() &&
+      num_output_channels == fwd_out_format_.num_channels() &&
       num_reverse_channels == rev_in_format_.num_channels()) {
     return kNoError;
   }
@@ -332,7 +332,7 @@ int AudioProcessingImpl::num_input_channels() const {
 }
 
 int AudioProcessingImpl::num_output_channels() const {
-  return fwd_proc_format_.num_channels();
+  return fwd_out_format_.num_channels();
 }
 
 void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
@@ -389,7 +389,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
     audioproc::Stream* msg = event_msg_->mutable_stream();
     const size_t channel_size =
         sizeof(float) * fwd_out_format_.samples_per_channel();
-    for (int i = 0; i < fwd_proc_format_.num_channels(); ++i)
+    for (int i = 0; i < fwd_out_format_.num_channels(); ++i)
       msg->add_output_channel(dest[i], channel_size);
     RETURN_ON_ERR(WriteMessageToDebugFile());
   }
@@ -513,7 +513,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
                                       fwd_out_format_.rate(),
                                       sample_rate_hz,
                                       fwd_in_format_.num_channels(),
-                                      fwd_proc_format_.num_channels(),
+                                      fwd_out_format_.num_channels(),
                                       num_channels));
   if (samples_per_channel != rev_in_format_.samples_per_channel()) {
     return kBadDataLengthError;
@@ -774,7 +774,7 @@ bool AudioProcessingImpl::is_data_processed() const {
 
 bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
   // Check if we've upmixed or downmixed the audio.
-  return ((fwd_proc_format_.num_channels() != fwd_in_format_.num_channels()) ||
+  return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) ||
           is_data_processed);
 }
 
@@ -828,7 +828,7 @@ int AudioProcessingImpl::WriteInitMessage() {
   audioproc::Init* msg = event_msg_->mutable_init();
   msg->set_sample_rate(fwd_in_format_.rate());
   msg->set_num_input_channels(fwd_in_format_.num_channels());
-  msg->set_num_output_channels(fwd_proc_format_.num_channels());
+  msg->set_num_output_channels(fwd_out_format_.num_channels());
   msg->set_num_reverse_channels(rev_in_format_.num_channels());
   msg->set_reverse_sample_rate(rev_in_format_.rate());
   msg->set_output_sample_rate(fwd_out_format_.rate());
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index 0817f471f4..caab37962a 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -184,8 +184,10 @@ class AudioProcessingImpl : public AudioProcessing {
 #endif
 
   AudioFormat fwd_in_format_;
-  AudioFormat fwd_proc_format_;
-  AudioRate fwd_out_format_;
+  // This one is an AudioRate, because the forward processing number of channels
+  // is mutable and is tracked by the capture_audio_.
+  AudioRate fwd_proc_format_;
+  AudioFormat fwd_out_format_;
   AudioFormat rev_in_format_;
   AudioFormat rev_proc_format_;
   int split_rate_;