Move the downmixing out of AudioBuffer

This provides more flexibility if some component in AudioProcessing wants to operate before downmixing. AudioProcessing now tracks only the processing rate, not the number of processing channels. The channel count is tracked by the AudioBuffer itself and can be changed at any time to a value smaller than or equal to the number of input channels. For each chunk it is reset to the number of input channels, and at the end it should be equal to the number of output channels. R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/28169004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7879 4adac7df-926f-26a2-2b94-8c16560cd09d
2014-12-11 17:09:21 +00:00 · 2014-12-11 17:09:21 +00:00 · 27d106bcf7
commit 27d106bcf7
parent 0ca768b131
4 changed files with 43 additions and 24 deletions
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@ -65,6 +65,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
    proc_samples_per_channel_(process_samples_per_channel),
    num_proc_channels_(num_process_channels),
    output_samples_per_channel_(output_samples_per_channel),
+    num_channels_(num_process_channels),
    num_bands_(1),
    samples_per_split_channel_(proc_samples_per_channel_),
    mixed_low_pass_valid_(false),
@ -77,7 +78,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
  assert(proc_samples_per_channel_ > 0);
  assert(output_samples_per_channel_ > 0);
  assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
-  assert(num_proc_channels_ <= num_input_channels);
+  assert(num_proc_channels_ <= num_input_channels_);

  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
    input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
@ -172,7 +173,7 @@ void AudioBuffer::CopyTo(int samples_per_channel,
                         AudioProcessing::ChannelLayout layout,
                         float* const* data) {
  assert(samples_per_channel == output_samples_per_channel_);
-  assert(ChannelsFromLayout(layout) == num_proc_channels_);
+  assert(ChannelsFromLayout(layout) == num_channels_);

  // Convert to the float range.
  float* const* data_ptr = data;
@ -180,14 +181,14 @@ void AudioBuffer::CopyTo(int samples_per_channel,
    // Convert to an intermediate buffer for subsequent resampling.
    data_ptr = process_buffer_->channels();
  }
-  for (int i = 0; i < num_proc_channels_; ++i) {
+  for (int i = 0; i < num_channels_; ++i) {
    FloatS16ToFloat(channels_->fbuf()->channel(i), proc_samples_per_channel_,
                    data_ptr[i]);
  }

  // Resample.
  if (output_samples_per_channel_ != proc_samples_per_channel_) {
-    for (int i = 0; i < num_proc_channels_; ++i) {
+    for (int i = 0; i < num_channels_; ++i) {
      output_resamplers_[i]->Resample(data_ptr[i],
                                      proc_samples_per_channel_,
                                      data[i],
@ -201,6 +202,7 @@ void AudioBuffer::InitForNewData() {
  mixed_low_pass_valid_ = false;
  reference_copied_ = false;
  activity_ = AudioFrame::kVadUnknown;
+  num_channels_ = num_proc_channels_;
 }

 const int16_t* AudioBuffer::data_const(int channel) const {
@ -362,7 +364,11 @@ AudioFrame::VADActivity AudioBuffer::activity() const {
 }

 int AudioBuffer::num_channels() const {
-  return num_proc_channels_;
+  return num_channels_;
+}
+
+void AudioBuffer::set_num_channels(int num_channels) {
+  num_channels_ = num_channels;
 }

 int AudioBuffer::samples_per_channel() const {
@ -412,8 +418,8 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {

 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
  assert(proc_samples_per_channel_ == output_samples_per_channel_);
-  assert(num_proc_channels_ == num_input_channels_);
-  assert(frame->num_channels_ == num_proc_channels_);
+  assert(num_channels_ == num_input_channels_);
+  assert(frame->num_channels_ == num_channels_);
  assert(frame->samples_per_channel_ == proc_samples_per_channel_);
  frame->vad_activity_ = activity_;

@ -422,19 +428,20 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
  }

  int16_t* interleaved = frame->data_;
-  for (int i = 0; i < num_proc_channels_; i++) {
+  for (int i = 0; i < num_channels_; i++) {
    int16_t* deinterleaved = channels_->ibuf()->channel(i);
    int interleaved_idx = i;
    for (int j = 0; j < proc_samples_per_channel_; j++) {
      interleaved[interleaved_idx] = deinterleaved[j];
-      interleaved_idx += num_proc_channels_;
+      interleaved_idx += num_channels_;
    }
  }
 }

 void AudioBuffer::CopyLowPassToReference() {
  reference_copied_ = true;
-  if (!low_pass_reference_channels_.get()) {
+  if (!low_pass_reference_channels_.get() ||
+      low_pass_reference_channels_->num_channels() != num_channels_) {
    low_pass_reference_channels_.reset(
        new ChannelBuffer<int16_t>(samples_per_split_channel_,
                                   num_proc_channels_));
--- a/webrtc/modules/audio_processing/audio_buffer.h
+++ b/webrtc/modules/audio_processing/audio_buffer.h
@ -45,6 +45,7 @@ class AudioBuffer {
  virtual ~AudioBuffer();

  int num_channels() const;
+  void set_num_channels(int num_channels);
  int samples_per_channel() const;
  int samples_per_split_channel() const;
  int samples_per_keyboard_channel() const;
@ -107,11 +108,20 @@ class AudioBuffer {
  // Called from DeinterleaveFrom() and CopyFrom().
  void InitForNewData();

+  // The audio is passed into DeinterleaveFrom() or CopyFrom() with input
+  // format (samples per channel and number of channels).
  const int input_samples_per_channel_;
  const int num_input_channels_;
+  // The audio is stored by DeinterleaveFrom() or CopyFrom() with processing
+  // format.
  const int proc_samples_per_channel_;
  const int num_proc_channels_;
+  // The audio is returned by InterleaveTo() and CopyTo() with the output
+  // samples per channel and the current number of channels. The latter can
+  // be changed at any time using set_num_channels().
  const int output_samples_per_channel_;
+  int num_channels_;
+
  int num_bands_;
  int samples_per_split_channel_;
  bool mixed_low_pass_valid_;
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@ -87,8 +87,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
      event_msg_(new audioproc::Event()),
 #endif
      fwd_in_format_(kSampleRate16kHz, 1),
-      fwd_proc_format_(kSampleRate16kHz, 1),
-      fwd_out_format_(kSampleRate16kHz),
+      fwd_proc_format_(kSampleRate16kHz),
+      fwd_out_format_(kSampleRate16kHz, 1),
      rev_in_format_(kSampleRate16kHz, 1),
      rev_proc_format_(kSampleRate16kHz, 1),
      split_rate_(kSampleRate16kHz),
@ -152,7 +152,7 @@ int AudioProcessingImpl::set_sample_rate_hz(int rate) {
                          rate,
                          rev_in_format_.rate(),
                          fwd_in_format_.num_channels(),
-                          fwd_proc_format_.num_channels(),
+                          fwd_out_format_.num_channels(),
                          rev_in_format_.num_channels());
 }

@ -180,7 +180,7 @@ int AudioProcessingImpl::InitializeLocked() {
  capture_audio_.reset(new AudioBuffer(fwd_in_format_.samples_per_channel(),
                                       fwd_in_format_.num_channels(),
                                       fwd_proc_format_.samples_per_channel(),
-                                       fwd_proc_format_.num_channels(),
+                                       fwd_out_format_.num_channels(),
                                       fwd_out_format_.samples_per_channel()));

  // Initialize all components.
@ -226,7 +226,7 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
  }

  fwd_in_format_.set(input_sample_rate_hz, num_input_channels);
-  fwd_out_format_.set(output_sample_rate_hz);
+  fwd_out_format_.set(output_sample_rate_hz, num_output_channels);
  rev_in_format_.set(reverse_sample_rate_hz, num_reverse_channels);

  // We process at the closest native rate >= min(input rate, output rate)...
@ -244,7 +244,7 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
    fwd_proc_rate = kSampleRate16kHz;
  }

-  fwd_proc_format_.set(fwd_proc_rate, num_output_channels);
+  fwd_proc_format_.set(fwd_proc_rate);

  // We normally process the reverse stream at 16 kHz. Unless...
  int rev_proc_rate = kSampleRate16kHz;
@ -285,7 +285,7 @@ int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz,
      output_sample_rate_hz == fwd_out_format_.rate() &&
      reverse_sample_rate_hz == rev_in_format_.rate() &&
      num_input_channels == fwd_in_format_.num_channels() &&
-      num_output_channels == fwd_proc_format_.num_channels() &&
+      num_output_channels == fwd_out_format_.num_channels() &&
      num_reverse_channels == rev_in_format_.num_channels()) {
    return kNoError;
  }
@ -332,7 +332,7 @@ int AudioProcessingImpl::num_input_channels() const {
 }

 int AudioProcessingImpl::num_output_channels() const {
-  return fwd_proc_format_.num_channels();
+  return fwd_out_format_.num_channels();
 }

 void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
@ -389,7 +389,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t channel_size =
        sizeof(float) * fwd_out_format_.samples_per_channel();
-    for (int i = 0; i < fwd_proc_format_.num_channels(); ++i)
+    for (int i = 0; i < fwd_out_format_.num_channels(); ++i)
      msg->add_output_channel(dest[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile());
  }
@ -513,7 +513,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
                                      fwd_out_format_.rate(),
                                      sample_rate_hz,
                                      fwd_in_format_.num_channels(),
-                                      fwd_proc_format_.num_channels(),
+                                      fwd_out_format_.num_channels(),
                                      num_channels));
  if (samples_per_channel != rev_in_format_.samples_per_channel()) {
    return kBadDataLengthError;
@ -774,7 +774,7 @@ bool AudioProcessingImpl::is_data_processed() const {

 bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
  // Check if we've upmixed or downmixed the audio.
-  return ((fwd_proc_format_.num_channels() != fwd_in_format_.num_channels()) ||
+  return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) ||
          is_data_processed);
 }

@ -828,7 +828,7 @@ int AudioProcessingImpl::WriteInitMessage() {
  audioproc::Init* msg = event_msg_->mutable_init();
  msg->set_sample_rate(fwd_in_format_.rate());
  msg->set_num_input_channels(fwd_in_format_.num_channels());
-  msg->set_num_output_channels(fwd_proc_format_.num_channels());
+  msg->set_num_output_channels(fwd_out_format_.num_channels());
  msg->set_num_reverse_channels(rev_in_format_.num_channels());
  msg->set_reverse_sample_rate(rev_in_format_.rate());
  msg->set_output_sample_rate(fwd_out_format_.rate());
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@ -184,8 +184,10 @@ class AudioProcessingImpl : public AudioProcessing {
 #endif

  AudioFormat fwd_in_format_;
-  AudioFormat fwd_proc_format_;
-  AudioRate fwd_out_format_;
+  // This one is an AudioRate, because the forward processing number of channels
+  // is mutable and is tracked by the capture_audio_.
+  AudioRate fwd_proc_format_;
+  AudioFormat fwd_out_format_;
  AudioFormat rev_in_format_;
  AudioFormat rev_proc_format_;
  int split_rate_;