From 913f7b8d5eae7a234fd8c0a26c90dc71fbea9133 Mon Sep 17 00:00:00 2001
From: "henrik.lundin@webrtc.org" <henrik.lundin@webrtc.org>
Date: Tue, 21 Oct 2014 06:54:23 +0000
Subject: [PATCH] Fix for glitches in ACM when switching desired output sample
 rate

The problem was that if the output sample rate is changed such from one
where no resampling is needed to a rate that requires resampling, the
first output from the resampler will contain an onset period. The
solution provided in this CL is to keep a copy of the last output frame
in ACM, and if the resampler is engaged, it will be primed with this
old frame before resampling the current frame.

BUG=3919
R=bjornv@webrtc.org, turaj@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/27729004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7479 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../audio_coding/main/acm2/acm_receiver.cc    | 111 +++++++++---------
 .../audio_coding/main/acm2/acm_receiver.h     |   4 +-
 .../audio_coding_module_unittest_oldapi.cc    |   4 +-
 3 files changed, 62 insertions(+), 57 deletions(-)

diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
index 0c7e6c721e..074475411c 100644
--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
@@ -122,11 +122,14 @@ AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
       last_audio_decoder_(-1),  // Invalid value.
       previous_audio_activity_(AudioFrame::kVadPassive),
       current_sample_rate_hz_(config.neteq_config.sample_rate_hz),
+      audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
+      last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
       nack_(),
       nack_enabled_(false),
       neteq_(NetEq::Create(config.neteq_config)),
       vad_enabled_(true),
       clock_(config.clock),
+      resampled_last_output_frame_(true),
       av_sync_(false),
       initial_delay_manager_(),
       missing_packets_sync_stream_(),
@@ -143,6 +146,9 @@ AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
     neteq_->EnableVad();
   else
     neteq_->DisableVad();
+
+  memset(audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
+  memset(last_audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
 }
 
 AcmReceiver::~AcmReceiver() {
@@ -342,7 +348,6 @@ int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header,
 
 int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
   enum NetEqOutputType type;
-  int16_t* ptr_audio_buffer = audio_frame->data_;
   int samples_per_channel;
   int num_channels;
   bool return_silence = false;
@@ -359,18 +364,6 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
       initial_delay_manager_->LatePackets(timestamp_now,
                                           late_packets_sync_stream_.get());
     }
-
-    if (!return_silence) {
-      // This is our initial guess regarding whether a resampling will be
-      // required. It is based on previous sample rate of netEq. Most often,
-      // this is a correct guess, however, in case that incoming payload changes
-      // the resampling might might be needed. By doing so, we avoid an
-      // unnecessary memcpy().
-      if (desired_freq_hz != -1 &&
-          current_sample_rate_hz_ != desired_freq_hz) {
-        ptr_audio_buffer = audio_buffer_;
-      }
-    }
   }
 
   // If |late_packets_sync_stream_| is allocated then we have been in AV-sync
@@ -381,17 +374,19 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
       return 0;
   }
 
+  // Accessing members, take the lock.
+  CriticalSectionScoped lock(crit_sect_.get());
+
+  // Always write the output to |audio_buffer_| first.
   if (neteq_->GetAudio(AudioFrame::kMaxDataSizeSamples,
-                       ptr_audio_buffer,
+                       audio_buffer_.get(),
                        &samples_per_channel,
-                       &num_channels, &type) != NetEq::kOK) {
+                       &num_channels,
+                       &type) != NetEq::kOK) {
     LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio") << "NetEq Failed.";
     return -1;
   }
 
-  // Accessing members, take the lock.
-  CriticalSectionScoped lock(crit_sect_.get());
-
   // Update NACK.
   int decoded_sequence_num = 0;
   uint32_t decoded_timestamp = 0;
@@ -409,45 +404,53 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
   bool need_resampling = (desired_freq_hz != -1) &&
       (current_sample_rate_hz_ != desired_freq_hz);
 
-  if (ptr_audio_buffer == audio_buffer_) {
-    // Data is written to local buffer.
-    if (need_resampling) {
-      samples_per_channel =
-          resampler_.Resample10Msec(audio_buffer_,
-                                    current_sample_rate_hz_,
-                                    desired_freq_hz,
-                                    num_channels,
-                                    AudioFrame::kMaxDataSizeSamples,
-                                    audio_frame->data_);
-      if (samples_per_channel < 0) {
-        LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio") << "Resampler Failed.";
-        return -1;
-      }
-    } else {
-      // We might end up here ONLY if codec is changed.
-      memcpy(audio_frame->data_, audio_buffer_, samples_per_channel *
-             num_channels * sizeof(int16_t));
-    }
-  } else {
-    // Data is written into |audio_frame|.
-    if (need_resampling) {
-      // We might end up here ONLY if codec is changed.
-      samples_per_channel =
-          resampler_.Resample10Msec(audio_frame->data_,
-                                    current_sample_rate_hz_,
-                                    desired_freq_hz,
-                                    num_channels,
-                                    AudioFrame::kMaxDataSizeSamples,
-                                    audio_buffer_);
-      if (samples_per_channel < 0) {
-        LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio") << "Resampler Failed.";
-        return -1;
-      }
-      memcpy(audio_frame->data_, audio_buffer_, samples_per_channel *
-             num_channels * sizeof(int16_t));
+  if (need_resampling && !resampled_last_output_frame_) {
+    // Prime the resampler with the last frame.
+    int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
+    samples_per_channel =
+        resampler_.Resample10Msec(last_audio_buffer_.get(),
+                                  current_sample_rate_hz_,
+                                  desired_freq_hz,
+                                  num_channels,
+                                  AudioFrame::kMaxDataSizeSamples,
+                                  temp_output);
+    if (samples_per_channel < 0) {
+      LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio")
+          << "Resampling last_audio_buffer_ failed.";
+      return -1;
     }
   }
 
+  // The audio in |audio_buffer_| is tansferred to |audio_frame_| below, either
+  // through resampling, or through straight memcpy.
+  // TODO(henrik.lundin) Glitches in the output may appear if the output rate
+  // from NetEq changes. See WebRTC issue 3923.
+  if (need_resampling) {
+    samples_per_channel =
+        resampler_.Resample10Msec(audio_buffer_.get(),
+                                  current_sample_rate_hz_,
+                                  desired_freq_hz,
+                                  num_channels,
+                                  AudioFrame::kMaxDataSizeSamples,
+                                  audio_frame->data_);
+    if (samples_per_channel < 0) {
+      LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio")
+          << "Resampling audio_buffer_ failed.";
+      return -1;
+    }
+    resampled_last_output_frame_ = true;
+  } else {
+    resampled_last_output_frame_ = false;
+    // We might end up here ONLY if codec is changed.
+    memcpy(audio_frame->data_,
+           audio_buffer_.get(),
+           samples_per_channel * num_channels * sizeof(int16_t));
+  }
+
+  // Swap buffers, so that the current audio is stored in |last_audio_buffer_|
+  // for next time.
+  audio_buffer_.swap(last_audio_buffer_);
+
   audio_frame->num_channels_ = num_channels;
   audio_frame->samples_per_channel_ = samples_per_channel;
   audio_frame->sample_rate_hz_ = samples_per_channel * 100;
diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
index 94ea5b017a..6d31b9a5c8 100644
--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
@@ -334,7 +334,8 @@ class AcmReceiver {
   ACMResampler resampler_ GUARDED_BY(crit_sect_);
   // Used in GetAudio, declared as member to avoid allocating every 10ms.
   // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
-  int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples] GUARDED_BY(crit_sect_);
+  scoped_ptr<int16_t[]> audio_buffer_ GUARDED_BY(crit_sect_);
+  scoped_ptr<int16_t[]> last_audio_buffer_ GUARDED_BY(crit_sect_);
   scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
   bool nack_enabled_ GUARDED_BY(crit_sect_);
   CallStatistics call_stats_ GUARDED_BY(crit_sect_);
@@ -342,6 +343,7 @@ class AcmReceiver {
   Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
   bool vad_enabled_;
   Clock* clock_;  // TODO(henrik.lundin) Make const if possible.
+  bool resampled_last_output_frame_ GUARDED_BY(crit_sect_);
 
   // Indicates if a non-zero initial delay is set, and the receiver is in
   // AV-sync mode.
diff --git a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_unittest_oldapi.cc b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_unittest_oldapi.cc
index 7ffcac730f..d9ed32c1ec 100644
--- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_unittest_oldapi.cc
+++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_unittest_oldapi.cc
@@ -1034,7 +1034,7 @@ TEST_F(AcmSwitchingOutputFrequencyOldApi, TestWithoutToggling) {
   Run(16000, 16000, 1000);
 }
 
-TEST_F(AcmSwitchingOutputFrequencyOldApi, DISABLED_Toggle16KhzTo32Khz) {
+TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle16KhzTo32Khz) {
   Run(16000, 32000, 1000);
 }
 
@@ -1042,7 +1042,7 @@ TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle32KhzTo16Khz) {
   Run(32000, 16000, 1000);
 }
 
-TEST_F(AcmSwitchingOutputFrequencyOldApi, DISABLED_Toggle16KhzTo8Khz) {
+TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle16KhzTo8Khz) {
   Run(16000, 8000, 1000);
 }