From 5bd3397e53b96f73d1dcc120a50baf594aee3eb5 Mon Sep 17 00:00:00 2001
From: minyue <minyue@webrtc.org>
Date: Mon, 2 May 2016 04:46:11 -0700
Subject: [PATCH] Adding 120 ms frame length support in NetEq.

BUG=webrtc:1015

Review-Url: https://codereview.webrtc.org/1901633002
Cr-Commit-Position: refs/heads/master@{#12592}
---
 webrtc/modules/audio_coding/neteq/merge.cc    |  31 ++-
 webrtc/modules/audio_coding/neteq/merge.h     |   3 +-
 .../modules/audio_coding/neteq/neteq_impl.cc  |   6 +
 .../modules/audio_coding/neteq/neteq_impl.h   |   4 +-
 .../audio_coding/neteq/neteq_impl_unittest.cc | 214 +++++++++++++++++-
 .../audio_coding/neteq/normal_unittest.cc     |  50 ++++
 6 files changed, 288 insertions(+), 20 deletions(-)

diff --git a/webrtc/modules/audio_coding/neteq/merge.cc b/webrtc/modules/audio_coding/neteq/merge.cc
index b62df61367..94db1129cd 100644
--- a/webrtc/modules/audio_coding/neteq/merge.cc
+++ b/webrtc/modules/audio_coding/neteq/merge.cc
@@ -39,6 +39,8 @@ Merge::Merge(int fs_hz,
   assert(num_channels_ > 0);
 }
 
+Merge::~Merge() = default;
+
 size_t Merge::Process(int16_t* input, size_t input_length,
                       int16_t* external_mute_factor_array,
                       AudioMultiVector* output) {
@@ -91,9 +93,8 @@ size_t Merge::Process(int16_t* input, size_t input_length,
           old_length, input_length_per_channel, expand_period);
     }
 
-    static const int kTempDataSize = 3600;
-    int16_t temp_data[kTempDataSize];  // TODO(hlundin) Remove this.
-    int16_t* decoded_output = temp_data + best_correlation_index;
+    temp_data_.resize(input_length_per_channel + best_correlation_index);
+    int16_t* decoded_output = temp_data_.data() + best_correlation_index;
 
     // Mute the new decoded data if needed (and unmute it linearly).
     // This is the overlapping part of expanded_signal.
@@ -127,7 +128,7 @@ size_t Merge::Process(int16_t* input, size_t input_length,
     int16_t increment =
         static_cast<int16_t>(16384 / (interpolation_length + 1));  // In Q14.
     int16_t mute_factor = 16384 - increment;
-    memmove(temp_data, expanded_channel,
+    memmove(temp_data_.data(), expanded_channel,
             sizeof(int16_t) * best_correlation_index);
     DspHelper::CrossFade(&expanded_channel[best_correlation_index],
                          input_channel, interpolation_length,
@@ -140,8 +141,8 @@ size_t Merge::Process(int16_t* input, size_t input_length,
     } else {
       assert(output->Size() == output_length);
     }
-    memcpy(&(*output)[channel][0], temp_data,
-           sizeof(temp_data[0]) * output_length);
+    memcpy(&(*output)[channel][0], temp_data_.data(),
+           sizeof(temp_data_[0]) * output_length);
   }
 
   // Copy back the first part of the data to |sync_buffer_| and remove it from
@@ -208,22 +209,20 @@ int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,
       std::min(static_cast<size_t>(64 * fs_mult_), input_length);
   const int16_t expanded_max =
       WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);
-  const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);
-
-  // Calculate energy of expanded signal.
-  // |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz.
-  int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_);
-  int expanded_shift = 6 + log_fs_mult
-      - WebRtcSpl_NormW32(expanded_max * expanded_max);
-  expanded_shift = std::max(expanded_shift, 0);
+  int32_t factor = (expanded_max * expanded_max) /
+      (std::numeric_limits<int32_t>::max() /
+          static_cast<int32_t>(mod_input_length));
+  const int expanded_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
   int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,
                                                           expanded_signal,
                                                           mod_input_length,
                                                           expanded_shift);
 
   // Calculate energy of input signal.
-  int input_shift = 6 + log_fs_mult - WebRtcSpl_NormW32(input_max * input_max);
-  input_shift = std::max(input_shift, 0);
+  const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);
+  factor = (input_max * input_max) / (std::numeric_limits<int32_t>::max() /
+      static_cast<int32_t>(mod_input_length));
+  const int input_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
   int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,
                                                        mod_input_length,
                                                        input_shift);
diff --git a/webrtc/modules/audio_coding/neteq/merge.h b/webrtc/modules/audio_coding/neteq/merge.h
index 95dea5a885..48f09a1672 100644
--- a/webrtc/modules/audio_coding/neteq/merge.h
+++ b/webrtc/modules/audio_coding/neteq/merge.h
@@ -37,7 +37,7 @@ class Merge {
         size_t num_channels,
         Expand* expand,
         SyncBuffer* sync_buffer);
-  virtual ~Merge() {}
+  virtual ~Merge();
 
   // The main method to produce the audio data. The decoded data is supplied in
   // |input|, having |input_length| samples in total for all channels
@@ -93,6 +93,7 @@ class Merge {
   int16_t expanded_downsampled_[kExpandDownsampLength];
   int16_t input_downsampled_[kInputDownsampLength];
   AudioMultiVector expanded_;
+  std::vector<int16_t> temp_data_;
 
   RTC_DISALLOW_COPY_AND_ASSIGN(Merge);
 };
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index ef1e6cbf4a..2eb22772ee 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -500,6 +500,11 @@ const SyncBuffer* NetEqImpl::sync_buffer_for_test() const {
   return sync_buffer_.get();
 }
 
+Operations NetEqImpl::last_operation_for_test() const {
+  rtc::CritScope lock(&crit_sect_);
+  return last_operation_;
+}
+
 // Methods below this line are private.
 
 int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
@@ -905,6 +910,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame) {
       return kInvalidOperation;
     }
   }  // End of switch.
+  last_operation_ = operation;
   if (return_value < 0) {
     return return_value;
   }
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h
index 707fbebca7..a707f2565c 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@@ -204,10 +204,11 @@ class NetEqImpl : public webrtc::NetEq {
 
   // This accessor method is only intended for testing purposes.
   const SyncBuffer* sync_buffer_for_test() const;
+  Operations last_operation_for_test() const;
 
  protected:
   static const int kOutputSizeMs = 10;
-  static const size_t kMaxFrameSize = 2880;  // 60 ms @ 48 kHz.
+  static const size_t kMaxFrameSize = 5760;  // 120 ms @ 48 kHz.
   // TODO(hlundin): Provide a better value for kSyncBufferSize.
   static const size_t kSyncBufferSize = 2 * kMaxFrameSize;
 
@@ -383,6 +384,7 @@ class NetEqImpl : public webrtc::NetEq {
   size_t output_size_samples_ GUARDED_BY(crit_sect_);
   size_t decoder_frame_length_ GUARDED_BY(crit_sect_);
   Modes last_mode_ GUARDED_BY(crit_sect_);
+  Operations last_operation_ GUARDED_BY(crit_sect_);
   std::unique_ptr<int16_t[]> mute_factor_array_ GUARDED_BY(crit_sect_);
   size_t decoded_buffer_length_ GUARDED_BY(crit_sect_);
   std::unique_ptr<int16_t[]> decoded_buffer_ GUARDED_BY(crit_sect_);
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
index 77622bc457..8b47adb9c5 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -763,7 +763,7 @@ TEST_F(NetEqImplTest, CodecInternalCng) {
 TEST_F(NetEqImplTest, UnsupportedDecoder) {
   UseNoMocks();
   CreateInstance();
-  static const size_t kNetEqMaxFrameSize = 2880;  // 60 ms @ 48 kHz.
+  static const size_t kNetEqMaxFrameSize = 5760;  // 120 ms @ 48 kHz.
   static const size_t kChannels = 2;
 
   const uint8_t kPayloadType = 17;   // Just an arbitrary number.
@@ -773,7 +773,7 @@ TEST_F(NetEqImplTest, UnsupportedDecoder) {
   const size_t kPayloadLengthSamples =
       static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
   const size_t kPayloadLengthBytes = 1;
-  uint8_t payload[kPayloadLengthBytes]= {0};
+  uint8_t payload[kPayloadLengthBytes] = {0};
   int16_t dummy_output[kPayloadLengthSamples * kChannels] = {0};
   WebRtcRTPHeader rtp_header;
   rtp_header.header.payloadType = kPayloadType;
@@ -1189,4 +1189,214 @@ TEST_F(NetEqImplTest, TickTimerIncrement) {
   EXPECT_EQ(1u, tick_timer_->ticks());
 }
 
+class Decoder120ms : public AudioDecoder {
+ public:
+  Decoder120ms(SpeechType speech_type)
+      : next_value_(1),
+        speech_type_(speech_type) {}
+
+  int DecodeInternal(const uint8_t* encoded,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override {
+    size_t decoded_len =
+        rtc::CheckedDivExact(sample_rate_hz, 1000) * 120 * Channels();
+    for (size_t i = 0; i < decoded_len; ++i) {
+      decoded[i] = next_value_++;
+    }
+    *speech_type = speech_type_;
+    return decoded_len;
+  }
+
+  void Reset() override { next_value_ = 1; }
+  size_t Channels() const override { return 2; }
+
+ private:
+  int16_t next_value_;
+  SpeechType speech_type_;
+};
+
+class NetEqImplTest120ms : public NetEqImplTest {
+ protected:
+  NetEqImplTest120ms() : NetEqImplTest() {}
+  virtual ~NetEqImplTest120ms() {}
+
+  void CreateInstanceNoMocks() {
+    UseNoMocks();
+    CreateInstance();
+  }
+
+  void CreateInstanceWithDelayManagerMock() {
+    UseNoMocks();
+    use_mock_delay_manager_ = true;
+    CreateInstance();
+  }
+
+  uint32_t timestamp_diff_between_packets() const {
+    return rtc::CheckedDivExact(kSamplingFreq_, 1000u) * 120;
+  }
+
+  uint32_t first_timestamp() const { return 10u; }
+
+  void GetFirstPacket() {
+    for (int i = 0; i < 12; i++) {
+      EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+    }
+  }
+
+  void InsertPacket(uint32_t timestamp) {
+    WebRtcRTPHeader rtp_header;
+    rtp_header.header.payloadType = kPayloadType;
+    rtp_header.header.sequenceNumber = sequence_number_;
+    rtp_header.header.timestamp = timestamp;
+    rtp_header.header.ssrc = 15;
+    const size_t kPayloadLengthBytes = 1;  // This can be arbitrary.
+    uint8_t payload[kPayloadLengthBytes] = {0};
+    EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload, 10));
+    sequence_number_++;
+  }
+
+  void Register120msCodec(AudioDecoder::SpeechType speech_type) {
+    decoder_.reset(new Decoder120ms(speech_type));
+    ASSERT_EQ(2u, decoder_->Channels());
+    EXPECT_EQ(NetEq::kOK, neteq_->RegisterExternalDecoder(
+                              decoder_.get(), NetEqDecoder::kDecoderOpus_2ch,
+                              "120ms codec", kPayloadType, kSamplingFreq_));
+  }
+
+  std::unique_ptr<Decoder120ms> decoder_;
+  AudioFrame output_;
+  const uint32_t kPayloadType = 17;
+  const uint32_t kSamplingFreq_ = 48000;
+  uint16_t sequence_number_ = 1;
+};
+
+TEST_F(NetEqImplTest120ms, AudioRepetition) {
+  config_.playout_mode = kPlayoutFax;
+  CreateInstanceNoMocks();
+  Register120msCodec(AudioDecoder::kSpeech);
+
+  InsertPacket(first_timestamp());
+  GetFirstPacket();
+
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+  EXPECT_EQ(kAudioRepetition, neteq_->last_operation_for_test());
+}
+
+TEST_F(NetEqImplTest120ms, AlternativePlc) {
+  config_.playout_mode = kPlayoutOff;
+  CreateInstanceNoMocks();
+  Register120msCodec(AudioDecoder::kSpeech);
+
+  InsertPacket(first_timestamp());
+  GetFirstPacket();
+
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+  EXPECT_EQ(kAlternativePlc, neteq_->last_operation_for_test());
+}
+
+TEST_F(NetEqImplTest120ms, CodecInternalCng) {
+  CreateInstanceNoMocks();
+  Register120msCodec(AudioDecoder::kComfortNoise);
+
+  InsertPacket(first_timestamp());
+  GetFirstPacket();
+
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+  EXPECT_EQ(kCodecInternalCng, neteq_->last_operation_for_test());
+}
+
+TEST_F(NetEqImplTest120ms, Normal) {
+  CreateInstanceNoMocks();
+  Register120msCodec(AudioDecoder::kSpeech);
+
+  InsertPacket(first_timestamp());
+  GetFirstPacket();
+
+  EXPECT_EQ(kNormal, neteq_->last_operation_for_test());
+}
+
+TEST_F(NetEqImplTest120ms, Merge) {
+  CreateInstanceWithDelayManagerMock();
+
+  Register120msCodec(AudioDecoder::kSpeech);
+  InsertPacket(first_timestamp());
+
+  GetFirstPacket();
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+
+  InsertPacket(first_timestamp() + 2 * timestamp_diff_between_packets());
+
+  // Delay manager reports a target level which should cause a Merge.
+  EXPECT_CALL(*mock_delay_manager_, TargetLevel()).WillOnce(Return(-10));
+
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+  EXPECT_EQ(kMerge, neteq_->last_operation_for_test());
+}
+
+TEST_F(NetEqImplTest120ms, Expand) {
+  CreateInstanceNoMocks();
+  Register120msCodec(AudioDecoder::kSpeech);
+
+  InsertPacket(first_timestamp());
+  GetFirstPacket();
+
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+  EXPECT_EQ(kExpand, neteq_->last_operation_for_test());
+}
+
+TEST_F(NetEqImplTest120ms, FastAccelerate) {
+  CreateInstanceWithDelayManagerMock();
+  Register120msCodec(AudioDecoder::kSpeech);
+
+  InsertPacket(first_timestamp());
+  GetFirstPacket();
+  InsertPacket(first_timestamp() + timestamp_diff_between_packets());
+
+  // Delay manager reports buffer limits which should cause a FastAccelerate.
+  EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _))
+      .Times(1)
+      .WillOnce(DoAll(SetArgPointee<0>(0), SetArgPointee<1>(0)));
+
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+  EXPECT_EQ(kFastAccelerate, neteq_->last_operation_for_test());
+}
+
+TEST_F(NetEqImplTest120ms, PreemptiveExpand) {
+  CreateInstanceWithDelayManagerMock();
+  Register120msCodec(AudioDecoder::kSpeech);
+
+  InsertPacket(first_timestamp());
+  GetFirstPacket();
+
+  InsertPacket(first_timestamp() + timestamp_diff_between_packets());
+
+  // Delay manager reports buffer limits which should cause a PreemptiveExpand.
+  EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _))
+      .Times(1)
+      .WillOnce(DoAll(SetArgPointee<0>(100), SetArgPointee<1>(100)));
+
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+  EXPECT_EQ(kPreemptiveExpand, neteq_->last_operation_for_test());
+}
+
+TEST_F(NetEqImplTest120ms, Accelerate) {
+  CreateInstanceWithDelayManagerMock();
+  Register120msCodec(AudioDecoder::kSpeech);
+
+  InsertPacket(first_timestamp());
+  GetFirstPacket();
+
+  InsertPacket(first_timestamp() + timestamp_diff_between_packets());
+
+  // Delay manager reports buffer limits which should cause an Accelerate.
+  EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _))
+      .Times(1)
+      .WillOnce(DoAll(SetArgPointee<0>(1), SetArgPointee<1>(2)));
+
+  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
+  EXPECT_EQ(kAccelerate, neteq_->last_operation_for_test());
+}
+
 }// namespace webrtc
diff --git a/webrtc/modules/audio_coding/neteq/normal_unittest.cc b/webrtc/modules/audio_coding/neteq/normal_unittest.cc
index f98e99a82d..5e1fc131e5 100644
--- a/webrtc/modules/audio_coding/neteq/normal_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/normal_unittest.cc
@@ -27,9 +27,20 @@
 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
 
 using ::testing::_;
+using ::testing::Invoke;
 
 namespace webrtc {
 
+namespace {
+
+int ExpandProcess120ms(AudioMultiVector* output) {
+  AudioMultiVector dummy_audio(1, 11520u);
+  dummy_audio.CopyTo(output);
+  return 0;
+}
+
+} // namespace
+
 TEST(Normal, CreateAndDestroy) {
   MockDecoderDatabase db;
   int fs = 8000;
@@ -121,6 +132,45 @@ TEST(Normal, InputLengthAndChannelsDoNotMatch) {
   EXPECT_CALL(expand, Die());  // Called when |expand| goes out of scope.
 }
 
+TEST(Normal, LastModeExpand120msPacket) {
+  WebRtcSpl_Init();
+  MockDecoderDatabase db;
+  const int kFs = 48000;
+  const size_t kPacketsizeBytes = 11520u;
+  const size_t kChannels = 1;
+  BackgroundNoise bgn(kChannels);
+  SyncBuffer sync_buffer(kChannels, 1000);
+  RandomVector random_vector;
+  StatisticsCalculator statistics;
+  MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs,
+                    kChannels);
+  Normal normal(kFs, &db, bgn, &expand);
+
+  int16_t input[kPacketsizeBytes] = {0};
+
+  std::unique_ptr<int16_t[]> mute_factor_array(new int16_t[kChannels]);
+  for (size_t i = 0; i < kChannels; ++i) {
+    mute_factor_array[i] = 16384;
+  }
+
+  AudioMultiVector output(kChannels);
+
+  EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
+  EXPECT_CALL(expand, Process(_)).WillOnce(Invoke(ExpandProcess120ms));
+  EXPECT_CALL(expand, Reset());
+  EXPECT_EQ(static_cast<int>(kPacketsizeBytes),
+            normal.Process(input,
+                           kPacketsizeBytes,
+                           kModeExpand,
+                           mute_factor_array.get(),
+                           &output));
+
+  EXPECT_EQ(kPacketsizeBytes, output.Size());
+
+  EXPECT_CALL(db, Die());      // Called when |db| goes out of scope.
+  EXPECT_CALL(expand, Die());  // Called when |expand| goes out of scope.
+}
+
 // TODO(hlundin): Write more tests.
 
 }  // namespace webrtc