From 5bd3397e53b96f73d1dcc120a50baf594aee3eb5 Mon Sep 17 00:00:00 2001 From: minyue Date: Mon, 2 May 2016 04:46:11 -0700 Subject: [PATCH] Adding 120 ms frame length support in NetEq. BUG=webrtc:1015 Review-Url: https://codereview.webrtc.org/1901633002 Cr-Commit-Position: refs/heads/master@{#12592} --- webrtc/modules/audio_coding/neteq/merge.cc | 31 ++- webrtc/modules/audio_coding/neteq/merge.h | 3 +- .../modules/audio_coding/neteq/neteq_impl.cc | 6 + .../modules/audio_coding/neteq/neteq_impl.h | 4 +- .../audio_coding/neteq/neteq_impl_unittest.cc | 214 +++++++++++++++++- .../audio_coding/neteq/normal_unittest.cc | 50 ++++ 6 files changed, 288 insertions(+), 20 deletions(-) diff --git a/webrtc/modules/audio_coding/neteq/merge.cc b/webrtc/modules/audio_coding/neteq/merge.cc index b62df61367..94db1129cd 100644 --- a/webrtc/modules/audio_coding/neteq/merge.cc +++ b/webrtc/modules/audio_coding/neteq/merge.cc @@ -39,6 +39,8 @@ Merge::Merge(int fs_hz, assert(num_channels_ > 0); } +Merge::~Merge() = default; + size_t Merge::Process(int16_t* input, size_t input_length, int16_t* external_mute_factor_array, AudioMultiVector* output) { @@ -91,9 +93,8 @@ size_t Merge::Process(int16_t* input, size_t input_length, old_length, input_length_per_channel, expand_period); } - static const int kTempDataSize = 3600; - int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this. - int16_t* decoded_output = temp_data + best_correlation_index; + temp_data_.resize(input_length_per_channel + best_correlation_index); + int16_t* decoded_output = temp_data_.data() + best_correlation_index; // Mute the new decoded data if needed (and unmute it linearly). // This is the overlapping part of expanded_signal. @@ -127,7 +128,7 @@ size_t Merge::Process(int16_t* input, size_t input_length, int16_t increment = static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14. 
int16_t mute_factor = 16384 - increment; - memmove(temp_data, expanded_channel, + memmove(temp_data_.data(), expanded_channel, sizeof(int16_t) * best_correlation_index); DspHelper::CrossFade(&expanded_channel[best_correlation_index], input_channel, interpolation_length, @@ -140,8 +141,8 @@ size_t Merge::Process(int16_t* input, size_t input_length, } else { assert(output->Size() == output_length); } - memcpy(&(*output)[channel][0], temp_data, - sizeof(temp_data[0]) * output_length); + memcpy(&(*output)[channel][0], temp_data_.data(), + sizeof(temp_data_[0]) * output_length); } // Copy back the first part of the data to |sync_buffer_| and remove it from @@ -208,22 +209,20 @@ int16_t Merge::SignalScaling(const int16_t* input, size_t input_length, std::min(static_cast<size_t>(64 * fs_mult_), input_length); const int16_t expanded_max = WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length); - const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length); - - // Calculate energy of expanded signal. - // |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz. - int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_); - int expanded_shift = 6 + log_fs_mult - - WebRtcSpl_NormW32(expanded_max * expanded_max); - expanded_shift = std::max(expanded_shift, 0); + int32_t factor = (expanded_max * expanded_max) / + (std::numeric_limits<int32_t>::max() / + static_cast<int32_t>(mod_input_length)); + const int expanded_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor); int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal, expanded_signal, mod_input_length, expanded_shift); // Calculate energy of input signal. - int input_shift = 6 + log_fs_mult - WebRtcSpl_NormW32(input_max * input_max); - input_shift = std::max(input_shift, 0); + const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length); + factor = (input_max * input_max) / (std::numeric_limits<int32_t>::max() / + static_cast<int32_t>(mod_input_length)); + const int input_shift = factor == 0 ? 
0 : 31 - WebRtcSpl_NormW32(factor); int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input, mod_input_length, input_shift); diff --git a/webrtc/modules/audio_coding/neteq/merge.h b/webrtc/modules/audio_coding/neteq/merge.h index 95dea5a885..48f09a1672 100644 --- a/webrtc/modules/audio_coding/neteq/merge.h +++ b/webrtc/modules/audio_coding/neteq/merge.h @@ -37,7 +37,7 @@ class Merge { size_t num_channels, Expand* expand, SyncBuffer* sync_buffer); - virtual ~Merge() {} + virtual ~Merge(); // The main method to produce the audio data. The decoded data is supplied in // |input|, having |input_length| samples in total for all channels @@ -93,6 +93,7 @@ class Merge { int16_t expanded_downsampled_[kExpandDownsampLength]; int16_t input_downsampled_[kInputDownsampLength]; AudioMultiVector expanded_; + std::vector<int16_t> temp_data_; RTC_DISALLOW_COPY_AND_ASSIGN(Merge); }; diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index ef1e6cbf4a..2eb22772ee 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -500,6 +500,11 @@ const SyncBuffer* NetEqImpl::sync_buffer_for_test() const { return sync_buffer_.get(); } +Operations NetEqImpl::last_operation_for_test() const { + rtc::CritScope lock(&crit_sect_); + return last_operation_; +} + // Methods below this line are private. int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header, @@ -905,6 +910,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame) { return kInvalidOperation; } } // End of switch. 
+ last_operation_ = operation; if (return_value < 0) { return return_value; } diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h index 707fbebca7..a707f2565c 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h @@ -204,10 +204,11 @@ class NetEqImpl : public webrtc::NetEq { // This accessor method is only intended for testing purposes. const SyncBuffer* sync_buffer_for_test() const; + Operations last_operation_for_test() const; protected: static const int kOutputSizeMs = 10; - static const size_t kMaxFrameSize = 2880; // 60 ms @ 48 kHz. + static const size_t kMaxFrameSize = 5760; // 120 ms @ 48 kHz. // TODO(hlundin): Provide a better value for kSyncBufferSize. static const size_t kSyncBufferSize = 2 * kMaxFrameSize; @@ -383,6 +384,7 @@ class NetEqImpl : public webrtc::NetEq { size_t output_size_samples_ GUARDED_BY(crit_sect_); size_t decoder_frame_length_ GUARDED_BY(crit_sect_); Modes last_mode_ GUARDED_BY(crit_sect_); + Operations last_operation_ GUARDED_BY(crit_sect_); std::unique_ptr mute_factor_array_ GUARDED_BY(crit_sect_); size_t decoded_buffer_length_ GUARDED_BY(crit_sect_); std::unique_ptr decoded_buffer_ GUARDED_BY(crit_sect_); diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc index 77622bc457..8b47adb9c5 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -763,7 +763,7 @@ TEST_F(NetEqImplTest, CodecInternalCng) { TEST_F(NetEqImplTest, UnsupportedDecoder) { UseNoMocks(); CreateInstance(); - static const size_t kNetEqMaxFrameSize = 2880; // 60 ms @ 48 kHz. + static const size_t kNetEqMaxFrameSize = 5760; // 120 ms @ 48 kHz. static const size_t kChannels = 2; const uint8_t kPayloadType = 17; // Just an arbitrary number. 
@@ -773,7 +773,7 @@ TEST_F(NetEqImplTest, UnsupportedDecoder) { const size_t kPayloadLengthSamples = static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms. const size_t kPayloadLengthBytes = 1; - uint8_t payload[kPayloadLengthBytes]= {0}; + uint8_t payload[kPayloadLengthBytes] = {0}; int16_t dummy_output[kPayloadLengthSamples * kChannels] = {0}; WebRtcRTPHeader rtp_header; rtp_header.header.payloadType = kPayloadType; @@ -1189,4 +1189,214 @@ TEST_F(NetEqImplTest, TickTimerIncrement) { EXPECT_EQ(1u, tick_timer_->ticks()); } +class Decoder120ms : public AudioDecoder { + public: + Decoder120ms(SpeechType speech_type) + : next_value_(1), + speech_type_(speech_type) {} + + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + size_t decoded_len = + rtc::CheckedDivExact(sample_rate_hz, 1000) * 120 * Channels(); + for (size_t i = 0; i < decoded_len; ++i) { + decoded[i] = next_value_++; + } + *speech_type = speech_type_; + return decoded_len; + } + + void Reset() override { next_value_ = 1; } + size_t Channels() const override { return 2; } + + private: + int16_t next_value_; + SpeechType speech_type_; +}; + +class NetEqImplTest120ms : public NetEqImplTest { + protected: + NetEqImplTest120ms() : NetEqImplTest() {} + virtual ~NetEqImplTest120ms() {} + + void CreateInstanceNoMocks() { + UseNoMocks(); + CreateInstance(); + } + + void CreateInstanceWithDelayManagerMock() { + UseNoMocks(); + use_mock_delay_manager_ = true; + CreateInstance(); + } + + uint32_t timestamp_diff_between_packets() const { + return rtc::CheckedDivExact(kSamplingFreq_, 1000u) * 120; + } + + uint32_t first_timestamp() const { return 10u; } + + void GetFirstPacket() { + for (int i = 0; i < 12; i++) { + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + } + } + + void InsertPacket(uint32_t timestamp) { + WebRtcRTPHeader rtp_header; + rtp_header.header.payloadType = kPayloadType; + rtp_header.header.sequenceNumber = 
sequence_number_; + rtp_header.header.timestamp = timestamp; + rtp_header.header.ssrc = 15; + const size_t kPayloadLengthBytes = 1; // This can be arbitrary. + uint8_t payload[kPayloadLengthBytes] = {0}; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload, 10)); + sequence_number_++; + } + + void Register120msCodec(AudioDecoder::SpeechType speech_type) { + decoder_.reset(new Decoder120ms(speech_type)); + ASSERT_EQ(2u, decoder_->Channels()); + EXPECT_EQ(NetEq::kOK, neteq_->RegisterExternalDecoder( + decoder_.get(), NetEqDecoder::kDecoderOpus_2ch, + "120ms codec", kPayloadType, kSamplingFreq_)); + } + + std::unique_ptr<Decoder120ms> decoder_; + AudioFrame output_; + const uint32_t kPayloadType = 17; + const uint32_t kSamplingFreq_ = 48000; + uint16_t sequence_number_ = 1; +}; + +TEST_F(NetEqImplTest120ms, AudioRepetition) { + config_.playout_mode = kPlayoutFax; + CreateInstanceNoMocks(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + EXPECT_EQ(kAudioRepetition, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, AlternativePlc) { + config_.playout_mode = kPlayoutOff; + CreateInstanceNoMocks(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + EXPECT_EQ(kAlternativePlc, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, CodecInternalCng) { + CreateInstanceNoMocks(); + Register120msCodec(AudioDecoder::kComfortNoise); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + EXPECT_EQ(kCodecInternalCng, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Normal) { + CreateInstanceNoMocks(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + EXPECT_EQ(kNormal, neteq_->last_operation_for_test()); 
+} + +TEST_F(NetEqImplTest120ms, Merge) { + CreateInstanceWithDelayManagerMock(); + + Register120msCodec(AudioDecoder::kSpeech); + InsertPacket(first_timestamp()); + + GetFirstPacket(); + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + + InsertPacket(first_timestamp() + 2 * timestamp_diff_between_packets()); + + // Delay manager reports a target level which should cause a Merge. + EXPECT_CALL(*mock_delay_manager_, TargetLevel()).WillOnce(Return(-10)); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + EXPECT_EQ(kMerge, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Expand) { + CreateInstanceNoMocks(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + EXPECT_EQ(kExpand, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, FastAccelerate) { + CreateInstanceWithDelayManagerMock(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + // Delay manager report buffer limit which should cause a FastAccelerate. + EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _)) + .Times(1) + .WillOnce(DoAll(SetArgPointee<0>(0), SetArgPointee<1>(0))); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + EXPECT_EQ(kFastAccelerate, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, PreemptiveExpand) { + CreateInstanceWithDelayManagerMock(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + // Delay manager report buffer limit which should cause a PreemptiveExpand. 
+ EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _)) + .Times(1) + .WillOnce(DoAll(SetArgPointee<0>(100), SetArgPointee<1>(100))); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + EXPECT_EQ(kPreemptiveExpand, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Accelerate) { + CreateInstanceWithDelayManagerMock(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + // Delay manager report buffer limit which should cause a Accelerate. + EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _)) + .Times(1) + .WillOnce(DoAll(SetArgPointee<0>(1), SetArgPointee<1>(2))); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_)); + EXPECT_EQ(kAccelerate, neteq_->last_operation_for_test()); +} + }// namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq/normal_unittest.cc b/webrtc/modules/audio_coding/neteq/normal_unittest.cc index f98e99a82d..5e1fc131e5 100644 --- a/webrtc/modules/audio_coding/neteq/normal_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/normal_unittest.cc @@ -27,9 +27,20 @@ #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" using ::testing::_; +using ::testing::Invoke; namespace webrtc { +namespace { + +int ExpandProcess120ms(AudioMultiVector* output) { + AudioMultiVector dummy_audio(1, 11520u); + dummy_audio.CopyTo(output); + return 0; +} + +} // namespace + TEST(Normal, CreateAndDestroy) { MockDecoderDatabase db; int fs = 8000; @@ -121,6 +132,45 @@ TEST(Normal, InputLengthAndChannelsDoNotMatch) { EXPECT_CALL(expand, Die()); // Called when |expand| goes out of scope. 
} +TEST(Normal, LastModeExpand120msPacket) { + WebRtcSpl_Init(); + MockDecoderDatabase db; + const int kFs = 48000; + const size_t kPacketsizeBytes = 11520u; + const size_t kChannels = 1; + BackgroundNoise bgn(kChannels); + SyncBuffer sync_buffer(kChannels, 1000); + RandomVector random_vector; + StatisticsCalculator statistics; + MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs, + kChannels); + Normal normal(kFs, &db, bgn, &expand); + + int16_t input[kPacketsizeBytes] = {0}; + + std::unique_ptr<int16_t[]> mute_factor_array(new int16_t[kChannels]); + for (size_t i = 0; i < kChannels; ++i) { + mute_factor_array[i] = 16384; + } + + AudioMultiVector output(kChannels); + + EXPECT_CALL(expand, SetParametersForNormalAfterExpand()); + EXPECT_CALL(expand, Process(_)).WillOnce(Invoke(ExpandProcess120ms)); + EXPECT_CALL(expand, Reset()); + EXPECT_EQ(static_cast<int>(kPacketsizeBytes), + normal.Process(input, + kPacketsizeBytes, + kModeExpand, + mute_factor_array.get(), + &output)); + + EXPECT_EQ(kPacketsizeBytes, output.Size()); + + EXPECT_CALL(db, Die()); // Called when |db| goes out of scope. + EXPECT_CALL(expand, Die()); // Called when |expand| goes out of scope. +} + // TODO(hlundin): Write more tests. } // namespace webrtc