diff --git a/modules/audio_coding/BUILD.gn b/modules/audio_coding/BUILD.gn index 38cce73f6b..fb587c598d 100644 --- a/modules/audio_coding/BUILD.gn +++ b/modules/audio_coding/BUILD.gn @@ -1057,6 +1057,10 @@ rtc_static_library("neteq") { "neteq/cross_correlation.h", "neteq/decision_logic.cc", "neteq/decision_logic.h", + "neteq/decision_logic_fax.cc", + "neteq/decision_logic_fax.h", + "neteq/decision_logic_normal.cc", + "neteq/decision_logic_normal.h", "neteq/decoder_database.cc", "neteq/decoder_database.h", "neteq/defines.h", diff --git a/modules/audio_coding/acm2/acm_receiver_unittest.cc b/modules/audio_coding/acm2/acm_receiver_unittest.cc index 457ea1d4d2..d1cff235c2 100644 --- a/modules/audio_coding/acm2/acm_receiver_unittest.cc +++ b/modules/audio_coding/acm2/acm_receiver_unittest.cc @@ -292,7 +292,7 @@ TEST_F(AcmReceiverTestOldApi, MAYBE_SampleRate) { class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi { protected: AcmReceiverTestFaxModeOldApi() { - config_.neteq_config.for_test_no_time_stretching = true; + config_.neteq_config.playout_mode = kPlayoutFax; } void RunVerifyAudioFrame(RentACodec::CodecId codec_id) { @@ -301,7 +301,7 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi { // timestamp increments predictable; in normal mode, NetEq may decide to do // accelerate or pre-emptive expand operations after some time, offsetting // the timestamp. 
- EXPECT_TRUE(config_.neteq_config.for_test_no_time_stretching); + EXPECT_EQ(kPlayoutFax, config_.neteq_config.playout_mode); const RentACodec::CodecId kCodecId[] = {codec_id}; AddSetOfCodecs(kCodecId); diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc index afe25e1a20..cc58f04781 100644 --- a/modules/audio_coding/neteq/decision_logic.cc +++ b/modules/audio_coding/neteq/decision_logic.cc @@ -10,37 +10,47 @@ #include "modules/audio_coding/neteq/decision_logic.h" -#include #include -#include #include "modules/audio_coding/neteq/buffer_level_filter.h" -#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/decision_logic_fax.h" +#include "modules/audio_coding/neteq/decision_logic_normal.h" #include "modules/audio_coding/neteq/delay_manager.h" #include "modules/audio_coding/neteq/expand.h" #include "modules/audio_coding/neteq/packet_buffer.h" #include "modules/audio_coding/neteq/sync_buffer.h" #include "modules/include/module_common_types.h" -#include "system_wrappers/include/field_trial.h" namespace webrtc { DecisionLogic* DecisionLogic::Create(int fs_hz, size_t output_size_samples, - bool disallow_time_stretching, + NetEqPlayoutMode playout_mode, DecoderDatabase* decoder_database, const PacketBuffer& packet_buffer, DelayManager* delay_manager, BufferLevelFilter* buffer_level_filter, const TickTimer* tick_timer) { - return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching, - decoder_database, packet_buffer, delay_manager, - buffer_level_filter, tick_timer); + switch (playout_mode) { + case kPlayoutOn: + case kPlayoutStreaming: + return new DecisionLogicNormal( + fs_hz, output_size_samples, playout_mode, decoder_database, + packet_buffer, delay_manager, buffer_level_filter, tick_timer); + case kPlayoutFax: + case kPlayoutOff: + return new DecisionLogicFax( + fs_hz, output_size_samples, playout_mode, decoder_database, + packet_buffer, delay_manager, 
buffer_level_filter, tick_timer); + } + // This line cannot be reached, but must be here to avoid compiler errors. + assert(false); + return NULL; } DecisionLogic::DecisionLogic(int fs_hz, size_t output_size_samples, - bool disallow_time_stretching, + NetEqPlayoutMode playout_mode, DecoderDatabase* decoder_database, const PacketBuffer& packet_buffer, DelayManager* delay_manager, @@ -55,13 +65,11 @@ DecisionLogic::DecisionLogic(int fs_hz, packet_length_samples_(0), sample_memory_(0), prev_time_scale_(false), - disallow_time_stretching_(disallow_time_stretching), timescale_countdown_( tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)), num_consecutive_expands_(0), - postpone_decoding_after_expand_(field_trial::IsEnabled( - "WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) { - delay_manager_->set_streaming_mode(false); + playout_mode_(playout_mode) { + delay_manager_->set_streaming_mode(playout_mode_ == kPlayoutStreaming); SetSampleRate(fs_hz, output_size_samples); } @@ -160,228 +168,4 @@ void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples, } } -Operations DecisionLogic::GetDecisionSpecialized(const SyncBuffer& sync_buffer, - const Expand& expand, - size_t decoder_frame_length, - const Packet* next_packet, - Modes prev_mode, - bool play_dtmf, - bool* reset_decoder, - size_t generated_noise_samples, - size_t cur_size_samples) { - // Guard for errors, to avoid getting stuck in error mode. - if (prev_mode == kModeError) { - if (!next_packet) { - return kExpand; - } else { - return kUndefined; // Use kUndefined to flag for a reset. 
- } - } - - uint32_t target_timestamp = sync_buffer.end_timestamp(); - uint32_t available_timestamp = 0; - bool is_cng_packet = false; - if (next_packet) { - available_timestamp = next_packet->timestamp; - is_cng_packet = - decoder_database_->IsComfortNoise(next_packet->payload_type); - } - - if (is_cng_packet) { - return CngOperation(prev_mode, target_timestamp, available_timestamp, - generated_noise_samples); - } - - // Handle the case with no packet at all available (except maybe DTMF). - if (!next_packet) { - return NoPacket(play_dtmf); - } - - // If the expand period was very long, reset NetEQ since it is likely that the - // sender was restarted. - if (num_consecutive_expands_ > kReinitAfterExpands) { - *reset_decoder = true; - return kNormal; - } - - // Make sure we don't restart audio too soon after an expansion to avoid - // running out of data right away again. We should only wait if there are no - // DTX or CNG packets in the buffer (otherwise we should just play out what we - // have, since we cannot know the exact duration of DTX or CNG packets), and - // if the mute factor is low enough (otherwise the expansion was short enough - // to not be noticable). - // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. - if (postpone_decoding_after_expand_ && prev_mode == kModeExpand && - !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) && - cur_size_samples(delay_manager_->TargetLevel() * - packet_length_samples_)>> 8 && - expand.MuteFactor(0) < 16384 / 2) { - return kExpand; - } - - const uint32_t five_seconds_samples = - static_cast(5 * 8000 * fs_mult_); - // Check if the required packet is available. 
- if (target_timestamp == available_timestamp) { - return ExpectedPacketAvailable(prev_mode, play_dtmf); - } else if (!PacketBuffer::IsObsoleteTimestamp( - available_timestamp, target_timestamp, five_seconds_samples)) { - return FuturePacketAvailable( - sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp, - available_timestamp, play_dtmf, generated_noise_samples); - } else { - // This implies that available_timestamp < target_timestamp, which can - // happen when a new stream or codec is received. Signal for a reset. - return kUndefined; - } -} - -Operations DecisionLogic::CngOperation(Modes prev_mode, - uint32_t target_timestamp, - uint32_t available_timestamp, - size_t generated_noise_samples) { - // Signed difference between target and available timestamp. - int32_t timestamp_diff = static_cast( - static_cast(generated_noise_samples + target_timestamp) - - available_timestamp); - int32_t optimal_level_samp = static_cast( - (delay_manager_->TargetLevel() * packet_length_samples_) >> 8); - const int64_t excess_waiting_time_samp = - -static_cast(timestamp_diff) - optimal_level_samp; - - if (excess_waiting_time_samp > optimal_level_samp / 2) { - // The waiting time for this packet will be longer than 1.5 - // times the wanted buffer delay. Apply fast-forward to cut the - // waiting time down to the optimal. - noise_fast_forward_ = rtc::dchecked_cast(noise_fast_forward_ + - excess_waiting_time_samp); - timestamp_diff = - rtc::saturated_cast(timestamp_diff + excess_waiting_time_samp); - } - - if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) { - // Not time to play this packet yet. Wait another round before using this - // packet. Keep on playing CNG from previous CNG parameters. - return kRfc3389CngNoPacket; - } else { - // Otherwise, go for the CNG packet now. - noise_fast_forward_ = 0; - return kRfc3389Cng; - } -} - -Operations DecisionLogic::NoPacket(bool play_dtmf) { - if (cng_state_ == kCngRfc3389On) { - // Keep on playing comfort noise. 
- return kRfc3389CngNoPacket; - } else if (cng_state_ == kCngInternalOn) { - // Keep on playing codec internal comfort noise. - return kCodecInternalCng; - } else if (play_dtmf) { - return kDtmf; - } else { - // Nothing to play, do expand. - return kExpand; - } -} - -Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode, - bool play_dtmf) { - if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) { - // Check criterion for time-stretching. - int low_limit, high_limit; - delay_manager_->BufferLimits(&low_limit, &high_limit); - if (buffer_level_filter_->filtered_current_level() >= high_limit << 2) - return kFastAccelerate; - if (TimescaleAllowed()) { - if (buffer_level_filter_->filtered_current_level() >= high_limit) - return kAccelerate; - if (buffer_level_filter_->filtered_current_level() < low_limit) - return kPreemptiveExpand; - } - } - return kNormal; -} - -Operations DecisionLogic::FuturePacketAvailable( - const SyncBuffer& sync_buffer, - const Expand& expand, - size_t decoder_frame_length, - Modes prev_mode, - uint32_t target_timestamp, - uint32_t available_timestamp, - bool play_dtmf, - size_t generated_noise_samples) { - // Required packet is not available, but a future packet is. - // Check if we should continue with an ongoing expand because the new packet - // is too far into the future. - uint32_t timestamp_leap = available_timestamp - target_timestamp; - if ((prev_mode == kModeExpand) && !ReinitAfterExpands(timestamp_leap) && - !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) && - UnderTargetLevel()) { - if (play_dtmf) { - // Still have DTMF to play, so do not do expand. - return kDtmf; - } else { - // Nothing to play. - return kExpand; - } - } - - const size_t samples_left = - sync_buffer.FutureLength() - expand.overlap_length(); - const size_t cur_size_samples = - samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length; - - // If previous was comfort noise, then no merge is needed. 
- if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) { - // Keep the same delay as before the CNG, but make sure that the number of - // samples in buffer is no higher than 4 times the optimal level. (Note that - // TargetLevel() is in Q8.) - if (static_cast(generated_noise_samples + target_timestamp) >= - available_timestamp || - cur_size_samples > - ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) * - 4) { - // Time to play this new packet. - return kNormal; - } else { - // Too early to play this new packet; keep on playing comfort noise. - if (prev_mode == kModeRfc3389Cng) { - return kRfc3389CngNoPacket; - } else { // prevPlayMode == kModeCodecInternalCng. - return kCodecInternalCng; - } - } - } - // Do not merge unless we have done an expand before. - if (prev_mode == kModeExpand) { - return kMerge; - } else if (play_dtmf) { - // Play DTMF instead of expand. - return kDtmf; - } else { - return kExpand; - } -} - -bool DecisionLogic::UnderTargetLevel() const { - return buffer_level_filter_->filtered_current_level() <= - delay_manager_->TargetLevel(); -} - -bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const { - return timestamp_leap >= - static_cast(output_size_samples_ * kReinitAfterExpands); -} - -bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const { - return timestamp_leap > - static_cast(output_size_samples_ * num_consecutive_expands_); -} - -bool DecisionLogic::MaxWaitForPacket() const { - return num_consecutive_expands_ >= kMaxWaitForPacket; -} - } // namespace webrtc diff --git a/modules/audio_coding/neteq/decision_logic.h b/modules/audio_coding/neteq/decision_logic.h index 49141236d3..d23aa744c3 100644 --- a/modules/audio_coding/neteq/decision_logic.h +++ b/modules/audio_coding/neteq/decision_logic.h @@ -28,34 +28,32 @@ class PacketBuffer; class SyncBuffer; struct Packet; -// This is the class for the decision tree implementation. 
+// This is the base class for the decision tree implementations. Derived classes +// must implement the method GetDecisionSpecialized(). class DecisionLogic { public: // Static factory function which creates different types of objects depending // on the |playout_mode|. static DecisionLogic* Create(int fs_hz, size_t output_size_samples, - bool disallow_time_stretching, + NetEqPlayoutMode playout_mode, DecoderDatabase* decoder_database, const PacketBuffer& packet_buffer, DelayManager* delay_manager, BufferLevelFilter* buffer_level_filter, const TickTimer* tick_timer); - static const int kReinitAfterExpands = 100; - static const int kMaxWaitForPacket = 10; - // Constructor. DecisionLogic(int fs_hz, size_t output_size_samples, - bool disallow_time_stretching, + NetEqPlayoutMode playout_mode, DecoderDatabase* decoder_database, const PacketBuffer& packet_buffer, DelayManager* delay_manager, BufferLevelFilter* buffer_level_filter, const TickTimer* tick_timer); - ~DecisionLogic(); + virtual ~DecisionLogic(); // Resets object to a clean state. void Reset(); @@ -96,7 +94,7 @@ class DecisionLogic { // not. Note that this is necessary, since an expand decision can be changed // to kNormal in NetEqImpl::GetDecision if there is still enough data in the // sync buffer. - void ExpandDecision(Operations operation); + virtual void ExpandDecision(Operations operation); // Adds |value| to |sample_memory_|. void AddSampleMemory(int32_t value) { sample_memory_ += value; } @@ -109,17 +107,14 @@ class DecisionLogic { packet_length_samples_ = value; } void set_prev_time_scale(bool value) { prev_time_scale_ = value; } + NetEqPlayoutMode playout_mode() const { return playout_mode_; } - private: + protected: // The value 5 sets maximum time-stretch rate to about 100 ms/s. static const int kMinTimescaleInterval = 5; enum CngState { kCngOff, kCngRfc3389On, kCngInternalOn }; - // Updates the |buffer_level_filter_| with the current buffer level - // |buffer_size_packets|. 
- void FilterBufferLevel(size_t buffer_size_packets, Modes prev_mode); - // Returns the operation that should be done next. |sync_buffer| and |expand| // are provided for reference. |decoder_frame_length| is the number of samples // obtained from the last decoded frame. If there is a packet available, it @@ -128,63 +123,20 @@ class DecisionLogic { // |prev_mode|. If there is a DTMF event to play, |play_dtmf| should be set to // true. The output variable |reset_decoder| will be set to true if a reset is // required; otherwise it is left unchanged (i.e., it can remain true if it - // was true before the call). - // TODO(henrik.lundin) Fold this method into GetDecision. - Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer, - const Expand& expand, - size_t decoder_frame_length, - const Packet* next_packet, - Modes prev_mode, - bool play_dtmf, - bool* reset_decoder, - size_t generated_noise_samples, - size_t cur_size_samples); + // was true before the call). Should be implemented by derived classes. + virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer, + const Expand& expand, + size_t decoder_frame_length, + const Packet* next_packet, + Modes prev_mode, + bool play_dtmf, + bool* reset_decoder, + size_t generated_noise_samples, + size_t cur_size_samples) = 0; - // Returns the operation given that the next available packet is a comfort - // noise payload (RFC 3389 only, not codec-internal). - Operations CngOperation(Modes prev_mode, - uint32_t target_timestamp, - uint32_t available_timestamp, - size_t generated_noise_samples); - - // Returns the operation given that no packets are available (except maybe - // a DTMF event, flagged by setting |play_dtmf| true). - Operations NoPacket(bool play_dtmf); - - // Returns the operation to do given that the expected packet is available. 
- Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf); - - // Returns the operation to do given that the expected packet is not - // available, but a packet further into the future is at hand. - Operations FuturePacketAvailable(const SyncBuffer& sync_buffer, - const Expand& expand, - size_t decoder_frame_length, - Modes prev_mode, - uint32_t target_timestamp, - uint32_t available_timestamp, - bool play_dtmf, - size_t generated_noise_samples); - - // Checks if enough time has elapsed since the last successful timescale - // operation was done (i.e., accelerate or preemptive expand). - bool TimescaleAllowed() const { - return !timescale_countdown_ || timescale_countdown_->Finished(); - } - - // Checks if the current (filtered) buffer level is under the target level. - bool UnderTargetLevel() const; - - // Checks if |timestamp_leap| is so long into the future that a reset due - // to exceeding kReinitAfterExpands will be done. - bool ReinitAfterExpands(uint32_t timestamp_leap) const; - - // Checks if we still have not done enough expands to cover the distance from - // the last decoded packet to the next available packet, the distance beeing - // conveyed in |timestamp_leap|. - bool PacketTooEarly(uint32_t timestamp_leap) const; - - // Checks if num_consecutive_expands_ >= kMaxWaitForPacket. - bool MaxWaitForPacket() const; + // Updates the |buffer_level_filter_| with the current buffer level + // |buffer_size_packets|. 
+ void FilterBufferLevel(size_t buffer_size_packets, Modes prev_mode); DecoderDatabase* decoder_database_; const PacketBuffer& packet_buffer_; @@ -199,11 +151,11 @@ class DecisionLogic { size_t packet_length_samples_; int sample_memory_; bool prev_time_scale_; - bool disallow_time_stretching_; std::unique_ptr timescale_countdown_; int num_consecutive_expands_; - const bool postpone_decoding_after_expand_; + const NetEqPlayoutMode playout_mode_; + private: RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogic); }; diff --git a/modules/audio_coding/neteq/decision_logic_fax.cc b/modules/audio_coding/neteq/decision_logic_fax.cc new file mode 100644 index 0000000000..0f904bb531 --- /dev/null +++ b/modules/audio_coding/neteq/decision_logic_fax.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/decision_logic_fax.h" + +#include + +#include + +#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/sync_buffer.h" + +namespace webrtc { + +Operations DecisionLogicFax::GetDecisionSpecialized( + const SyncBuffer& sync_buffer, + const Expand& expand, + size_t decoder_frame_length, + const Packet* next_packet, + Modes prev_mode, + bool play_dtmf, + bool* reset_decoder, + size_t generated_noise_samples, + size_t /*cur_size_samples*/) { + assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff); + uint32_t target_timestamp = sync_buffer.end_timestamp(); + uint32_t available_timestamp = 0; + int is_cng_packet = 0; + if (next_packet) { + available_timestamp = next_packet->timestamp; + is_cng_packet = + decoder_database_->IsComfortNoise(next_packet->payload_type); + } + if (is_cng_packet) { + if (static_cast((generated_noise_samples + target_timestamp) - + available_timestamp) >= 0) { + // Time to play this packet now. + return kRfc3389Cng; + } else { + // Wait before playing this packet. + return kRfc3389CngNoPacket; + } + } + if (!next_packet) { + // No packet. If in CNG mode, play as usual. Otherwise, use other method to + // generate data. + if (cng_state_ == kCngRfc3389On) { + // Continue playing comfort noise. + return kRfc3389CngNoPacket; + } else if (cng_state_ == kCngInternalOn) { + // Continue playing codec-internal comfort noise. + return kCodecInternalCng; + } else { + // Nothing to play. Generate some data to play out. + switch (playout_mode_) { + case kPlayoutOff: + return kAlternativePlc; + case kPlayoutFax: + return kAudioRepetition; + default: + assert(false); + return kUndefined; + } + } + } else if (target_timestamp == available_timestamp) { + return kNormal; + } else { + if (static_cast((generated_noise_samples + target_timestamp) - + available_timestamp) >= 0) { + return kNormal; + } else { + // If currently playing comfort noise, continue with that. 
Do not + // increase the timestamp counter since generated_noise_stopwatch_ in + // NetEqImpl will take care of the time-keeping. + if (cng_state_ == kCngRfc3389On) { + return kRfc3389CngNoPacket; + } else if (cng_state_ == kCngInternalOn) { + return kCodecInternalCng; + } else { + // Otherwise, do packet-loss concealment and increase the + // timestamp while waiting for the time to play this packet. + switch (playout_mode_) { + case kPlayoutOff: + return kAlternativePlcIncreaseTimestamp; + case kPlayoutFax: + return kAudioRepetitionIncreaseTimestamp; + default: + assert(0); + return kUndefined; + } + } + } + } +} + +} // namespace webrtc diff --git a/modules/audio_coding/neteq/decision_logic_fax.h b/modules/audio_coding/neteq/decision_logic_fax.h new file mode 100644 index 0000000000..1436f99aaa --- /dev/null +++ b/modules/audio_coding/neteq/decision_logic_fax.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_ +#define MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_ + +#include "modules/audio_coding/neteq/decision_logic.h" +#include "rtc_base/constructormagic.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +// Implementation of the DecisionLogic class for playout modes kPlayoutFax and +// kPlayoutOff. +class DecisionLogicFax : public DecisionLogic { + public: + // Constructor. 
+ DecisionLogicFax(int fs_hz, + size_t output_size_samples, + NetEqPlayoutMode playout_mode, + DecoderDatabase* decoder_database, + const PacketBuffer& packet_buffer, + DelayManager* delay_manager, + BufferLevelFilter* buffer_level_filter, + const TickTimer* tick_timer) + : DecisionLogic(fs_hz, + output_size_samples, + playout_mode, + decoder_database, + packet_buffer, + delay_manager, + buffer_level_filter, + tick_timer) {} + + protected: + Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer, + const Expand& expand, + size_t decoder_frame_length, + const Packet* next_packet, + Modes prev_mode, + bool play_dtmf, + bool* reset_decoder, + size_t generated_noise_samples, + size_t cur_size_samples) override; + + private: + RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_ diff --git a/modules/audio_coding/neteq/decision_logic_normal.cc b/modules/audio_coding/neteq/decision_logic_normal.cc new file mode 100644 index 0000000000..a683b8c201 --- /dev/null +++ b/modules/audio_coding/neteq/decision_logic_normal.cc @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/decision_logic_normal.h" + +#include + +#include +#include + +#include "modules/audio_coding/neteq/buffer_level_filter.h" +#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/delay_manager.h" +#include "modules/audio_coding/neteq/expand.h" +#include "modules/audio_coding/neteq/packet_buffer.h" +#include "modules/audio_coding/neteq/sync_buffer.h" + +namespace webrtc { + +Operations DecisionLogicNormal::GetDecisionSpecialized( + const SyncBuffer& sync_buffer, + const Expand& expand, + size_t decoder_frame_length, + const Packet* next_packet, + Modes prev_mode, + bool play_dtmf, + bool* reset_decoder, + size_t generated_noise_samples, + size_t cur_size_samples) { + assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming); + // Guard for errors, to avoid getting stuck in error mode. + if (prev_mode == kModeError) { + if (!next_packet) { + return kExpand; + } else { + return kUndefined; // Use kUndefined to flag for a reset. + } + } + + uint32_t target_timestamp = sync_buffer.end_timestamp(); + uint32_t available_timestamp = 0; + bool is_cng_packet = false; + if (next_packet) { + available_timestamp = next_packet->timestamp; + is_cng_packet = + decoder_database_->IsComfortNoise(next_packet->payload_type); + } + + if (is_cng_packet) { + return CngOperation(prev_mode, target_timestamp, available_timestamp, + generated_noise_samples); + } + + // Handle the case with no packet at all available (except maybe DTMF). + if (!next_packet) { + return NoPacket(play_dtmf); + } + + // If the expand period was very long, reset NetEQ since it is likely that the + // sender was restarted. + if (num_consecutive_expands_ > kReinitAfterExpands) { + *reset_decoder = true; + return kNormal; + } + + // Make sure we don't restart audio too soon after an expansion to avoid + // running out of data right away again. 
We should only wait if there are no + // DTX or CNG packets in the buffer (otherwise we should just play out what we + // have, since we cannot know the exact duration of DTX or CNG packets), and + // if the mute factor is low enough (otherwise the expansion was short enough + // to not be noticeable). + // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. + if (postpone_decoding_after_expand_ && prev_mode == kModeExpand && + !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) && + cur_size_samples(delay_manager_->TargetLevel() * + packet_length_samples_)>> 8 && + expand.MuteFactor(0) < 16384 / 2) { + return kExpand; + } + + const uint32_t five_seconds_samples = + static_cast(5 * 8000 * fs_mult_); + // Check if the required packet is available. + if (target_timestamp == available_timestamp) { + return ExpectedPacketAvailable(prev_mode, play_dtmf); + } else if (!PacketBuffer::IsObsoleteTimestamp( + available_timestamp, target_timestamp, five_seconds_samples)) { + return FuturePacketAvailable( + sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp, + available_timestamp, play_dtmf, generated_noise_samples); + } else { + // This implies that available_timestamp < target_timestamp, which can + // happen when a new stream or codec is received. Signal for a reset. + return kUndefined; + } +} + +Operations DecisionLogicNormal::CngOperation(Modes prev_mode, + uint32_t target_timestamp, + uint32_t available_timestamp, + size_t generated_noise_samples) { + // Signed difference between target and available timestamp. 
+ int32_t timestamp_diff = static_cast( + static_cast(generated_noise_samples + target_timestamp) - + available_timestamp); + int32_t optimal_level_samp = static_cast( + (delay_manager_->TargetLevel() * packet_length_samples_) >> 8); + const int64_t excess_waiting_time_samp = + -static_cast(timestamp_diff) - optimal_level_samp; + + if (excess_waiting_time_samp > optimal_level_samp / 2) { + // The waiting time for this packet will be longer than 1.5 + // times the wanted buffer delay. Apply fast-forward to cut the + // waiting time down to the optimal. + noise_fast_forward_ = rtc::dchecked_cast(noise_fast_forward_ + + excess_waiting_time_samp); + timestamp_diff = + rtc::saturated_cast(timestamp_diff + excess_waiting_time_samp); + } + + if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) { + // Not time to play this packet yet. Wait another round before using this + // packet. Keep on playing CNG from previous CNG parameters. + return kRfc3389CngNoPacket; + } else { + // Otherwise, go for the CNG packet now. + noise_fast_forward_ = 0; + return kRfc3389Cng; + } +} + +Operations DecisionLogicNormal::NoPacket(bool play_dtmf) { + if (cng_state_ == kCngRfc3389On) { + // Keep on playing comfort noise. + return kRfc3389CngNoPacket; + } else if (cng_state_ == kCngInternalOn) { + // Keep on playing codec internal comfort noise. + return kCodecInternalCng; + } else if (play_dtmf) { + return kDtmf; + } else { + // Nothing to play, do expand. + return kExpand; + } +} + +Operations DecisionLogicNormal::ExpectedPacketAvailable(Modes prev_mode, + bool play_dtmf) { + if (prev_mode != kModeExpand && !play_dtmf) { + // Check criterion for time-stretching. 
+ int low_limit, high_limit; + delay_manager_->BufferLimits(&low_limit, &high_limit); + if (buffer_level_filter_->filtered_current_level() >= high_limit << 2) + return kFastAccelerate; + if (TimescaleAllowed()) { + if (buffer_level_filter_->filtered_current_level() >= high_limit) + return kAccelerate; + if (buffer_level_filter_->filtered_current_level() < low_limit) + return kPreemptiveExpand; + } + } + return kNormal; +} + +Operations DecisionLogicNormal::FuturePacketAvailable( + const SyncBuffer& sync_buffer, + const Expand& expand, + size_t decoder_frame_length, + Modes prev_mode, + uint32_t target_timestamp, + uint32_t available_timestamp, + bool play_dtmf, + size_t generated_noise_samples) { + // Required packet is not available, but a future packet is. + // Check if we should continue with an ongoing expand because the new packet + // is too far into the future. + uint32_t timestamp_leap = available_timestamp - target_timestamp; + if ((prev_mode == kModeExpand) && !ReinitAfterExpands(timestamp_leap) && + !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) && + UnderTargetLevel()) { + if (play_dtmf) { + // Still have DTMF to play, so do not do expand. + return kDtmf; + } else { + // Nothing to play. + return kExpand; + } + } + + const size_t samples_left = + sync_buffer.FutureLength() - expand.overlap_length(); + const size_t cur_size_samples = + samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length; + + // If previous was comfort noise, then no merge is needed. + if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) { + // Keep the same delay as before the CNG, but make sure that the number of + // samples in buffer is no higher than 4 times the optimal level. (Note that + // TargetLevel() is in Q8.) + if (static_cast(generated_noise_samples + target_timestamp) >= + available_timestamp || + cur_size_samples > + ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) * + 4) { + // Time to play this new packet. 
+ return kNormal; + } else { + // Too early to play this new packet; keep on playing comfort noise. + if (prev_mode == kModeRfc3389Cng) { + return kRfc3389CngNoPacket; + } else { // prevPlayMode == kModeCodecInternalCng. + return kCodecInternalCng; + } + } + } + // Do not merge unless we have done an expand before. + if (prev_mode == kModeExpand) { + return kMerge; + } else if (play_dtmf) { + // Play DTMF instead of expand. + return kDtmf; + } else { + return kExpand; + } +} + +bool DecisionLogicNormal::UnderTargetLevel() const { + return buffer_level_filter_->filtered_current_level() <= + delay_manager_->TargetLevel(); +} + +bool DecisionLogicNormal::ReinitAfterExpands(uint32_t timestamp_leap) const { + return timestamp_leap >= + static_cast(output_size_samples_ * kReinitAfterExpands); +} + +bool DecisionLogicNormal::PacketTooEarly(uint32_t timestamp_leap) const { + return timestamp_leap > + static_cast(output_size_samples_ * num_consecutive_expands_); +} + +bool DecisionLogicNormal::MaxWaitForPacket() const { + return num_consecutive_expands_ >= kMaxWaitForPacket; +} + +} // namespace webrtc diff --git a/modules/audio_coding/neteq/decision_logic_normal.h b/modules/audio_coding/neteq/decision_logic_normal.h new file mode 100644 index 0000000000..ed2ea39b98 --- /dev/null +++ b/modules/audio_coding/neteq/decision_logic_normal.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_ +#define MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_ + +#include "modules/audio_coding/neteq/decision_logic.h" +#include "rtc_base/constructormagic.h" +#include "system_wrappers/include/field_trial.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +// Implementation of the DecisionLogic class for playout modes kPlayoutOn and +// kPlayoutStreaming. +class DecisionLogicNormal : public DecisionLogic { + public: + // Constructor. + DecisionLogicNormal(int fs_hz, + size_t output_size_samples, + NetEqPlayoutMode playout_mode, + DecoderDatabase* decoder_database, + const PacketBuffer& packet_buffer, + DelayManager* delay_manager, + BufferLevelFilter* buffer_level_filter, + const TickTimer* tick_timer) + : DecisionLogic(fs_hz, + output_size_samples, + playout_mode, + decoder_database, + packet_buffer, + delay_manager, + buffer_level_filter, + tick_timer), + postpone_decoding_after_expand_(field_trial::IsEnabled( + "WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {} + + protected: + static const int kReinitAfterExpands = 100; + static const int kMaxWaitForPacket = 10; + + Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer, + const Expand& expand, + size_t decoder_frame_length, + const Packet* next_packet, + Modes prev_mode, + bool play_dtmf, + bool* reset_decoder, + size_t generated_noise_samples, + size_t cur_size_samples) override; + + // Returns the operation to do given that the expected packet is not + // available, but a packet further into the future is at hand. + virtual Operations FuturePacketAvailable(const SyncBuffer& sync_buffer, + const Expand& expand, + size_t decoder_frame_length, + Modes prev_mode, + uint32_t target_timestamp, + uint32_t available_timestamp, + bool play_dtmf, + size_t generated_noise_samples); + + // Returns the operation to do given that the expected packet is available. 
+ virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf); + + // Returns the operation given that no packets are available (except maybe + // a DTMF event, flagged by setting |play_dtmf| true). + virtual Operations NoPacket(bool play_dtmf); + + private: + // Returns the operation given that the next available packet is a comfort + // noise payload (RFC 3389 only, not codec-internal). + Operations CngOperation(Modes prev_mode, + uint32_t target_timestamp, + uint32_t available_timestamp, + size_t generated_noise_samples); + + // Checks if enough time has elapsed since the last successful timescale + // operation was done (i.e., accelerate or preemptive expand). + bool TimescaleAllowed() const { + return !timescale_countdown_ || timescale_countdown_->Finished(); + } + + // Checks if the current (filtered) buffer level is under the target level. + bool UnderTargetLevel() const; + + // Checks if |timestamp_leap| is so long into the future that a reset due + // to exceeding kReinitAfterExpands will be done. + bool ReinitAfterExpands(uint32_t timestamp_leap) const; + + // Checks if we still have not done enough expands to cover the distance from + // the last decoded packet to the next available packet, the distance being + // conveyed in |timestamp_leap|. + bool PacketTooEarly(uint32_t timestamp_leap) const; + + // Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
+ bool MaxWaitForPacket() const; + + const bool postpone_decoding_after_expand_; + + RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_ diff --git a/modules/audio_coding/neteq/decision_logic_unittest.cc b/modules/audio_coding/neteq/decision_logic_unittest.cc index 6929daadff..5f0e5c2675 100644 --- a/modules/audio_coding/neteq/decision_logic_unittest.cc +++ b/modules/audio_coding/neteq/decision_logic_unittest.cc @@ -33,7 +33,19 @@ TEST(DecisionLogic, CreateAndDestroy) { DelayManager delay_manager(240, &delay_peak_detector, &tick_timer); BufferLevelFilter buffer_level_filter; DecisionLogic* logic = DecisionLogic::Create( - fs_hz, output_size_samples, false, &decoder_database, packet_buffer, + fs_hz, output_size_samples, kPlayoutOn, &decoder_database, packet_buffer, + &delay_manager, &buffer_level_filter, &tick_timer); + delete logic; + logic = DecisionLogic::Create( + fs_hz, output_size_samples, kPlayoutStreaming, &decoder_database, + packet_buffer, &delay_manager, &buffer_level_filter, &tick_timer); + delete logic; + logic = DecisionLogic::Create( + fs_hz, output_size_samples, kPlayoutFax, &decoder_database, packet_buffer, + &delay_manager, &buffer_level_filter, &tick_timer); + delete logic; + logic = DecisionLogic::Create( + fs_hz, output_size_samples, kPlayoutOff, &decoder_database, packet_buffer, &delay_manager, &buffer_level_filter, &tick_timer); delete logic; } diff --git a/modules/audio_coding/neteq/defines.h b/modules/audio_coding/neteq/defines.h index 768f0b955e..496a36dfa5 100644 --- a/modules/audio_coding/neteq/defines.h +++ b/modules/audio_coding/neteq/defines.h @@ -24,6 +24,10 @@ enum Operations { kRfc3389CngNoPacket, kCodecInternalCng, kDtmf, + kAlternativePlc, + kAlternativePlcIncreaseTimestamp, + kAudioRepetition, + kAudioRepetitionIncreaseTimestamp, kUndefined = -1 }; diff --git a/modules/audio_coding/neteq/include/neteq.h 
b/modules/audio_coding/neteq/include/neteq.h index ce1448a1de..273979b333 100644 --- a/modules/audio_coding/neteq/include/neteq.h +++ b/modules/audio_coding/neteq/include/neteq.h @@ -74,6 +74,13 @@ struct NetEqLifetimeStatistics { uint64_t voice_concealed_samples = 0; }; +enum NetEqPlayoutMode { + kPlayoutOn, + kPlayoutOff, + kPlayoutFax, + kPlayoutStreaming +}; + // This is the interface class for NetEq. class NetEq { public: @@ -91,10 +98,10 @@ class NetEq { bool enable_post_decode_vad = false; size_t max_packets_in_buffer = 50; int max_delay_ms = 2000; + NetEqPlayoutMode playout_mode = kPlayoutOn; bool enable_fast_accelerate = false; bool enable_muted_state = false; absl::optional codec_pair_id; - bool for_test_no_time_stretching = false; // Use only for testing. }; enum ReturnCodes { kOK = 0, kFail = -1, kNotImplemented = -2 }; @@ -202,6 +209,16 @@ class NetEq { // The packet buffer part of the delay is not updated during DTX/CNG periods. virtual int FilteredCurrentDelayMs() const = 0; + // Sets the playout mode to |mode|. + // Deprecated. Set the mode in the Config struct passed to the constructor. + // TODO(henrik.lundin) Delete. + virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0; + + // Returns the current playout mode. + // Deprecated. + // TODO(henrik.lundin) Delete. + virtual NetEqPlayoutMode PlayoutMode() const = 0; + // Writes the current network statistics to |stats|. The statistics are reset // after the call. virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0; diff --git a/modules/audio_coding/neteq/neteq.cc b/modules/audio_coding/neteq/neteq.cc index cf1c6aa665..55af23e99a 100644 --- a/modules/audio_coding/neteq/neteq.cc +++ b/modules/audio_coding/neteq/neteq.cc @@ -30,7 +30,7 @@ std::string NetEq::Config::ToString() const { ss << "sample_rate_hz=" << sample_rate_hz << ", enable_post_decode_vad=" << (enable_post_decode_vad ? 
"true" : "false") << ", max_packets_in_buffer=" << max_packets_in_buffer - << ", enable_fast_accelerate=" + << ", playout_mode=" << playout_mode << ", enable_fast_accelerate=" << (enable_fast_accelerate ? " true" : "false") << ", enable_muted_state=" << (enable_muted_state ? " true" : "false"); return ss.str(); diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc index 4630448efd..afc15bf248 100644 --- a/modules/audio_coding/neteq/neteq_impl.cc +++ b/modules/audio_coding/neteq/neteq_impl.cc @@ -101,6 +101,7 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config, reset_decoder_(false), ssrc_(0), first_packet_(true), + playout_mode_(config.playout_mode), enable_fast_accelerate_(config.enable_fast_accelerate), nack_enabled_(false), enable_muted_state_(config.enable_muted_state), @@ -109,8 +110,7 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config, tick_timer_.get()), speech_expand_uma_logger_("WebRTC.Audio.SpeechExpandRatePercent", 10, // Report once every 10 s. - tick_timer_.get()), - no_time_stretching_(config.for_test_no_time_stretching) { + tick_timer_.get()) { RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString(); int fs = config.sample_rate_hz; if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) { @@ -358,6 +358,23 @@ int NetEqImpl::FilteredCurrentDelayMs() const { return static_cast(delay_samples) / rtc::CheckedDivExact(fs_hz_, 1000); } +// Deprecated. +// TODO(henrik.lundin) Delete. +void NetEqImpl::SetPlayoutMode(NetEqPlayoutMode mode) { + rtc::CritScope lock(&crit_sect_); + if (mode != playout_mode_) { + playout_mode_ = mode; + CreateDecisionLogic(); + } +} + +// Deprecated. +// TODO(henrik.lundin) Delete. 
+NetEqPlayoutMode NetEqImpl::PlayoutMode() const { + rtc::CritScope lock(&crit_sect_); + return playout_mode_; +} + int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) { rtc::CritScope lock(&crit_sect_); assert(decoder_database_.get()); @@ -920,6 +937,33 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, bool* muted) { return_value = DoDtmf(dtmf_event, &play_dtmf); break; } + case kAlternativePlc: { + // TODO(hlundin): Write test for this. + DoAlternativePlc(false); + break; + } + case kAlternativePlcIncreaseTimestamp: { + // TODO(hlundin): Write test for this. + DoAlternativePlc(true); + break; + } + case kAudioRepetitionIncreaseTimestamp: { + // TODO(hlundin): Write test for this. + sync_buffer_->IncreaseEndTimestamp( + static_cast(output_size_samples_)); + // Skipping break on purpose. Execution should move on into the + // next case. + RTC_FALLTHROUGH(); + } + case kAudioRepetition: { + // TODO(hlundin): Write test for this. + // Copy last |output_size_samples_| from |sync_buffer_| to + // |algorithm_buffer|. + algorithm_buffer_->PushBackFromIndex( + *sync_buffer_, sync_buffer_->Size() - output_size_samples_); + expand_->Reset(); + break; + } case kUndefined: { RTC_LOG(LS_ERROR) << "Invalid operation kUndefined."; assert(false); // This should not happen. @@ -1247,7 +1291,10 @@ int NetEqImpl::GetDecision(Operations* operation, // Get packets from buffer. 
int extracted_samples = 0; - if (packet) { + if (packet && *operation != kAlternativePlc && + *operation != kAlternativePlcIncreaseTimestamp && + *operation != kAudioRepetition && + *operation != kAudioRepetitionIncreaseTimestamp) { sync_buffer_->IncreaseEndTimestamp(packet->timestamp - end_timestamp); if (decision_logic_->CngOff()) { // Adjustment of timestamp only corresponds to an actual packet loss @@ -1836,6 +1883,29 @@ int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) { return 0; } +void NetEqImpl::DoAlternativePlc(bool increase_timestamp) { + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + size_t length; + if (decoder && decoder->HasDecodePlc()) { + // Use the decoder's packet-loss concealment. + // TODO(hlundin): Will probably need a longer buffer for multi-channel. + int16_t decoded_buffer[kMaxFrameSize]; + length = decoder->DecodePlc(1, decoded_buffer); + if (length > 0) + algorithm_buffer_->PushBackInterleaved(decoded_buffer, length); + } else { + // Do simple zero-stuffing. + length = output_size_samples_; + algorithm_buffer_->Zeros(length); + // By not advancing the timestamp, NetEq inserts samples. 
+ stats_.AddZeros(length); + } + if (increase_timestamp) { + sync_buffer_->IncreaseEndTimestamp(static_cast(length)); + } + expand_->Reset(); +} + int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels, int16_t* output) const { @@ -2060,8 +2130,8 @@ NetEqImpl::OutputType NetEqImpl::LastOutputType() { void NetEqImpl::CreateDecisionLogic() { decision_logic_.reset(DecisionLogic::Create( - fs_hz_, output_size_samples_, no_time_stretching_, - decoder_database_.get(), *packet_buffer_.get(), delay_manager_.get(), - buffer_level_filter_.get(), tick_timer_.get())); + fs_hz_, output_size_samples_, playout_mode_, decoder_database_.get(), + *packet_buffer_.get(), delay_manager_.get(), buffer_level_filter_.get(), + tick_timer_.get())); } } // namespace webrtc diff --git a/modules/audio_coding/neteq/neteq_impl.h b/modules/audio_coding/neteq/neteq_impl.h index 6b8764d131..6f69680a08 100644 --- a/modules/audio_coding/neteq/neteq_impl.h +++ b/modules/audio_coding/neteq/neteq_impl.h @@ -168,6 +168,16 @@ class NetEqImpl : public webrtc::NetEq { int FilteredCurrentDelayMs() const override; + // Sets the playout mode to |mode|. + // Deprecated. + // TODO(henrik.lundin) Delete. + void SetPlayoutMode(NetEqPlayoutMode mode) override; + + // Returns the current playout mode. + // Deprecated. + // TODO(henrik.lundin) Delete. + NetEqPlayoutMode PlayoutMode() const override; + // Writes the current network statistics to |stats|. The statistics are reset // after the call. int NetworkStatistics(NetEqNetworkStatistics* stats) override; @@ -326,6 +336,12 @@ class NetEqImpl : public webrtc::NetEq { int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); + // Produces packet-loss concealment using alternative methods. If the codec + // has an internal PLC, it is called to generate samples. Otherwise, the + // method performs zero-stuffing. 
+ void DoAlternativePlc(bool increase_timestamp) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); + // Overdub DTMF on top of |output|. int DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels, @@ -413,6 +429,7 @@ class NetEqImpl : public webrtc::NetEq { RTC_GUARDED_BY(crit_sect_); uint32_t ssrc_ RTC_GUARDED_BY(crit_sect_); bool first_packet_ RTC_GUARDED_BY(crit_sect_); + NetEqPlayoutMode playout_mode_ RTC_GUARDED_BY(crit_sect_); bool enable_fast_accelerate_ RTC_GUARDED_BY(crit_sect_); std::unique_ptr nack_ RTC_GUARDED_BY(crit_sect_); bool nack_enabled_ RTC_GUARDED_BY(crit_sect_); @@ -424,7 +441,6 @@ class NetEqImpl : public webrtc::NetEq { std::vector last_decoded_timestamps_ RTC_GUARDED_BY(crit_sect_); ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(crit_sect_); ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(crit_sect_); - bool no_time_stretching_ RTC_GUARDED_BY(crit_sect_); // Only used for test. private: RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl); diff --git a/modules/audio_coding/neteq/neteq_impl_unittest.cc b/modules/audio_coding/neteq/neteq_impl_unittest.cc index b772dfa71d..c8fd91aaaa 100644 --- a/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -1377,6 +1377,32 @@ class NetEqImplTest120ms : public NetEqImplTest { uint16_t sequence_number_ = 1; }; +TEST_F(NetEqImplTest120ms, AudioRepetition) { + config_.playout_mode = kPlayoutFax; + CreateInstanceNoMocks(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(kAudioRepetition, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, AlternativePlc) { + config_.playout_mode = kPlayoutOff; + CreateInstanceNoMocks(); + Register120msCodec(AudioDecoder::kSpeech); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); 
+ EXPECT_EQ(kAlternativePlc, neteq_->last_operation_for_test()); +} + TEST_F(NetEqImplTest120ms, CodecInternalCng) { CreateInstanceNoMocks(); Register120msCodec(AudioDecoder::kComfortNoise); diff --git a/modules/audio_coding/neteq/neteq_unittest.cc b/modules/audio_coding/neteq/neteq_unittest.cc index 27e9535551..4ed7a6b088 100644 --- a/modules/audio_coding/neteq/neteq_unittest.cc +++ b/modules/audio_coding/neteq/neteq_unittest.cc @@ -25,8 +25,6 @@ #include "common_types.h" // NOLINT(build/include) #include "modules/audio_coding/codecs/pcm16b/pcm16b.h" #include "modules/audio_coding/neteq/tools/audio_loop.h" -#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h" -#include "modules/audio_coding/neteq/tools/neteq_test.h" #include "modules/audio_coding/neteq/tools/rtp_file_source.h" #include "rtc_base/ignore_wundef.h" #include "rtc_base/messagedigest.h" @@ -557,7 +555,7 @@ TEST_F(NetEqDecodingTest, MAYBE_TestOpusDtxBitExactness) { class NetEqDecodingTestFaxMode : public NetEqDecodingTest { protected: NetEqDecodingTestFaxMode() : NetEqDecodingTest() { - config_.for_test_no_time_stretching = true; + config_.playout_mode = kPlayoutFax; } void TestJitterBufferDelay(bool apply_packet_loss); }; @@ -1725,37 +1723,4 @@ TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithLoss) { TestJitterBufferDelay(true); } -namespace test { -// TODO(henrik.lundin) NetEqRtpDumpInput requires protobuf support. It shouldn't -// need it, but because it is bundled with NetEqEventLogInput, it is neded. -// This should be refactored. 
-#if WEBRTC_ENABLE_PROTOBUF -TEST(NetEqNoTimeStretchingMode, RunTest) { - NetEq::Config config; - config.for_test_no_time_stretching = true; - auto codecs = NetEqTest::StandardDecoderMap(); - NetEqTest::ExtDecoderMap ext_codecs; - NetEqPacketSourceInput::RtpHeaderExtensionMap rtp_ext_map = { - {1, kRtpExtensionAudioLevel}, - {3, kRtpExtensionAbsoluteSendTime}, - {5, kRtpExtensionTransportSequenceNumber}, - {7, kRtpExtensionVideoContentType}, - {8, kRtpExtensionVideoTiming}}; - std::unique_ptr input(new NetEqRtpDumpInput( - webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp"), - rtp_ext_map)); - std::unique_ptr input_time_limit( - new TimeLimitedNetEqInput(std::move(input), 20000)); - std::unique_ptr output(new VoidAudioSink); - NetEqTest::Callbacks callbacks; - NetEqTest test(config, codecs, ext_codecs, std::move(input_time_limit), - std::move(output), callbacks); - test.Run(); - const auto stats = test.SimulationStats(); - EXPECT_EQ(0, stats.accelerate_rate); - EXPECT_EQ(0, stats.preemptive_rate); -} -#endif - -} // namespace test } // namespace webrtc diff --git a/modules/audio_coding/neteq/tools/neteq_input.cc b/modules/audio_coding/neteq/tools/neteq_input.cc index 12a75fcf41..44513ab15b 100644 --- a/modules/audio_coding/neteq/tools/neteq_input.cc +++ b/modules/audio_coding/neteq/tools/neteq_input.cc @@ -28,50 +28,5 @@ std::string NetEqInput::PacketData::ToString() const { return ss.str(); } -TimeLimitedNetEqInput::TimeLimitedNetEqInput(std::unique_ptr input, - int64_t duration_ms) - : input_(std::move(input)), - start_time_ms_(input_->NextEventTime()), - duration_ms_(duration_ms) {} - -rtc::Optional TimeLimitedNetEqInput::NextPacketTime() const { - return ended_ ? rtc::Optional() : input_->NextPacketTime(); -} - -rtc::Optional TimeLimitedNetEqInput::NextOutputEventTime() const { - return ended_ ? 
rtc::Optional() : input_->NextOutputEventTime(); -} - -std::unique_ptr TimeLimitedNetEqInput::PopPacket() { - if (ended_) { - return std::unique_ptr(); - } - auto packet = input_->PopPacket(); - MaybeSetEnded(); - return packet; -} - -void TimeLimitedNetEqInput::AdvanceOutputEvent() { - if (!ended_) { - input_->AdvanceOutputEvent(); - MaybeSetEnded(); - } -} - -bool TimeLimitedNetEqInput::ended() const { - return ended_ || input_->ended(); -} - -rtc::Optional TimeLimitedNetEqInput::NextHeader() const { - return ended_ ? rtc::Optional() : input_->NextHeader(); -} - -void TimeLimitedNetEqInput::MaybeSetEnded() { - if (NextEventTime() && start_time_ms_ && - *NextEventTime() - *start_time_ms_ > duration_ms_) { - ended_ = true; - } -} - } // namespace test } // namespace webrtc diff --git a/modules/audio_coding/neteq/tools/neteq_input.h b/modules/audio_coding/neteq/tools/neteq_input.h index a13a86eb97..5e2cbd2c3e 100644 --- a/modules/audio_coding/neteq/tools/neteq_input.h +++ b/modules/audio_coding/neteq/tools/neteq_input.h @@ -78,28 +78,6 @@ class NetEqInput { virtual absl::optional NextHeader() const = 0; }; -// Wrapper class to impose a time limit on a NetEqInput object, typically -// another time limit than what the object itself provides. For example, an -// input taken from a file can be cut shorter by wrapping it in this class. 
-class TimeLimitedNetEqInput : public NetEqInput { - public: - TimeLimitedNetEqInput(std::unique_ptr input, int64_t duration_ms); - rtc::Optional NextPacketTime() const override; - rtc::Optional NextOutputEventTime() const override; - std::unique_ptr PopPacket() override; - void AdvanceOutputEvent() override; - bool ended() const override; - rtc::Optional NextHeader() const override; - - private: - void MaybeSetEnded(); - - std::unique_ptr input_; - const rtc::Optional start_time_ms_; - const int64_t duration_ms_; - bool ended_ = false; -}; - } // namespace test } // namespace webrtc #endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_INPUT_H_ diff --git a/modules/audio_coding/neteq/tools/neteq_test.cc b/modules/audio_coding/neteq/tools/neteq_test.cc index 6d8e7ffa4a..e6dd114e38 100644 --- a/modules/audio_coding/neteq/tools/neteq_test.cc +++ b/modules/audio_coding/neteq/tools/neteq_test.cc @@ -115,34 +115,6 @@ NetEqLifetimeStatistics NetEqTest::LifetimeStats() const { return neteq_->GetLifetimeStatistics(); } -NetEqTest::DecoderMap NetEqTest::StandardDecoderMap() { - DecoderMap codecs = { - {0, std::make_pair(NetEqDecoder::kDecoderPCMu, "pcmu")}, - {8, std::make_pair(NetEqDecoder::kDecoderPCMa, "pcma")}, - {102, std::make_pair(NetEqDecoder::kDecoderILBC, "ilbc")}, - {103, std::make_pair(NetEqDecoder::kDecoderISAC, "isac")}, -#if !defined(WEBRTC_ANDROID) - {104, std::make_pair(NetEqDecoder::kDecoderISACswb, "isac-swb")}, -#endif - {111, std::make_pair(NetEqDecoder::kDecoderOpus, "opus")}, - {93, std::make_pair(NetEqDecoder::kDecoderPCM16B, "pcm16-nb")}, - {94, std::make_pair(NetEqDecoder::kDecoderPCM16Bwb, "pcm16-wb")}, - {95, std::make_pair(NetEqDecoder::kDecoderPCM16Bswb32kHz, "pcm16-swb32")}, - {96, std::make_pair(NetEqDecoder::kDecoderPCM16Bswb48kHz, "pcm16-swb48")}, - {9, std::make_pair(NetEqDecoder::kDecoderG722, "g722")}, - {106, std::make_pair(NetEqDecoder::kDecoderAVT, "avt")}, - {114, std::make_pair(NetEqDecoder::kDecoderAVT16kHz, "avt-16")}, - {115, 
std::make_pair(NetEqDecoder::kDecoderAVT32kHz, "avt-32")}, - {116, std::make_pair(NetEqDecoder::kDecoderAVT48kHz, "avt-48")}, - {117, std::make_pair(NetEqDecoder::kDecoderRED, "red")}, - {13, std::make_pair(NetEqDecoder::kDecoderCNGnb, "cng-nb")}, - {98, std::make_pair(NetEqDecoder::kDecoderCNGwb, "cng-wb")}, - {99, std::make_pair(NetEqDecoder::kDecoderCNGswb32kHz, "cng-swb32")}, - {100, std::make_pair(NetEqDecoder::kDecoderCNGswb48kHz, "cng-swb48")} - }; - return codecs; -} - void NetEqTest::RegisterDecoders(const DecoderMap& codecs) { for (const auto& c : codecs) { RTC_CHECK_EQ( diff --git a/modules/audio_coding/neteq/tools/neteq_test.h b/modules/audio_coding/neteq/tools/neteq_test.h index 9c05fc468a..e645e42814 100644 --- a/modules/audio_coding/neteq/tools/neteq_test.h +++ b/modules/audio_coding/neteq/tools/neteq_test.h @@ -91,8 +91,6 @@ class NetEqTest { NetEqNetworkStatistics SimulationStats(); NetEqLifetimeStatistics LifetimeStats() const; - static DecoderMap StandardDecoderMap(); - private: void RegisterDecoders(const DecoderMap& codecs); void RegisterExternalDecoders(const ExtDecoderMap& codecs); diff --git a/modules/audio_coding/test/TwoWayCommunication.cc b/modules/audio_coding/test/TwoWayCommunication.cc index 4367faf6a7..5a78c11981 100644 --- a/modules/audio_coding/test/TwoWayCommunication.cc +++ b/modules/audio_coding/test/TwoWayCommunication.cc @@ -40,9 +40,8 @@ TwoWayCommunication::TwoWayCommunication(int testMode) AudioCodingModule::Config(CreateBuiltinAudioDecoderFactory()))), _testMode(testMode) { AudioCodingModule::Config config; - // The clicks will be more obvious if time-stretching is not allowed. - // TODO(henrik.lundin) Really? - config.neteq_config.for_test_no_time_stretching = true; + // The clicks will be more obvious in FAX mode. TODO(henrik.lundin) Really? 
+ config.neteq_config.playout_mode = kPlayoutFax; config.decoder_factory = CreateBuiltinAudioDecoderFactory(); _acmB.reset(AudioCodingModule::Create(config)); _acmRefB.reset(AudioCodingModule::Create(config)); diff --git a/test/fuzzers/neteq_rtp_fuzzer.cc b/test/fuzzers/neteq_rtp_fuzzer.cc index 2b150a73b2..e28af90f31 100644 --- a/test/fuzzers/neteq_rtp_fuzzer.cc +++ b/test/fuzzers/neteq_rtp_fuzzer.cc @@ -133,13 +133,29 @@ void FuzzOneInputTest(const uint8_t* data, size_t size) { std::unique_ptr output(new AudioChecksum); NetEqTest::Callbacks callbacks; NetEq::Config config; - auto codecs = NetEqTest::StandardDecoderMap(); - // kPayloadType is the payload type that will be used for encoding. Verify - // that it is included in the standard decoder map, and that it points to the - // expected decoder type. - RTC_CHECK_EQ(codecs.count(kPayloadType), 1); - RTC_CHECK(codecs[kPayloadType].first == NetEqDecoder::kDecoderPCM16Bswb32kHz); - + NetEqTest::DecoderMap codecs; + codecs[0] = std::make_pair(NetEqDecoder::kDecoderPCMu, "pcmu"); + codecs[8] = std::make_pair(NetEqDecoder::kDecoderPCMa, "pcma"); + codecs[103] = std::make_pair(NetEqDecoder::kDecoderISAC, "isac"); + codecs[104] = std::make_pair(NetEqDecoder::kDecoderISACswb, "isac-swb"); + codecs[111] = std::make_pair(NetEqDecoder::kDecoderOpus, "opus"); + codecs[93] = std::make_pair(NetEqDecoder::kDecoderPCM16B, "pcm16-nb"); + codecs[94] = std::make_pair(NetEqDecoder::kDecoderPCM16Bwb, "pcm16-wb"); + codecs[96] = + std::make_pair(NetEqDecoder::kDecoderPCM16Bswb48kHz, "pcm16-swb48"); + codecs[9] = std::make_pair(NetEqDecoder::kDecoderG722, "g722"); + codecs[106] = std::make_pair(NetEqDecoder::kDecoderAVT, "avt"); + codecs[114] = std::make_pair(NetEqDecoder::kDecoderAVT16kHz, "avt-16"); + codecs[115] = std::make_pair(NetEqDecoder::kDecoderAVT32kHz, "avt-32"); + codecs[116] = std::make_pair(NetEqDecoder::kDecoderAVT48kHz, "avt-48"); + codecs[117] = std::make_pair(NetEqDecoder::kDecoderRED, "red"); + codecs[13] = 
std::make_pair(NetEqDecoder::kDecoderCNGnb, "cng-nb"); + codecs[98] = std::make_pair(NetEqDecoder::kDecoderCNGwb, "cng-wb"); + codecs[99] = std::make_pair(NetEqDecoder::kDecoderCNGswb32kHz, "cng-swb32"); + codecs[100] = std::make_pair(NetEqDecoder::kDecoderCNGswb48kHz, "cng-swb48"); + // This is the payload type that will be used for encoding. + codecs[kPayloadType] = + std::make_pair(NetEqDecoder::kDecoderPCM16Bswb32kHz, "pcm16-swb32"); NetEqTest::ExtDecoderMap ext_codecs; NetEqTest test(config, codecs, ext_codecs, std::move(input), diff --git a/test/fuzzers/neteq_signal_fuzzer.cc b/test/fuzzers/neteq_signal_fuzzer.cc index 0ba25d5842..611964d70e 100644 --- a/test/fuzzers/neteq_signal_fuzzer.cc +++ b/test/fuzzers/neteq_signal_fuzzer.cc @@ -167,22 +167,31 @@ void FuzzOneInputTest(const uint8_t* data, size_t size) { NetEq::Config config; config.enable_post_decode_vad = true; config.enable_fast_accelerate = true; - auto codecs = NetEqTest::StandardDecoderMap(); - // rate_types contains the payload types that will be used for encoding. - // Verify that they all are included in the standard decoder map, and that - // they point to the expected decoder types. 
- RTC_CHECK_EQ(codecs.count(rate_types[0].second), 1); - RTC_CHECK(codecs[rate_types[0].second].first == NetEqDecoder::kDecoderPCM16B); - RTC_CHECK_EQ(codecs.count(rate_types[1].second), 1); - RTC_CHECK(codecs[rate_types[1].second].first == - NetEqDecoder::kDecoderPCM16Bwb); - RTC_CHECK_EQ(codecs.count(rate_types[2].second), 1); - RTC_CHECK(codecs[rate_types[2].second].first == - NetEqDecoder::kDecoderPCM16Bswb32kHz); - RTC_CHECK_EQ(codecs.count(rate_types[3].second), 1); - RTC_CHECK(codecs[rate_types[3].second].first == - NetEqDecoder::kDecoderPCM16Bswb48kHz); - + NetEqTest::DecoderMap codecs; + codecs[0] = std::make_pair(NetEqDecoder::kDecoderPCMu, "pcmu"); + codecs[8] = std::make_pair(NetEqDecoder::kDecoderPCMa, "pcma"); + codecs[103] = std::make_pair(NetEqDecoder::kDecoderISAC, "isac"); + codecs[104] = std::make_pair(NetEqDecoder::kDecoderISACswb, "isac-swb"); + codecs[111] = std::make_pair(NetEqDecoder::kDecoderOpus, "opus"); + codecs[9] = std::make_pair(NetEqDecoder::kDecoderG722, "g722"); + codecs[106] = std::make_pair(NetEqDecoder::kDecoderAVT, "avt"); + codecs[114] = std::make_pair(NetEqDecoder::kDecoderAVT16kHz, "avt-16"); + codecs[115] = std::make_pair(NetEqDecoder::kDecoderAVT32kHz, "avt-32"); + codecs[116] = std::make_pair(NetEqDecoder::kDecoderAVT48kHz, "avt-48"); + codecs[117] = std::make_pair(NetEqDecoder::kDecoderRED, "red"); + codecs[13] = std::make_pair(NetEqDecoder::kDecoderCNGnb, "cng-nb"); + codecs[98] = std::make_pair(NetEqDecoder::kDecoderCNGwb, "cng-wb"); + codecs[99] = std::make_pair(NetEqDecoder::kDecoderCNGswb32kHz, "cng-swb32"); + codecs[100] = std::make_pair(NetEqDecoder::kDecoderCNGswb48kHz, "cng-swb48"); + // One of these payload types will be used for encoding. 
+ codecs[rate_types[0].second] = + std::make_pair(NetEqDecoder::kDecoderPCM16B, "pcm16-nb"); + codecs[rate_types[1].second] = + std::make_pair(NetEqDecoder::kDecoderPCM16Bwb, "pcm16-wb"); + codecs[rate_types[2].second] = + std::make_pair(NetEqDecoder::kDecoderPCM16Bswb32kHz, "pcm16-swb32"); + codecs[rate_types[3].second] = + std::make_pair(NetEqDecoder::kDecoderPCM16Bswb48kHz, "pcm16-swb48"); NetEqTest::ExtDecoderMap ext_codecs; NetEqTest test(config, codecs, ext_codecs, std::move(input),