diff --git a/webrtc/modules/audio_coding/neteq/decision_logic.cc b/webrtc/modules/audio_coding/neteq/decision_logic.cc index b702e6dc50..8cef2c96d4 100644 --- a/webrtc/modules/audio_coding/neteq/decision_logic.cc +++ b/webrtc/modules/audio_coding/neteq/decision_logic.cc @@ -67,7 +67,6 @@ DecisionLogic::DecisionLogic(int fs_hz, delay_manager_(delay_manager), buffer_level_filter_(buffer_level_filter), cng_state_(kCngOff), - generated_noise_samples_(0), packet_length_samples_(0), sample_memory_(0), prev_time_scale_(false), @@ -80,7 +79,7 @@ DecisionLogic::DecisionLogic(int fs_hz, void DecisionLogic::Reset() { cng_state_ = kCngOff; - generated_noise_samples_ = 0; + noise_fast_forward_ = 0; packet_length_samples_ = 0; sample_memory_ = 0; prev_time_scale_ = false; @@ -107,15 +106,15 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer, size_t decoder_frame_length, const RTPHeader* packet_header, Modes prev_mode, - bool play_dtmf, bool* reset_decoder) { + bool play_dtmf, + size_t generated_noise_samples, + bool* reset_decoder) { if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng || prev_mode == kModeExpand) { // If last mode was CNG (or Expand, since this could be covering up for - // a lost CNG packet), increase the |generated_noise_samples_| counter. - generated_noise_samples_ += output_size_samples_; - // Remember that CNG is on. This is needed if comfort noise is interrupted - // by DTMF. + // a lost CNG packet), remember that CNG is on. This is needed if comfort + // noise is interrupted by DTMF. if (prev_mode == kModeRfc3389Cng) { cng_state_ = kCngRfc3389On; } else if (prev_mode == kModeCodecInternalCng) { @@ -139,7 +138,7 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer, return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length, packet_header, prev_mode, play_dtmf, - reset_decoder); + reset_decoder, generated_noise_samples); } void DecisionLogic::ExpandDecision(Operations operation) { diff --git a/webrtc/modules/audio_coding/neteq/decision_logic.h b/webrtc/modules/audio_coding/neteq/decision_logic.h index 72121b7aac..18f424aa09 100644 --- a/webrtc/modules/audio_coding/neteq/decision_logic.h +++ b/webrtc/modules/audio_coding/neteq/decision_logic.h @@ -79,6 +79,7 @@ class DecisionLogic { const RTPHeader* packet_header, Modes prev_mode, bool play_dtmf, + size_t generated_noise_samples, bool* reset_decoder); // These methods test the |cng_state_| for different conditions. @@ -101,10 +102,7 @@ class DecisionLogic { // Accessors and mutators. void set_sample_memory(int32_t value) { sample_memory_ = value; } - size_t generated_noise_samples() const { return generated_noise_samples_; } - void set_generated_noise_samples(size_t value) { - generated_noise_samples_ = value; - } + size_t noise_fast_forward() const { return noise_fast_forward_; } size_t packet_length_samples() const { return packet_length_samples_; } void set_packet_length_samples(size_t value) { packet_length_samples_ = value; @@ -138,7 +136,8 @@ class DecisionLogic { const RTPHeader* packet_header, Modes prev_mode, bool play_dtmf, - bool* reset_decoder) = 0; + bool* reset_decoder, + size_t generated_noise_samples) = 0; // Updates the |buffer_level_filter_| with the current buffer level // |buffer_size_packets|. @@ -152,7 +151,7 @@ class DecisionLogic { size_t output_size_samples_; CngState cng_state_; // Remember if comfort noise is interrupted by other // event (e.g., DTMF). - size_t generated_noise_samples_; + size_t noise_fast_forward_ = 0; size_t packet_length_samples_; int sample_memory_; bool prev_time_scale_; diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc b/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc index ddea64425f..aace402a7d 100644 --- a/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc +++ b/webrtc/modules/audio_coding/neteq/decision_logic_fax.cc @@ -26,7 +26,8 @@ Operations DecisionLogicFax::GetDecisionSpecialized( const RTPHeader* packet_header, Modes prev_mode, bool play_dtmf, - bool* reset_decoder) { + bool* reset_decoder, + size_t generated_noise_samples) { assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff); uint32_t target_timestamp = sync_buffer.end_timestamp(); uint32_t available_timestamp = 0; @@ -37,7 +38,7 @@ Operations DecisionLogicFax::GetDecisionSpecialized( decoder_database_->IsComfortNoise(packet_header->payloadType); } if (is_cng_packet) { - if (static_cast((generated_noise_samples_ + target_timestamp) + if (static_cast((generated_noise_samples + target_timestamp) - available_timestamp) >= 0) { // Time to play this packet now. return kRfc3389Cng; @@ -70,13 +71,13 @@ Operations DecisionLogicFax::GetDecisionSpecialized( } else if (target_timestamp == available_timestamp) { return kNormal; } else { - if (static_cast((generated_noise_samples_ + target_timestamp) + if (static_cast((generated_noise_samples + target_timestamp) - available_timestamp) >= 0) { return kNormal; } else { // If currently playing comfort noise, continue with that. Do not - // increase the timestamp counter since generated_noise_samples_ will - // be increased. + // increase the timestamp counter since generated_noise_stopwatch_ in + // NetEqImpl will take care of the time-keeping. if (cng_state_ == kCngRfc3389On) { return kRfc3389CngNoPacket; } else if (cng_state_ == kCngInternalOn) { diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_fax.h b/webrtc/modules/audio_coding/neteq/decision_logic_fax.h index 204dcc168a..fbb0cc0aee 100644 --- a/webrtc/modules/audio_coding/neteq/decision_logic_fax.h +++ b/webrtc/modules/audio_coding/neteq/decision_logic_fax.h @@ -50,7 +50,8 @@ class DecisionLogicFax : public DecisionLogic { const RTPHeader* packet_header, Modes prev_mode, bool play_dtmf, - bool* reset_decoder) override; + bool* reset_decoder, + size_t generated_noise_samples) override; private: RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax); diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc b/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc index 0252d1cdfa..37a75d7f5a 100644 --- a/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc +++ b/webrtc/modules/audio_coding/neteq/decision_logic_normal.cc @@ -31,7 +31,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized( const RTPHeader* packet_header, Modes prev_mode, bool play_dtmf, - bool* reset_decoder) { + bool* reset_decoder, + size_t generated_noise_samples) { assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming); // Guard for errors, to avoid getting stuck in error mode. if (prev_mode == kModeError) { @@ -52,7 +53,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized( } if (is_cng_packet) { - return CngOperation(prev_mode, target_timestamp, available_timestamp); + return CngOperation(prev_mode, target_timestamp, available_timestamp, + generated_noise_samples); } // Handle the case with no packet at all available (except maybe DTMF). @@ -76,7 +78,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized( available_timestamp, target_timestamp, five_seconds_samples)) { return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp, - available_timestamp, play_dtmf); + available_timestamp, play_dtmf, + generated_noise_samples); } else { // This implies that available_timestamp < target_timestamp, which can // happen when a new stream or codec is received. Signal for a reset. @@ -86,10 +89,11 @@ Operations DecisionLogicNormal::GetDecisionSpecialized( Operations DecisionLogicNormal::CngOperation(Modes prev_mode, uint32_t target_timestamp, - uint32_t available_timestamp) { + uint32_t available_timestamp, + size_t generated_noise_samples) { // Signed difference between target and available timestamp. int32_t timestamp_diff = static_cast( - static_cast(generated_noise_samples_ + target_timestamp) - + static_cast(generated_noise_samples + target_timestamp) - available_timestamp); int32_t optimal_level_samp = static_cast( (delay_manager_->TargetLevel() * packet_length_samples_) >> 8); @@ -97,9 +101,9 @@ Operations DecisionLogicNormal::CngOperation(Modes prev_mode, if (excess_waiting_time_samp > optimal_level_samp / 2) { // The waiting time for this packet will be longer than 1.5 - // times the wanted buffer delay. Advance the clock to cut + // times the wanted buffer delay. Apply fast-forward to cut the // waiting time down to the optimal. - generated_noise_samples_ += excess_waiting_time_samp; + noise_fast_forward_ += excess_waiting_time_samp; timestamp_diff += excess_waiting_time_samp; } @@ -109,6 +113,7 @@ Operations DecisionLogicNormal::CngOperation(Modes prev_mode, return kRfc3389CngNoPacket; } else { // Otherwise, go for the CNG packet now. + noise_fast_forward_ = 0; return kRfc3389Cng; } } @@ -153,7 +158,8 @@ Operations DecisionLogicNormal::FuturePacketAvailable( Modes prev_mode, uint32_t target_timestamp, uint32_t available_timestamp, - bool play_dtmf) { + bool play_dtmf, + size_t generated_noise_samples) { // Required packet is not available, but a future packet is. // Check if we should continue with an ongoing expand because the new packet // is too far into the future. @@ -184,7 +190,7 @@ Operations DecisionLogicNormal::FuturePacketAvailable( // safety precaution), but make sure that the number of samples in buffer // is no higher than 4 times the optimal level. (Note that TargetLevel() // is in Q8.) - if (static_cast(generated_noise_samples_ + target_timestamp) >= + if (static_cast(generated_noise_samples + target_timestamp) >= available_timestamp || cur_size_samples > ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) * diff --git a/webrtc/modules/audio_coding/neteq/decision_logic_normal.h b/webrtc/modules/audio_coding/neteq/decision_logic_normal.h index 7465906a38..57aa13bf8b 100644 --- a/webrtc/modules/audio_coding/neteq/decision_logic_normal.h +++ b/webrtc/modules/audio_coding/neteq/decision_logic_normal.h @@ -54,7 +54,8 @@ class DecisionLogicNormal : public DecisionLogic { const RTPHeader* packet_header, Modes prev_mode, bool play_dtmf, - bool* reset_decoder) override; + bool* reset_decoder, + size_t generated_noise_samples) override; // Returns the operation to do given that the expected packet is not // available, but a packet further into the future is at hand. @@ -65,7 +66,8 @@ class DecisionLogicNormal : public DecisionLogic { Modes prev_mode, uint32_t target_timestamp, uint32_t available_timestamp, - bool play_dtmf); + bool play_dtmf, + size_t generated_noise_samples); // Returns the operation to do given that the expected packet is available. virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf); @@ -77,8 +79,10 @@ class DecisionLogicNormal : public DecisionLogic { private: // Returns the operation given that the next available packet is a comfort // noise payload (RFC 3389 only, not codec-internal). - Operations CngOperation(Modes prev_mode, uint32_t target_timestamp, - uint32_t available_timestamp); + Operations CngOperation(Modes prev_mode, + uint32_t target_timestamp, + uint32_t available_timestamp, + size_t generated_noise_samples); // Checks if enough time has elapsed since the last successful timescale // operation was done (i.e., accelerate or preemptive expand). diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index b4ba650fef..6d705e133e 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -833,6 +833,11 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame) { vad_->Update(decoded_buffer_.get(), static_cast(length), speech_type, sid_frame_available, fs_hz_); + if (sid_frame_available || speech_type == AudioDecoder::kComfortNoise) { + // Start a new stopwatch since we are decoding a new CNG packet. + generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + algorithm_buffer_->Clear(); switch (operation) { case kNormal: { @@ -1006,6 +1011,12 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame) { : timestamp_scaler_->ToExternal(playout_timestamp_) - static_cast(audio_frame->samples_per_channel_); + if (!(last_mode_ == kModeRfc3389Cng || + last_mode_ == kModeCodecInternalCng || + last_mode_ == kModeExpand)) { + generated_noise_stopwatch_.reset(); + } + if (decode_return_value) return decode_return_value; return return_value; } @@ -1029,14 +1040,22 @@ int NetEqImpl::GetDecision(Operations* operation, } const RTPHeader* header = packet_buffer_->NextRtpHeader(); + RTC_DCHECK(!generated_noise_stopwatch_ || + generated_noise_stopwatch_->ElapsedTicks() >= 1); + uint64_t generated_noise_samples = + generated_noise_stopwatch_ + ? (generated_noise_stopwatch_->ElapsedTicks() - 1) * + output_size_samples_ + + decision_logic_->noise_fast_forward() + : 0; + if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) { // Because of timestamp peculiarities, we have to "manually" disallow using // a CNG packet with the same timestamp as the one that was last played. // This can happen when using redundancy and will cause the timing to shift. while (header && decoder_database_->IsComfortNoise(header->payloadType) && (end_timestamp >= header->timestamp || - end_timestamp + decision_logic_->generated_noise_samples() > - header->timestamp)) { + end_timestamp + generated_noise_samples > header->timestamp)) { // Don't use this packet, discard it. if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) { assert(false); // Must be ok by design. @@ -1064,7 +1083,7 @@ int NetEqImpl::GetDecision(Operations* operation, // Check if it is time to play a DTMF event. if (dtmf_buffer_->GetEvent( static_cast( - end_timestamp + decision_logic_->generated_noise_samples()), + end_timestamp + generated_noise_samples), dtmf_event)) { *play_dtmf = true; } @@ -1072,13 +1091,14 @@ int NetEqImpl::GetDecision(Operations* operation, // Get instruction. assert(sync_buffer_.get()); assert(expand_.get()); - *operation = decision_logic_->GetDecision(*sync_buffer_, - *expand_, - decoder_frame_length_, - header, - last_mode_, - *play_dtmf, - &reset_decoder_); + generated_noise_samples = + generated_noise_stopwatch_ + ? generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ + + decision_logic_->noise_fast_forward() + : 0; + *operation = decision_logic_->GetDecision( + *sync_buffer_, *expand_, decoder_frame_length_, header, last_mode_, + *play_dtmf, generated_noise_samples, &reset_decoder_); // Check if we already have enough samples in the |sync_buffer_|. If so, // change decision to normal, unless the decision was merge, accelerate, or @@ -1151,15 +1171,19 @@ int NetEqImpl::GetDecision(Operations* operation, // TODO(hlundin): Write test for this. // Update timestamp. timestamp_ = end_timestamp; - if (decision_logic_->generated_noise_samples() > 0 && - last_mode_ != kModeDtmf) { + const uint64_t generated_noise_samples = + generated_noise_stopwatch_ + ? generated_noise_stopwatch_->ElapsedTicks() * + output_size_samples_ + + decision_logic_->noise_fast_forward() + : 0; + if (generated_noise_samples > 0 && last_mode_ != kModeDtmf) { // Make a jump in timestamp due to the recently played comfort noise. uint32_t timestamp_jump = - static_cast(decision_logic_->generated_noise_samples()); + static_cast(generated_noise_samples); sync_buffer_->IncreaseEndTimestamp(timestamp_jump); timestamp_ += timestamp_jump; } - decision_logic_->set_generated_noise_samples(0); return 0; } case kAccelerate: @@ -1242,9 +1266,6 @@ int NetEqImpl::GetDecision(Operations* operation, // We are about to decode and use a non-CNG packet. decision_logic_->SetCngOff(); } - // Reset CNG timestamp as a new packet will be delivered. - // (Also if this is a CNG packet, since playedOutTS is updated.) - decision_logic_->set_generated_noise_samples(0); extracted_samples = ExtractPackets(required_samples, packet_list); if (extracted_samples < 0) { @@ -1577,6 +1598,12 @@ int NetEqImpl::DoExpand(bool play_dtmf) { if (!play_dtmf) { dtmf_tone_generator_->Reset(); } + + if (!generated_noise_stopwatch_) { + // Start a new stopwatch since we may be covering for a lost CNG packet. + generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + return 0; } diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h index a707f2565c..2203de1a39 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h @@ -405,6 +405,8 @@ class NetEqImpl : public webrtc::NetEq { bool nack_enabled_ GUARDED_BY(crit_sect_); AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) = AudioFrame::kVadPassive; + std::unique_ptr generated_noise_stopwatch_ + GUARDED_BY(crit_sect_); private: RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);