NetEq: Move counting of generated CNG samples from DecisionLogic

The counting is moved to NetEqImpl, and the new counter is realized as a
Stopwatch object. The DecisionLogic class still has to maintain a record
of when the CNG period is shortened, in order to reduce the delay. This
is recorded in a new noise_fast_forward_ member in DecisionLogic.

BUG=webrtc:5608

Review-Url: https://codereview.webrtc.org/1914303004
Cr-Commit-Position: refs/heads/master@{#12608}
This commit is contained in:
henrik.lundin 2016-05-03 08:18:47 -07:00 committed by Commit bot
parent b46083ed63
commit b1fb72bebb
8 changed files with 89 additions and 50 deletions

View File

@ -67,7 +67,6 @@ DecisionLogic::DecisionLogic(int fs_hz,
delay_manager_(delay_manager), delay_manager_(delay_manager),
buffer_level_filter_(buffer_level_filter), buffer_level_filter_(buffer_level_filter),
cng_state_(kCngOff), cng_state_(kCngOff),
generated_noise_samples_(0),
packet_length_samples_(0), packet_length_samples_(0),
sample_memory_(0), sample_memory_(0),
prev_time_scale_(false), prev_time_scale_(false),
@ -80,7 +79,7 @@ DecisionLogic::DecisionLogic(int fs_hz,
void DecisionLogic::Reset() { void DecisionLogic::Reset() {
cng_state_ = kCngOff; cng_state_ = kCngOff;
generated_noise_samples_ = 0; noise_fast_forward_ = 0;
packet_length_samples_ = 0; packet_length_samples_ = 0;
sample_memory_ = 0; sample_memory_ = 0;
prev_time_scale_ = false; prev_time_scale_ = false;
@ -107,15 +106,15 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
size_t decoder_frame_length, size_t decoder_frame_length,
const RTPHeader* packet_header, const RTPHeader* packet_header,
Modes prev_mode, Modes prev_mode,
bool play_dtmf, bool* reset_decoder) { bool play_dtmf,
size_t generated_noise_samples,
bool* reset_decoder) {
if (prev_mode == kModeRfc3389Cng || if (prev_mode == kModeRfc3389Cng ||
prev_mode == kModeCodecInternalCng || prev_mode == kModeCodecInternalCng ||
prev_mode == kModeExpand) { prev_mode == kModeExpand) {
// If last mode was CNG (or Expand, since this could be covering up for // If last mode was CNG (or Expand, since this could be covering up for
// a lost CNG packet), increase the |generated_noise_samples_| counter. // a lost CNG packet), remember that CNG is on. This is needed if comfort
generated_noise_samples_ += output_size_samples_; // noise is interrupted by DTMF.
// Remember that CNG is on. This is needed if comfort noise is interrupted
// by DTMF.
if (prev_mode == kModeRfc3389Cng) { if (prev_mode == kModeRfc3389Cng) {
cng_state_ = kCngRfc3389On; cng_state_ = kCngRfc3389On;
} else if (prev_mode == kModeCodecInternalCng) { } else if (prev_mode == kModeCodecInternalCng) {
@ -139,7 +138,7 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length, return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
packet_header, prev_mode, play_dtmf, packet_header, prev_mode, play_dtmf,
reset_decoder); reset_decoder, generated_noise_samples);
} }
void DecisionLogic::ExpandDecision(Operations operation) { void DecisionLogic::ExpandDecision(Operations operation) {

View File

@ -79,6 +79,7 @@ class DecisionLogic {
const RTPHeader* packet_header, const RTPHeader* packet_header,
Modes prev_mode, Modes prev_mode,
bool play_dtmf, bool play_dtmf,
size_t generated_noise_samples,
bool* reset_decoder); bool* reset_decoder);
// These methods test the |cng_state_| for different conditions. // These methods test the |cng_state_| for different conditions.
@ -101,10 +102,7 @@ class DecisionLogic {
// Accessors and mutators. // Accessors and mutators.
void set_sample_memory(int32_t value) { sample_memory_ = value; } void set_sample_memory(int32_t value) { sample_memory_ = value; }
size_t generated_noise_samples() const { return generated_noise_samples_; } size_t noise_fast_forward() const { return noise_fast_forward_; }
void set_generated_noise_samples(size_t value) {
generated_noise_samples_ = value;
}
size_t packet_length_samples() const { return packet_length_samples_; } size_t packet_length_samples() const { return packet_length_samples_; }
void set_packet_length_samples(size_t value) { void set_packet_length_samples(size_t value) {
packet_length_samples_ = value; packet_length_samples_ = value;
@ -138,7 +136,8 @@ class DecisionLogic {
const RTPHeader* packet_header, const RTPHeader* packet_header,
Modes prev_mode, Modes prev_mode,
bool play_dtmf, bool play_dtmf,
bool* reset_decoder) = 0; bool* reset_decoder,
size_t generated_noise_samples) = 0;
// Updates the |buffer_level_filter_| with the current buffer level // Updates the |buffer_level_filter_| with the current buffer level
// |buffer_size_packets|. // |buffer_size_packets|.
@ -152,7 +151,7 @@ class DecisionLogic {
size_t output_size_samples_; size_t output_size_samples_;
CngState cng_state_; // Remember if comfort noise is interrupted by other CngState cng_state_; // Remember if comfort noise is interrupted by other
// event (e.g., DTMF). // event (e.g., DTMF).
size_t generated_noise_samples_; size_t noise_fast_forward_ = 0;
size_t packet_length_samples_; size_t packet_length_samples_;
int sample_memory_; int sample_memory_;
bool prev_time_scale_; bool prev_time_scale_;

View File

@ -26,7 +26,8 @@ Operations DecisionLogicFax::GetDecisionSpecialized(
const RTPHeader* packet_header, const RTPHeader* packet_header,
Modes prev_mode, Modes prev_mode,
bool play_dtmf, bool play_dtmf,
bool* reset_decoder) { bool* reset_decoder,
size_t generated_noise_samples) {
assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff); assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
uint32_t target_timestamp = sync_buffer.end_timestamp(); uint32_t target_timestamp = sync_buffer.end_timestamp();
uint32_t available_timestamp = 0; uint32_t available_timestamp = 0;
@ -37,7 +38,7 @@ Operations DecisionLogicFax::GetDecisionSpecialized(
decoder_database_->IsComfortNoise(packet_header->payloadType); decoder_database_->IsComfortNoise(packet_header->payloadType);
} }
if (is_cng_packet) { if (is_cng_packet) {
if (static_cast<int32_t>((generated_noise_samples_ + target_timestamp) if (static_cast<int32_t>((generated_noise_samples + target_timestamp)
- available_timestamp) >= 0) { - available_timestamp) >= 0) {
// Time to play this packet now. // Time to play this packet now.
return kRfc3389Cng; return kRfc3389Cng;
@ -70,13 +71,13 @@ Operations DecisionLogicFax::GetDecisionSpecialized(
} else if (target_timestamp == available_timestamp) { } else if (target_timestamp == available_timestamp) {
return kNormal; return kNormal;
} else { } else {
if (static_cast<int32_t>((generated_noise_samples_ + target_timestamp) if (static_cast<int32_t>((generated_noise_samples + target_timestamp)
- available_timestamp) >= 0) { - available_timestamp) >= 0) {
return kNormal; return kNormal;
} else { } else {
// If currently playing comfort noise, continue with that. Do not // If currently playing comfort noise, continue with that. Do not
// increase the timestamp counter since generated_noise_samples_ will // increase the timestamp counter since generated_noise_stopwatch_ in
// be increased. // NetEqImpl will take care of the time-keeping.
if (cng_state_ == kCngRfc3389On) { if (cng_state_ == kCngRfc3389On) {
return kRfc3389CngNoPacket; return kRfc3389CngNoPacket;
} else if (cng_state_ == kCngInternalOn) { } else if (cng_state_ == kCngInternalOn) {

View File

@ -50,7 +50,8 @@ class DecisionLogicFax : public DecisionLogic {
const RTPHeader* packet_header, const RTPHeader* packet_header,
Modes prev_mode, Modes prev_mode,
bool play_dtmf, bool play_dtmf,
bool* reset_decoder) override; bool* reset_decoder,
size_t generated_noise_samples) override;
private: private:
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax); RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);

View File

@ -31,7 +31,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
const RTPHeader* packet_header, const RTPHeader* packet_header,
Modes prev_mode, Modes prev_mode,
bool play_dtmf, bool play_dtmf,
bool* reset_decoder) { bool* reset_decoder,
size_t generated_noise_samples) {
assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming); assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
// Guard for errors, to avoid getting stuck in error mode. // Guard for errors, to avoid getting stuck in error mode.
if (prev_mode == kModeError) { if (prev_mode == kModeError) {
@ -52,7 +53,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
} }
if (is_cng_packet) { if (is_cng_packet) {
return CngOperation(prev_mode, target_timestamp, available_timestamp); return CngOperation(prev_mode, target_timestamp, available_timestamp,
generated_noise_samples);
} }
// Handle the case with no packet at all available (except maybe DTMF). // Handle the case with no packet at all available (except maybe DTMF).
@ -76,7 +78,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
available_timestamp, target_timestamp, five_seconds_samples)) { available_timestamp, target_timestamp, five_seconds_samples)) {
return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length, return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length,
prev_mode, target_timestamp, prev_mode, target_timestamp,
available_timestamp, play_dtmf); available_timestamp, play_dtmf,
generated_noise_samples);
} else { } else {
// This implies that available_timestamp < target_timestamp, which can // This implies that available_timestamp < target_timestamp, which can
// happen when a new stream or codec is received. Signal for a reset. // happen when a new stream or codec is received. Signal for a reset.
@ -86,10 +89,11 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
Operations DecisionLogicNormal::CngOperation(Modes prev_mode, Operations DecisionLogicNormal::CngOperation(Modes prev_mode,
uint32_t target_timestamp, uint32_t target_timestamp,
uint32_t available_timestamp) { uint32_t available_timestamp,
size_t generated_noise_samples) {
// Signed difference between target and available timestamp. // Signed difference between target and available timestamp.
int32_t timestamp_diff = static_cast<int32_t>( int32_t timestamp_diff = static_cast<int32_t>(
static_cast<uint32_t>(generated_noise_samples_ + target_timestamp) - static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
available_timestamp); available_timestamp);
int32_t optimal_level_samp = static_cast<int32_t>( int32_t optimal_level_samp = static_cast<int32_t>(
(delay_manager_->TargetLevel() * packet_length_samples_) >> 8); (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
@ -97,9 +101,9 @@ Operations DecisionLogicNormal::CngOperation(Modes prev_mode,
if (excess_waiting_time_samp > optimal_level_samp / 2) { if (excess_waiting_time_samp > optimal_level_samp / 2) {
// The waiting time for this packet will be longer than 1.5 // The waiting time for this packet will be longer than 1.5
// times the wanted buffer delay. Advance the clock to cut // times the wanted buffer delay. Apply fast-forward to cut the
// waiting time down to the optimal. // waiting time down to the optimal.
generated_noise_samples_ += excess_waiting_time_samp; noise_fast_forward_ += excess_waiting_time_samp;
timestamp_diff += excess_waiting_time_samp; timestamp_diff += excess_waiting_time_samp;
} }
@ -109,6 +113,7 @@ Operations DecisionLogicNormal::CngOperation(Modes prev_mode,
return kRfc3389CngNoPacket; return kRfc3389CngNoPacket;
} else { } else {
// Otherwise, go for the CNG packet now. // Otherwise, go for the CNG packet now.
noise_fast_forward_ = 0;
return kRfc3389Cng; return kRfc3389Cng;
} }
} }
@ -153,7 +158,8 @@ Operations DecisionLogicNormal::FuturePacketAvailable(
Modes prev_mode, Modes prev_mode,
uint32_t target_timestamp, uint32_t target_timestamp,
uint32_t available_timestamp, uint32_t available_timestamp,
bool play_dtmf) { bool play_dtmf,
size_t generated_noise_samples) {
// Required packet is not available, but a future packet is. // Required packet is not available, but a future packet is.
// Check if we should continue with an ongoing expand because the new packet // Check if we should continue with an ongoing expand because the new packet
// is too far into the future. // is too far into the future.
@ -184,7 +190,7 @@ Operations DecisionLogicNormal::FuturePacketAvailable(
// safety precaution), but make sure that the number of samples in buffer // safety precaution), but make sure that the number of samples in buffer
// is no higher than 4 times the optimal level. (Note that TargetLevel() // is no higher than 4 times the optimal level. (Note that TargetLevel()
// is in Q8.) // is in Q8.)
if (static_cast<uint32_t>(generated_noise_samples_ + target_timestamp) >= if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
available_timestamp || available_timestamp ||
cur_size_samples > cur_size_samples >
((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) * ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *

View File

@ -54,7 +54,8 @@ class DecisionLogicNormal : public DecisionLogic {
const RTPHeader* packet_header, const RTPHeader* packet_header,
Modes prev_mode, Modes prev_mode,
bool play_dtmf, bool play_dtmf,
bool* reset_decoder) override; bool* reset_decoder,
size_t generated_noise_samples) override;
// Returns the operation to do given that the expected packet is not // Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand. // available, but a packet further into the future is at hand.
@ -65,7 +66,8 @@ class DecisionLogicNormal : public DecisionLogic {
Modes prev_mode, Modes prev_mode,
uint32_t target_timestamp, uint32_t target_timestamp,
uint32_t available_timestamp, uint32_t available_timestamp,
bool play_dtmf); bool play_dtmf,
size_t generated_noise_samples);
// Returns the operation to do given that the expected packet is available. // Returns the operation to do given that the expected packet is available.
virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf); virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
@ -77,8 +79,10 @@ class DecisionLogicNormal : public DecisionLogic {
private: private:
// Returns the operation given that the next available packet is a comfort // Returns the operation given that the next available packet is a comfort
// noise payload (RFC 3389 only, not codec-internal). // noise payload (RFC 3389 only, not codec-internal).
Operations CngOperation(Modes prev_mode, uint32_t target_timestamp, Operations CngOperation(Modes prev_mode,
uint32_t available_timestamp); uint32_t target_timestamp,
uint32_t available_timestamp,
size_t generated_noise_samples);
// Checks if enough time has elapsed since the last successful timescale // Checks if enough time has elapsed since the last successful timescale
// operation was done (i.e., accelerate or preemptive expand). // operation was done (i.e., accelerate or preemptive expand).

View File

@ -833,6 +833,11 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame) {
vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type, vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
sid_frame_available, fs_hz_); sid_frame_available, fs_hz_);
if (sid_frame_available || speech_type == AudioDecoder::kComfortNoise) {
// Start a new stopwatch since we are decoding a new CNG packet.
generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
}
algorithm_buffer_->Clear(); algorithm_buffer_->Clear();
switch (operation) { switch (operation) {
case kNormal: { case kNormal: {
@ -1006,6 +1011,12 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame) {
: timestamp_scaler_->ToExternal(playout_timestamp_) - : timestamp_scaler_->ToExternal(playout_timestamp_) -
static_cast<uint32_t>(audio_frame->samples_per_channel_); static_cast<uint32_t>(audio_frame->samples_per_channel_);
if (!(last_mode_ == kModeRfc3389Cng ||
last_mode_ == kModeCodecInternalCng ||
last_mode_ == kModeExpand)) {
generated_noise_stopwatch_.reset();
}
if (decode_return_value) return decode_return_value; if (decode_return_value) return decode_return_value;
return return_value; return return_value;
} }
@ -1029,14 +1040,22 @@ int NetEqImpl::GetDecision(Operations* operation,
} }
const RTPHeader* header = packet_buffer_->NextRtpHeader(); const RTPHeader* header = packet_buffer_->NextRtpHeader();
RTC_DCHECK(!generated_noise_stopwatch_ ||
generated_noise_stopwatch_->ElapsedTicks() >= 1);
uint64_t generated_noise_samples =
generated_noise_stopwatch_
? (generated_noise_stopwatch_->ElapsedTicks() - 1) *
output_size_samples_ +
decision_logic_->noise_fast_forward()
: 0;
if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) { if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
// Because of timestamp peculiarities, we have to "manually" disallow using // Because of timestamp peculiarities, we have to "manually" disallow using
// a CNG packet with the same timestamp as the one that was last played. // a CNG packet with the same timestamp as the one that was last played.
// This can happen when using redundancy and will cause the timing to shift. // This can happen when using redundancy and will cause the timing to shift.
while (header && decoder_database_->IsComfortNoise(header->payloadType) && while (header && decoder_database_->IsComfortNoise(header->payloadType) &&
(end_timestamp >= header->timestamp || (end_timestamp >= header->timestamp ||
end_timestamp + decision_logic_->generated_noise_samples() > end_timestamp + generated_noise_samples > header->timestamp)) {
header->timestamp)) {
// Don't use this packet, discard it. // Don't use this packet, discard it.
if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) { if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) {
assert(false); // Must be ok by design. assert(false); // Must be ok by design.
@ -1064,7 +1083,7 @@ int NetEqImpl::GetDecision(Operations* operation,
// Check if it is time to play a DTMF event. // Check if it is time to play a DTMF event.
if (dtmf_buffer_->GetEvent( if (dtmf_buffer_->GetEvent(
static_cast<uint32_t>( static_cast<uint32_t>(
end_timestamp + decision_logic_->generated_noise_samples()), end_timestamp + generated_noise_samples),
dtmf_event)) { dtmf_event)) {
*play_dtmf = true; *play_dtmf = true;
} }
@ -1072,13 +1091,14 @@ int NetEqImpl::GetDecision(Operations* operation,
// Get instruction. // Get instruction.
assert(sync_buffer_.get()); assert(sync_buffer_.get());
assert(expand_.get()); assert(expand_.get());
*operation = decision_logic_->GetDecision(*sync_buffer_, generated_noise_samples =
*expand_, generated_noise_stopwatch_
decoder_frame_length_, ? generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ +
header, decision_logic_->noise_fast_forward()
last_mode_, : 0;
*play_dtmf, *operation = decision_logic_->GetDecision(
&reset_decoder_); *sync_buffer_, *expand_, decoder_frame_length_, header, last_mode_,
*play_dtmf, generated_noise_samples, &reset_decoder_);
// Check if we already have enough samples in the |sync_buffer_|. If so, // Check if we already have enough samples in the |sync_buffer_|. If so,
// change decision to normal, unless the decision was merge, accelerate, or // change decision to normal, unless the decision was merge, accelerate, or
@ -1151,15 +1171,19 @@ int NetEqImpl::GetDecision(Operations* operation,
// TODO(hlundin): Write test for this. // TODO(hlundin): Write test for this.
// Update timestamp. // Update timestamp.
timestamp_ = end_timestamp; timestamp_ = end_timestamp;
if (decision_logic_->generated_noise_samples() > 0 && const uint64_t generated_noise_samples =
last_mode_ != kModeDtmf) { generated_noise_stopwatch_
? generated_noise_stopwatch_->ElapsedTicks() *
output_size_samples_ +
decision_logic_->noise_fast_forward()
: 0;
if (generated_noise_samples > 0 && last_mode_ != kModeDtmf) {
// Make a jump in timestamp due to the recently played comfort noise. // Make a jump in timestamp due to the recently played comfort noise.
uint32_t timestamp_jump = uint32_t timestamp_jump =
static_cast<uint32_t>(decision_logic_->generated_noise_samples()); static_cast<uint32_t>(generated_noise_samples);
sync_buffer_->IncreaseEndTimestamp(timestamp_jump); sync_buffer_->IncreaseEndTimestamp(timestamp_jump);
timestamp_ += timestamp_jump; timestamp_ += timestamp_jump;
} }
decision_logic_->set_generated_noise_samples(0);
return 0; return 0;
} }
case kAccelerate: case kAccelerate:
@ -1242,9 +1266,6 @@ int NetEqImpl::GetDecision(Operations* operation,
// We are about to decode and use a non-CNG packet. // We are about to decode and use a non-CNG packet.
decision_logic_->SetCngOff(); decision_logic_->SetCngOff();
} }
// Reset CNG timestamp as a new packet will be delivered.
// (Also if this is a CNG packet, since playedOutTS is updated.)
decision_logic_->set_generated_noise_samples(0);
extracted_samples = ExtractPackets(required_samples, packet_list); extracted_samples = ExtractPackets(required_samples, packet_list);
if (extracted_samples < 0) { if (extracted_samples < 0) {
@ -1577,6 +1598,12 @@ int NetEqImpl::DoExpand(bool play_dtmf) {
if (!play_dtmf) { if (!play_dtmf) {
dtmf_tone_generator_->Reset(); dtmf_tone_generator_->Reset();
} }
if (!generated_noise_stopwatch_) {
// Start a new stopwatch since we may be covering for a lost CNG packet.
generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
}
return 0; return 0;
} }

View File

@ -405,6 +405,8 @@ class NetEqImpl : public webrtc::NetEq {
bool nack_enabled_ GUARDED_BY(crit_sect_); bool nack_enabled_ GUARDED_BY(crit_sect_);
AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) = AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) =
AudioFrame::kVadPassive; AudioFrame::kVadPassive;
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
GUARDED_BY(crit_sect_);
private: private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl); RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);