// Patch summary: stop the Opus encoder wrapper from flagging DTX-related
// packets as speech.
//
// audio_encoder_opus.{h,cc}
//   * Adds an int member `consecutive_dtx_frames_` to AudioEncoderOpusImpl,
//     initialised to 0 in the constructor.
//   * In EncodeImpl() a packet of at most 2 encoded bytes is treated as a DTX
//     frame. `info.speech` used to be true for any non-empty packet
//     (`encoded_bytes > 0`); it is now false for DTX frames and also for the
//     frame that follows exactly 20 consecutive DTX frames.
//   * The counter is read before it is updated: it is incremented after a DTX
//     frame and reset to 0 after any other frame.
//   * Per the comment carried in the patch, the literal 20 mirrors Opus'
//     MAX_CONSECUTIVE_DTX.
//
// audio_encoder_opus_unittest.cc
//   * Adds TEST(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech). It enables DTX,
//     loops audio_coding/testfile32kHz.pcm as if it were 48 kHz, and runs 500
//     iterations that each feed two 10 ms blocks to the encoder. It expects
//     the longest run of non-speech packets to exceed 20.
//
// TestVADDTX.cc
//   * TestOpusDtx::Perform() now sets expects[kAudioFrameCN] next to
//     expects[kEmptyFrame]: 1 after EnableOpusDtx(), 0 after DisableOpusDtx().
//
// NOTE(review): the literal 20 appears in both EncodeImpl() and the new test;
//   a shared named constant would keep them in sync.
// NOTE(review): the new test tracks dtx_frames / max_dtx_frames but never
//   asserts on them.
// NOTE(review): both run-length maxima are only updated when a run ends, so a
//   run still in progress when the loop exits is not counted.
// NOTE(review): as stored here the patch has its newlines and indentation
//   collapsed, and some template arguments appear to be missing (e.g.
//   `rtc::Optional overhead_bytes_per_packet_`). Presumably an extraction
//   artifact; the text will not apply as-is. Confirm against the original
//   change.
diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc index 5a5ac34c5c..f07cd42068 100644 --- a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc +++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc @@ -380,7 +380,8 @@ AudioEncoderOpusImpl::AudioEncoderOpusImpl( inst_(nullptr), packet_loss_fraction_smoother_(new PacketLossFractionSmoother()), audio_network_adaptor_creator_(audio_network_adaptor_creator), - bitrate_smoother_(std::move(bitrate_smoother)) { + bitrate_smoother_(std::move(bitrate_smoother)), + consecutive_dtx_frames_(0) { RTC_DCHECK(0 <= payload_type && payload_type <= 127); // Sanity check of the redundant payload type field that we want to get rid @@ -603,14 +604,23 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl( }); input_buffer_.clear(); + bool dtx_frame = (info.encoded_bytes <= 2); + // Will use new packet size for next encoding. config_.frame_size_ms = next_frame_length_ms_; info.encoded_timestamp = first_timestamp_in_buffer_; info.payload_type = payload_type_; info.send_even_if_empty = true; // Allows Opus to send empty packets. - info.speech = (info.encoded_bytes > 0); + // After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame + // coding the background noise. Avoid flagging this frame as speech + // (even though there is a probability of the frame being speech). + info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20); info.encoder_type = CodecType::kOpus; + + // Increase or reset DTX counter. + consecutive_dtx_frames_ = (dtx_frame) ? 
(consecutive_dtx_frames_ + 1) : (0); + return info; } diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus.h b/modules/audio_coding/codecs/opus/audio_encoder_opus.h index 8e51dbd082..22967c4c15 100644 --- a/modules/audio_coding/codecs/opus/audio_encoder_opus.h +++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.h @@ -161,6 +161,7 @@ class AudioEncoderOpusImpl final : public AudioEncoder { rtc::Optional overhead_bytes_per_packet_; const std::unique_ptr bitrate_smoother_; rtc::Optional bitrate_smoother_last_update_time_; + int consecutive_dtx_frames_; friend struct AudioEncoderOpus; RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderOpusImpl); diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc b/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc index 868de8caa2..c3ad488dac 100644 --- a/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc +++ b/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc @@ -753,4 +753,66 @@ TEST(AudioEncoderOpusTest, SetMaxPlaybackRateFb) { EXPECT_EQ(64000, config.bitrate_bps); } +TEST(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech) { + // Create encoder with DTX enabled. + AudioEncoderOpusConfig config; + config.dtx_enabled = true; + constexpr int payload_type = 17; + const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type); + + // Open file containing speech and silence. + const std::string kInputFileName = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + test::AudioLoop audio_loop; + // Use the file as if it were sampled at 48 kHz. + constexpr int kSampleRateHz = 48000; + EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz()); + constexpr size_t kMaxLoopLengthSamples = + kSampleRateHz * 10; // Max 10 second loop. + constexpr size_t kInputBlockSizeSamples = + 10 * kSampleRateHz / 1000; // 10 ms. + EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples, + kInputBlockSizeSamples)); + + // Encode. 
+ AudioEncoder::EncodedInfo info; + rtc::Buffer encoded(500); + int nonspeech_frames = 0; + int max_nonspeech_frames = 0; + int dtx_frames = 0; + int max_dtx_frames = 0; + uint32_t rtp_timestamp = 0u; + for (size_t i = 0; i < 500; ++i) { + encoded.Clear(); + + // Every second call to the encoder will generate an Opus packet. + for (int j = 0; j < 2; j++) { + info = + encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded); + rtp_timestamp += kInputBlockSizeSamples; + } + + // Bookkeeping of number of DTX frames. + if (info.encoded_bytes <= 2) { + ++dtx_frames; + } else { + if (dtx_frames > max_dtx_frames) + max_dtx_frames = dtx_frames; + dtx_frames = 0; + } + + // Bookkeeping of number of non-speech frames. + if (info.speech == 0) { + ++nonspeech_frames; + } else { + if (nonspeech_frames > max_nonspeech_frames) + max_nonspeech_frames = nonspeech_frames; + nonspeech_frames = 0; + } + } + + // Maximum number of consecutive non-speech packets should exceed 20. + EXPECT_GT(max_nonspeech_frames, 20); +} + } // namespace webrtc diff --git a/modules/audio_coding/test/TestVADDTX.cc b/modules/audio_coding/test/TestVADDTX.cc index 628582d55c..8064448989 100644 --- a/modules/audio_coding/test/TestVADDTX.cc +++ b/modules/audio_coding/test/TestVADDTX.cc @@ -257,6 +257,7 @@ void TestOpusDtx::Perform() { EXPECT_EQ(0, acm_send_->EnableOpusDtx()); expects[kEmptyFrame] = 1; + expects[kAudioFrameCN] = 1; Run(webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"), 32000, 1, out_filename, true, expects); @@ -265,12 +266,14 @@ void TestOpusDtx::Perform() { RegisterCodec(kOpusStereo); EXPECT_EQ(0, acm_send_->DisableOpusDtx()); expects[kEmptyFrame] = 0; + expects[kAudioFrameCN] = 0; Run(webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"), 32000, 2, out_filename, false, expects); EXPECT_EQ(0, acm_send_->EnableOpusDtx()); expects[kEmptyFrame] = 1; + expects[kAudioFrameCN] = 1; Run(webrtc::test::ResourcePath("audio_coding/teststereo32kHz", "pcm"), 32000, 2, 
out_filename, true, expects); #endif