diff --git a/api/audio_codecs/audio_decoder.h b/api/audio_codecs/audio_decoder.h index 557ffe2759..ce235946da 100644 --- a/api/audio_codecs/audio_decoder.h +++ b/api/audio_codecs/audio_decoder.h @@ -136,7 +136,7 @@ class AudioDecoder { // with the decoded audio on either side of the concealment. // Note: The default implementation of GeneratePlc will be deleted soon. All // implementations must provide their own, which can be a simple as a no-op. - // TODO(bugs.webrtc.org/9676): Remove default impementation. + // TODO(bugs.webrtc.org/9676): Remove default implementation. virtual void GeneratePlc(size_t requested_samples_per_channel, rtc::BufferT* concealment_audio); diff --git a/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc b/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc index daf81f2a9c..cb0a3d88f8 100644 --- a/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc +++ b/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc @@ -10,7 +10,6 @@ // Test to verify correct operation when using the decoder-internal PLC. -#include #include #include #include @@ -33,6 +32,9 @@ namespace webrtc { namespace test { namespace { +constexpr int kSampleRateHz = 32000; +constexpr int kRunTimeMs = 10000; + // This class implements a fake decoder. The decoder will read audio from a file // and present as output, both for regular decoding and for PLC. 
class AudioDecoderPlc : public AudioDecoder { @@ -48,7 +50,8 @@ class AudioDecoderPlc : public AudioDecoder { int sample_rate_hz, int16_t* decoded, SpeechType* speech_type) override { - RTC_CHECK_EQ(encoded_len / 2, 20 * sample_rate_hz_ / 1000); + RTC_CHECK_GE(encoded_len / 2, 10 * sample_rate_hz_ / 1000); + RTC_CHECK_LE(encoded_len / 2, 2 * 10 * sample_rate_hz_ / 1000); RTC_CHECK_EQ(sample_rate_hz, sample_rate_hz_); RTC_CHECK(decoded); RTC_CHECK(speech_type); @@ -60,17 +63,21 @@ class AudioDecoderPlc : public AudioDecoder { void GeneratePlc(size_t requested_samples_per_channel, rtc::BufferT* concealment_audio) override { + // Instead of generating random data for GeneratePlc we use the same data as + // the input, so we can check that we produce the same result independently + // of the losses. + RTC_DCHECK_EQ(requested_samples_per_channel, 10 * sample_rate_hz_ / 1000); + // Must keep a local copy of this since DecodeInternal sets it to false. const bool last_was_plc = last_was_plc_; - SpeechType speech_type; + std::vector decoded(5760); - int dec_len = DecodeInternal(nullptr, 2 * 20 * sample_rate_hz_ / 1000, + SpeechType speech_type; + int dec_len = DecodeInternal(nullptr, 2 * 10 * sample_rate_hz_ / 1000, sample_rate_hz_, decoded.data(), &speech_type); - // This fake decoder can only generate 20 ms of PLC data each time. Make - // sure the caller didn't ask for more. - RTC_CHECK_GE(dec_len, requested_samples_per_channel); concealment_audio->AppendData(decoded.data(), dec_len); concealed_samples_ += rtc::checked_cast(dec_len); + if (!last_was_plc) { ++concealment_events_; } @@ -103,11 +110,15 @@ class ZeroSampleGenerator : public EncodeNetEqInput::Generator { }; // A NetEqInput which connects to another NetEqInput, but drops a number of -// packets on the way. 
+// consecutive packets on the way. class LossyInput : public NetEqInput { public: - LossyInput(int loss_cadence, std::unique_ptr input) - : loss_cadence_(loss_cadence), input_(std::move(input)) {} + LossyInput(int loss_cadence, + int burst_length, + std::unique_ptr input) + : loss_cadence_(loss_cadence), + burst_length_(burst_length), + input_(std::move(input)) {} absl::optional NextPacketTime() const override { return input_->NextPacketTime(); @@ -119,8 +130,12 @@ class LossyInput : public NetEqInput { std::unique_ptr PopPacket() override { if (loss_cadence_ != 0 && (++count_ % loss_cadence_) == 0) { - // Pop one extra packet to create the loss. - input_->PopPacket(); + // Pop `burst_length_` packets to create the loss. + auto packet_to_return = input_->PopPacket(); + for (int i = 0; i < burst_length_; i++) { + input_->PopPacket(); + } + return packet_to_return; } return input_->PopPacket(); } @@ -135,6 +150,7 @@ class LossyInput : public NetEqInput { private: const int loss_cadence_; + const int burst_length_; int count_ = 0; const std::unique_ptr input_; }; @@ -149,7 +165,14 @@ class AudioChecksumWithOutput : public AudioChecksum { std::string& output_str_; }; -NetEqNetworkStatistics RunTest(int loss_cadence, std::string* checksum) { +struct TestStatistics { + NetEqNetworkStatistics network; + NetEqLifetimeStatistics lifetime; +}; + +TestStatistics RunTest(int loss_cadence, + int burst_length, + std::string* checksum) { NetEq::Config config; config.for_test_no_time_stretching = true; @@ -157,20 +180,18 @@ NetEqNetworkStatistics RunTest(int loss_cadence, std::string* checksum) { // but the actual encoded samples will never be used by the decoder in the // test. See below about the decoder. 
auto generator = std::make_unique(); - constexpr int kSampleRateHz = 32000; constexpr int kPayloadType = 100; AudioEncoderPcm16B::Config encoder_config; encoder_config.sample_rate_hz = kSampleRateHz; encoder_config.payload_type = kPayloadType; auto encoder = std::make_unique(encoder_config); - constexpr int kRunTimeMs = 10000; auto input = std::make_unique( std::move(generator), std::move(encoder), kRunTimeMs); // Wrap the input in a loss function. - auto lossy_input = - std::make_unique(loss_cadence, std::move(input)); + auto lossy_input = std::make_unique(loss_cadence, burst_length, + std::move(input)); - // Settinng up decoders. + // Setting up decoders. NetEqTest::DecoderMap decoders; // Using a fake decoder which simply reads the output audio from a file. auto input_file = std::make_unique( @@ -195,24 +216,98 @@ NetEqNetworkStatistics RunTest(int loss_cadence, std::string* checksum) { auto lifetime_stats = neteq_test.LifetimeStats(); EXPECT_EQ(dec.concealed_samples(), lifetime_stats.concealed_samples); EXPECT_EQ(dec.concealment_events(), lifetime_stats.concealment_events); - - return neteq_test.SimulationStats(); + return {neteq_test.SimulationStats(), neteq_test.LifetimeStats()}; } } // namespace -TEST(NetEqDecoderPlc, Test) { +// Check that some basic metrics are produced in the right direction. In +// particular, expand_rate should only increase if there are losses present. Our +// dummy decoder is designed such that the checksum should always be the same +// regardless of the losses given that calls are executed in the right order. +TEST(NetEqDecoderPlc, BasicMetrics) { std::string checksum; + + // Drop 1 packet every 10 packets. 
+ auto stats = RunTest(10, 1, &checksum); std::string checksum_no_loss; - auto stats_no_loss = RunTest(0, &checksum_no_loss); + auto stats_no_loss = RunTest(0, 0, &checksum_no_loss); EXPECT_EQ(checksum, checksum_no_loss); - EXPECT_EQ(stats.preemptive_rate, stats_no_loss.preemptive_rate); - EXPECT_EQ(stats.accelerate_rate, stats_no_loss.accelerate_rate); - EXPECT_EQ(0, stats_no_loss.expand_rate); - EXPECT_GT(stats.expand_rate, 0); + EXPECT_EQ(stats.network.preemptive_rate, + stats_no_loss.network.preemptive_rate); + EXPECT_EQ(stats.network.accelerate_rate, + stats_no_loss.network.accelerate_rate); + EXPECT_EQ(0, stats_no_loss.network.expand_rate); + EXPECT_GT(stats.network.expand_rate, 0); +} + +// Checks that interruptions are not counted in small losses but they are +// correctly counted in long interruptions. +TEST(NetEqDecoderPlc, CountInterruptions) { + std::string checksum; + std::string checksum_2; + std::string checksum_3; + + // Half of the packets lost but in short interruptions. + auto stats_no_interruptions = RunTest(1, 1, &checksum); + // One loss of 5000 ms (250 packets of 20 ms). + auto stats_one_interruption = RunTest(200, 250, &checksum_2); + // Two losses of 2500 ms each (125 packets of 20 ms). 
+ auto stats_two_interruptions = RunTest(125, 125, &checksum_3); + + EXPECT_EQ(checksum, checksum_2); + EXPECT_EQ(checksum, checksum_3); + EXPECT_GT(stats_no_interruptions.network.expand_rate, 0); + EXPECT_EQ(stats_no_interruptions.lifetime.total_interruption_duration_ms, 0); + EXPECT_EQ(stats_no_interruptions.lifetime.interruption_count, 0); + + EXPECT_GT(stats_one_interruption.network.expand_rate, 0); + EXPECT_EQ(stats_one_interruption.lifetime.total_interruption_duration_ms, + 5000); + EXPECT_EQ(stats_one_interruption.lifetime.interruption_count, 1); + + EXPECT_GT(stats_two_interruptions.network.expand_rate, 0); + EXPECT_EQ(stats_two_interruptions.lifetime.total_interruption_duration_ms, + 5000); + EXPECT_EQ(stats_two_interruptions.lifetime.interruption_count, 2); +} + +// Checks that small losses do not produce interruptions. +TEST(NetEqDecoderPlc, NoInterruptionsInSmallLosses) { + std::string checksum_1; + std::string checksum_4; + + auto stats_1 = RunTest(300, 1, &checksum_1); + auto stats_4 = RunTest(300, 4, &checksum_4); + + EXPECT_EQ(checksum_1, checksum_4); + + EXPECT_EQ(stats_1.lifetime.interruption_count, 0); + EXPECT_EQ(stats_1.lifetime.total_interruption_duration_ms, 0); + EXPECT_EQ(stats_1.lifetime.concealed_samples, 640u); // 20ms of concealment. + EXPECT_EQ(stats_1.lifetime.concealment_events, 1u); // in just one event. + + EXPECT_EQ(stats_4.lifetime.interruption_count, 0); + EXPECT_EQ(stats_4.lifetime.total_interruption_duration_ms, 0); + EXPECT_EQ(stats_4.lifetime.concealed_samples, 2560u); // 80ms of concealment. + EXPECT_EQ(stats_4.lifetime.concealment_events, 1u); // in just one event. +} + +// Checks that interruptions of different sizes report correct duration. 
+TEST(NetEqDecoderPlc, InterruptionsReportCorrectSize) { + std::string checksum; + + for (int burst_length = 5; burst_length < 10; burst_length++) { + auto stats = RunTest(300, burst_length, &checksum); + auto duration = stats.lifetime.total_interruption_duration_ms; + if (burst_length < 8) { + EXPECT_EQ(duration, 0); + } else { + EXPECT_EQ(duration, burst_length * 20); + } + } } } // namespace test diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc index 9ec7bd5bca..d156352a26 100644 --- a/modules/audio_coding/neteq/neteq_impl.cc +++ b/modules/audio_coding/neteq/neteq_impl.cc @@ -1214,6 +1214,11 @@ int NetEqImpl::GetDecision(Operation* operation, } controller_->ExpandDecision(*operation); + if ((last_mode_ == Mode::kCodecPlc) && (*operation != Operation::kExpand)) { + // Getting out of the PLC expand mode, reporting interruptions. + // NetEq PLC reports these metrics in expand.cc. + stats_->EndExpandEvent(fs_hz_); + } // Check conditions for reset. 
if (new_codec_ || *operation == Operation::kUndefined) { @@ -2159,7 +2164,7 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { expand_->overlap_length()); normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_, - expand_.get())); + expand_.get(), stats_.get())); accelerate_.reset( accelerate_factory_->Create(fs_hz, channels, *background_noise_)); preemptive_expand_.reset(preemptive_expand_factory_->Create( diff --git a/modules/audio_coding/neteq/normal.cc b/modules/audio_coding/neteq/normal.cc index 967deea77a..3ed0e26a75 100644 --- a/modules/audio_coding/neteq/normal.cc +++ b/modules/audio_coding/neteq/normal.cc @@ -14,7 +14,6 @@ #include // min -#include "api/audio_codecs/audio_decoder.h" #include "common_audio/signal_processing/include/signal_processing_library.h" #include "modules/audio_coding/neteq/audio_multi_vector.h" #include "modules/audio_coding/neteq/background_noise.h" @@ -50,6 +49,13 @@ int Normal::Process(const int16_t* input, // TODO(hlundin): Investigate this further. const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult); + // If last call resulted in a CodecPlc we don't need to do cross-fading but we + // need to report the end of the interruption once we are back to normal + // operation. + if (last_mode == NetEq::Mode::kCodecPlc) { + statistics_->EndExpandEvent(fs_hz_); + } + // Check if last RecOut call resulted in an Expand. If so, we have to take // care of some cross-fading and unmuting. if (last_mode == NetEq::Mode::kExpand) { diff --git a/modules/audio_coding/neteq/normal.h b/modules/audio_coding/neteq/normal.h index d8c13e6190..d6dc84a2d6 100644 --- a/modules/audio_coding/neteq/normal.h +++ b/modules/audio_coding/neteq/normal.h @@ -15,6 +15,7 @@ #include // Access to size_t. 
#include "api/neteq/neteq.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" #include "rtc_base/checks.h" #include "rtc_base/constructor_magic.h" #include "rtc_base/numerics/safe_conversions.h" @@ -35,14 +36,16 @@ class Normal { Normal(int fs_hz, DecoderDatabase* decoder_database, const BackgroundNoise& background_noise, - Expand* expand) + Expand* expand, + StatisticsCalculator* statistics) : fs_hz_(fs_hz), decoder_database_(decoder_database), background_noise_(background_noise), expand_(expand), samples_per_ms_(rtc::CheckedDivExact(fs_hz_, 1000)), default_win_slope_Q14_( - rtc::dchecked_cast((1 << 14) / samples_per_ms_)) {} + rtc::dchecked_cast((1 << 14) / samples_per_ms_)), + statistics_(statistics) {} virtual ~Normal() {} @@ -64,6 +67,7 @@ class Normal { Expand* expand_; const size_t samples_per_ms_; const int16_t default_win_slope_Q14_; + StatisticsCalculator* const statistics_; RTC_DISALLOW_COPY_AND_ASSIGN(Normal); }; diff --git a/modules/audio_coding/neteq/normal_unittest.cc b/modules/audio_coding/neteq/normal_unittest.cc index 36751f8bcc..7e533bb1eb 100644 --- a/modules/audio_coding/neteq/normal_unittest.cc +++ b/modules/audio_coding/neteq/normal_unittest.cc @@ -50,7 +50,7 @@ TEST(Normal, CreateAndDestroy) { RandomVector random_vector; StatisticsCalculator statistics; Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels); - Normal normal(fs, &db, bgn, &expand); + Normal normal(fs, &db, bgn, &expand, &statistics); EXPECT_CALL(db, Die()); // Called when |db| goes out of scope. 
} @@ -64,7 +64,7 @@ TEST(Normal, AvoidDivideByZero) { StatisticsCalculator statistics; MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels); - Normal normal(fs, &db, bgn, &expand); + Normal normal(fs, &db, bgn, &expand, &statistics); int16_t input[1000] = {0}; AudioMultiVector output(channels); @@ -99,7 +99,7 @@ TEST(Normal, InputLengthAndChannelsDoNotMatch) { StatisticsCalculator statistics; MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels); - Normal normal(fs, &db, bgn, &expand); + Normal normal(fs, &db, bgn, &expand, &statistics); int16_t input[1000] = {0}; AudioMultiVector output(channels); @@ -124,7 +124,7 @@ TEST(Normal, LastModeExpand120msPacket) { StatisticsCalculator statistics; MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs, kChannels); - Normal normal(kFs, &db, bgn, &expand); + Normal normal(kFs, &db, bgn, &expand, &statistics); int16_t input[kPacketsizeBytes] = {0}; AudioMultiVector output(kChannels);