From f6ae657b07eedf95594f76e77dfc2f67132d5bb4 Mon Sep 17 00:00:00 2001
From: Jakob Ivarsson <jakobi@webrtc.org>
Date: Mon, 5 Feb 2024 11:30:21 +0100
Subject: [PATCH] Adapt NetEq delay to received FEC (both RED and codec
 inband).
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is achieved by notifying NetEq controller of all received packets
after splitting, which then does deduping so that only useful packets
are counted.

The goal is to reduce underruns when FEC is used.

The behavior is default enabled with a field trial kill-switch.

Bug: webrtc:13322
Change-Id: I2a1a78ead1a58940ef92da0d43413eda5ba1caf3
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/337440
Commit-Queue: Jakob Ivarsson‎ <jakobi@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#41665}
---
 experiments/field_trials.py                  |  3 +
 modules/audio_coding/neteq/neteq_impl.cc     | 65 +++++++++++++++-----
 modules/audio_coding/neteq/neteq_impl.h      |  4 ++
 modules/audio_coding/neteq/neteq_unittest.cc |  9 +--
 4 files changed, 61 insertions(+), 20 deletions(-)
diff --git a/experiments/field_trials.py b/experiments/field_trials.py
index 32ab6bf351..86066ec764 100755
--- a/experiments/field_trials.py
+++ b/experiments/field_trials.py
@@ -47,6 +47,9 @@ ACTIVE_FIELD_TRIALS: FrozenSet[FieldTrial] = frozenset([
     FieldTrial('WebRTC-Audio-GainController2',
                'webrtc:7494',
                date(2024, 4, 1)),
+    FieldTrial('WebRTC-Audio-NetEqFecDelayAdaptation',
+               'webrtc:13322',
+               date(2024, 4, 1)),
     FieldTrial('WebRTC-Audio-OpusSetSignalVoiceWithDtx',
                'webrtc:4559',
                date(2024, 4, 1)),
diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc
index e5c8bf6c08..144893b6d3 100644
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "api/audio_codecs/audio_decoder.h"
+#include "api/neteq/neteq_controller.h"
 #include "api/neteq/tick_timer.h"
 #include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "modules/audio_coding/codecs/cng/webrtc_cng.h"
@@ -50,6 +51,7 @@
 #include "rtc_base/strings/audio_format_to_string.h"
 #include "rtc_base/trace_event.h"
 #include "system_wrappers/include/clock.h"
+#include "system_wrappers/include/field_trial.h"
 
 namespace webrtc {
 namespace {
@@ -174,6 +176,8 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
       accelerate_factory_(std::move(deps.accelerate_factory)),
       preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)),
       stats_(std::move(deps.stats)),
+      enable_fec_delay_adaptation_(
+          !field_trial::IsDisabled("WebRTC-Audio-NetEqFecDelayAdaptation")),
       controller_(std::move(deps.neteq_controller)),
       last_mode_(Mode::kNormal),
       decoded_buffer_length_(kMaxFrameSize),
@@ -695,6 +699,7 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
       packet_buffer_->Flush();
       buffer_flush_occured = true;
     }
+    NetEqController::PacketArrivedInfo info = ToPacketArrivedInfo(packet);
     int return_val = packet_buffer_->InsertPacket(std::move(packet));
     if (return_val == PacketBuffer::kFlushed) {
       buffer_flush_occured = true;
@@ -702,6 +707,15 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
       // An error occurred.
       return kOtherError;
     }
+    if (enable_fec_delay_adaptation_) {
+      info.buffer_flush = buffer_flush_occured;
+      const bool should_update_stats = !new_codec_ && !buffer_flush_occured;
+      auto relative_delay =
+          controller_->PacketArrived(fs_hz_, should_update_stats, info);
+      if (relative_delay) {
+        stats_->RelativePacketArrivalDelay(relative_delay.value());
+      }
+    }
   }
 
   if (buffer_flush_occured) {
@@ -752,24 +766,26 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
     }
   }
 
-  const DecoderDatabase::DecoderInfo* dec_info =
-      decoder_database_->GetDecoderInfo(main_payload_type);
-  RTC_DCHECK(dec_info);  // Already checked that the payload type is known.
+  if (!enable_fec_delay_adaptation_) {
+    const DecoderDatabase::DecoderInfo* dec_info =
+        decoder_database_->GetDecoderInfo(main_payload_type);
+    RTC_DCHECK(dec_info);  // Already checked that the payload type is known.
 
-  NetEqController::PacketArrivedInfo info;
-  info.is_cng_or_dtmf = dec_info->IsComfortNoise() || dec_info->IsDtmf();
-  info.packet_length_samples =
-      number_of_primary_packets * decoder_frame_length_;
-  info.main_timestamp = main_timestamp;
-  info.main_sequence_number = main_sequence_number;
-  info.is_dtx = is_dtx;
-  info.buffer_flush = buffer_flush_occured;
+    NetEqController::PacketArrivedInfo info;
+    info.is_cng_or_dtmf = dec_info->IsComfortNoise() || dec_info->IsDtmf();
+    info.packet_length_samples =
+        number_of_primary_packets * decoder_frame_length_;
+    info.main_timestamp = main_timestamp;
+    info.main_sequence_number = main_sequence_number;
+    info.is_dtx = is_dtx;
+    info.buffer_flush = buffer_flush_occured;
 
-  const bool should_update_stats = !new_codec_;
-  auto relative_delay =
-      controller_->PacketArrived(fs_hz_, should_update_stats, info);
-  if (relative_delay) {
-    stats_->RelativePacketArrivalDelay(relative_delay.value());
+    const bool should_update_stats = !new_codec_;
+    auto relative_delay =
+        controller_->PacketArrived(fs_hz_, should_update_stats, info);
+    if (relative_delay) {
+      stats_->RelativePacketArrivalDelay(relative_delay.value());
+    }
   }
   return 0;
 }
@@ -2150,4 +2166,21 @@ NetEqImpl::OutputType NetEqImpl::LastOutputType() {
     return OutputType::kNormalSpeech;
   }
 }
+
+NetEqController::PacketArrivedInfo NetEqImpl::ToPacketArrivedInfo(
+    const Packet& packet) const {
+  const DecoderDatabase::DecoderInfo* dec_info =
+      decoder_database_->GetDecoderInfo(packet.payload_type);
+  // Fill the arrival info; a null `dec_info` or `packet.frame` is tolerated.
+  NetEqController::PacketArrivedInfo info;
+  info.is_cng_or_dtmf =
+      dec_info && (dec_info->IsComfortNoise() || dec_info->IsDtmf());
+  info.packet_length_samples =
+      packet.frame ? packet.frame->Duration() : decoder_frame_length_;
+  info.main_timestamp = packet.timestamp;
+  info.main_sequence_number = packet.sequence_number;
+  info.is_dtx = packet.frame && packet.frame->IsDtxPacket();
+  return info;  // NOTE: `buffer_flush` is left untouched by this helper.
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_coding/neteq/neteq_impl.h b/modules/audio_coding/neteq/neteq_impl.h
index f8f2b06410..f164238b09 100644
--- a/modules/audio_coding/neteq/neteq_impl.h
+++ b/modules/audio_coding/neteq/neteq_impl.h
@@ -342,6 +342,9 @@ class NetEqImpl : public webrtc::NetEq {
   NetEqNetworkStatistics CurrentNetworkStatisticsInternal() const
       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
+  NetEqController::PacketArrivedInfo ToPacketArrivedInfo(
+      const Packet& packet) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
   Clock* const clock_;
 
   mutable Mutex mutex_;
@@ -363,6 +366,7 @@ class NetEqImpl : public webrtc::NetEq {
   const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
       RTC_GUARDED_BY(mutex_);
   const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_);
+  const bool enable_fec_delay_adaptation_ RTC_GUARDED_BY(mutex_);
 
   std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_);
   std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_);
diff --git a/modules/audio_coding/neteq/neteq_unittest.cc b/modules/audio_coding/neteq/neteq_unittest.cc
index aec7e580ec..7104b7a6dc 100644
--- a/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_unittest.cc
@@ -76,12 +76,13 @@ TEST_F(NetEqDecodingTest, MAYBE_TestOpusBitExactness) {
       webrtc::test::ResourcePath("audio_coding/neteq_opus", "rtp");
 
   const std::string output_checksum =
-      "2efdbea92c3fb2383c59f89d881efec9f94001d0|"
-      "a6831b946b59913852ae3e53f99fa8f209bb23cd";
+      "434bdc4ec08546510ee903d001c8be1a01c44e24|"
+      "4336be0091e2faad7a194c16ee0a05e727325727|"
+      "cefd2de4adfa8f6a9b66a3639ad63c2f6779d0cd";
 
   const std::string network_stats_checksum =
-      "dfaf4399fd60293405290476ccf1c05c807c71a0|"
-      "076662525572dba753b11578330bd491923f7f5e";
+      "5f2c8e3dff9cff55dd7a9f4167939de001566d95|"
+      "80ab17c17da030d4f2dfbf314ac44aacdadd7f0c";
 
   DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum,
                    absl::GetFlag(FLAGS_gen_ref));