From 97c594fafe3991ee0b4307516f37320ddd6ebc72 Mon Sep 17 00:00:00 2001
From: Harald Alvestrand <hta@webrtc.org>
Date: Thu, 12 Sep 2024 14:02:03 +0000
Subject: [PATCH] Add field trial for late PT allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Note: Does not include code for the actual late allocation
of PTs.

Bug: webrtc:360058654
Change-Id: Iaa6bd2db2f974aad84fe1ae9c1aca5aea5d1d25e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/362320
Reviewed-by: Florent Castelli <orphis@webrtc.org>
Reviewed-by: Henrik Boström <hbos@webrtc.org>
Commit-Queue: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43014}
---
 experiments/field_trials.py                  |   3 +
 media/engine/webrtc_voice_engine.cc          |  85 ++++++++-----
 media/engine/webrtc_voice_engine.h           |   1 +
 media/engine/webrtc_voice_engine_unittest.cc | 122 ++++++++++++++++++-
 4 files changed, 177 insertions(+), 34 deletions(-)
diff --git a/experiments/field_trials.py b/experiments/field_trials.py
index ebcace03e2..c4455015a0 100755
--- a/experiments/field_trials.py
+++ b/experiments/field_trials.py
@@ -155,6 +155,9 @@ ACTIVE_FIELD_TRIALS: FrozenSet[FieldTrial] = frozenset([
     FieldTrial('WebRTC-TaskQueue-ReplaceLibeventWithStdlib',
                42224654,
                date(2024, 4, 1)),
+    FieldTrial('WebRTC-PayloadTypesInTransport',
+               360058654,
+               date(2025, 9, 11)),
     FieldTrial('WebRTC-UseNtpTimeAbsoluteSendTime',
                42226305,
                date(2024, 9, 1)),
diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc
index 93533dad58..70e19fed00 100644
--- a/media/engine/webrtc_voice_engine.cc
+++ b/media/engine/webrtc_voice_engine.cc
@@ -365,9 +365,9 @@ webrtc::SdpAudioFormat AudioCodecToSdpAudioFormat(const Codec& ac) {
 // payload type picker with a normal set of PTs.
 // TODO: https://issues.webrtc.org/360058654 - remove.
 std::vector<Codec> LegacyCollectCodecs(
-    const std::vector<webrtc::AudioCodecSpec>& specs) {
-  // Once payload assignment is done in the channel not in the factory,
-  // this needs to be a variable. But then this function should go away.
+    const std::vector<webrtc::AudioCodecSpec>& specs,
+    bool allocate_pt) {
+  // Only used for the legacy "allocate_pt = true" case.
   webrtc::PayloadTypePicker pt_mapper;
   std::vector<Codec> out;
 
@@ -380,41 +380,51 @@ std::vector<Codec> LegacyCollectCodecs(
 
   for (const auto& spec : specs) {
     cricket::Codec codec = CreateAudioCodec(spec.format);
-    auto pt_or_error = pt_mapper.SuggestMapping(codec, nullptr);
-    // We need to do some extra stuff before adding the main codecs to out.
-    if (pt_or_error.ok()) {
+    if (allocate_pt) {
+      auto pt_or_error = pt_mapper.SuggestMapping(codec, nullptr);
+      // We need to do some extra stuff before adding the main codecs to out.
+      if (!pt_or_error.ok()) {
+        continue;
+      }
       codec.id = pt_or_error.value();
-      if (spec.info.supports_network_adaption) {
-        codec.AddFeedbackParam(
-            FeedbackParam(kRtcpFbParamTransportCc, kParamValueEmpty));
+    }
+    if (spec.info.supports_network_adaption) {
+      codec.AddFeedbackParam(
+          FeedbackParam(kRtcpFbParamTransportCc, kParamValueEmpty));
+    }
+
+    if (spec.info.allow_comfort_noise) {
+      // Generate a CN entry if the decoder allows it and we support the
+      // clockrate.
+      auto cn = generate_cn.find(spec.format.clockrate_hz);
+      if (cn != generate_cn.end()) {
+        cn->second = true;
       }
+    }
 
-      if (spec.info.allow_comfort_noise) {
-        // Generate a CN entry if the decoder allows it and we support the
-        // clockrate.
-        auto cn = generate_cn.find(spec.format.clockrate_hz);
-        if (cn != generate_cn.end()) {
-          cn->second = true;
-        }
-      }
+    // Generate a telephone-event entry if we support the clockrate.
+    auto dtmf = generate_dtmf.find(spec.format.clockrate_hz);
+    if (dtmf != generate_dtmf.end()) {
+      dtmf->second = true;
+    }
 
-      // Generate a telephone-event entry if we support the clockrate.
-      auto dtmf = generate_dtmf.find(spec.format.clockrate_hz);
-      if (dtmf != generate_dtmf.end()) {
-        dtmf->second = true;
-      }
+    out.push_back(codec);
 
-      out.push_back(codec);
-
-      // TODO(hta):  Don't assign RED codecs until we know that the PT for Opus
-      // is final
-      if (codec.name == kOpusCodecName) {
+    // TODO(hta):  Don't assign RED codecs until we know that the PT for Opus
+    // is final
+    if (codec.name == kOpusCodecName) {
+      if (allocate_pt) {
         std::string red_fmtp =
             rtc::ToString(codec.id) + "/" + rtc::ToString(codec.id);
         cricket::Codec red_codec =
             CreateAudioCodec({kRedCodecName, 48000, 2, {{"", red_fmtp}}});
         red_codec.id = pt_mapper.SuggestMapping(red_codec, nullptr).value();
         out.push_back(red_codec);
+      } else {
+        // We don't know the PT to put into the RED fmtp parameter yet.
+        // Leave it out.
+        cricket::Codec red_codec = CreateAudioCodec({kRedCodecName, 48000, 2});
+        out.push_back(red_codec);
       }
     }
   }
@@ -423,7 +433,9 @@ std::vector<Codec> LegacyCollectCodecs(
   for (const auto& cn : generate_cn) {
     if (cn.second) {
       cricket::Codec cn_codec = CreateAudioCodec({kCnCodecName, cn.first, 1});
-      cn_codec.id = pt_mapper.SuggestMapping(cn_codec, nullptr).value();
+      if (allocate_pt) {
+        cn_codec.id = pt_mapper.SuggestMapping(cn_codec, nullptr).value();
+      }
       out.push_back(cn_codec);
     }
   }
@@ -433,11 +445,12 @@ std::vector<Codec> LegacyCollectCodecs(
     if (dtmf.second) {
       cricket::Codec dtmf_codec =
           CreateAudioCodec({kDtmfCodecName, dtmf.first, 1});
-      dtmf_codec.id = pt_mapper.SuggestMapping(dtmf_codec, nullptr).value();
+      if (allocate_pt) {
+        dtmf_codec.id = pt_mapper.SuggestMapping(dtmf_codec, nullptr).value();
+      }
       out.push_back(dtmf_codec);
     }
   }
-
   return out;
 }
 
@@ -460,7 +473,9 @@ WebRtcVoiceEngine::WebRtcVoiceEngine(
       apm_(audio_processing),
       audio_frame_processor_(std::move(audio_frame_processor)),
       minimized_remsampling_on_mobile_trial_enabled_(
-          IsEnabled(trials, "WebRTC-Audio-MinimizeResamplingOnMobile")) {
+          IsEnabled(trials, "WebRTC-Audio-MinimizeResamplingOnMobile")),
+      payload_types_in_transport_trial_enabled_(
+          IsEnabled(trials, "WebRTC-PayloadTypesInTransport")) {
   RTC_LOG(LS_INFO) << "WebRtcVoiceEngine::WebRtcVoiceEngine";
   RTC_DCHECK(decoder_factory);
   RTC_DCHECK(encoder_factory);
@@ -492,13 +507,17 @@ void WebRtcVoiceEngine::Init() {
 
   // Load our audio codec lists.
   RTC_LOG(LS_VERBOSE) << "Supported send codecs in order of preference:";
-  send_codecs_ = LegacyCollectCodecs(encoder_factory_->GetSupportedEncoders());
+  send_codecs_ =
+      LegacyCollectCodecs(encoder_factory_->GetSupportedEncoders(),
+                          !payload_types_in_transport_trial_enabled_);
   for (const Codec& codec : send_codecs_) {
     RTC_LOG(LS_VERBOSE) << ToString(codec);
   }
 
   RTC_LOG(LS_VERBOSE) << "Supported recv codecs in order of preference:";
-  recv_codecs_ = LegacyCollectCodecs(decoder_factory_->GetSupportedDecoders());
+  recv_codecs_ =
+      LegacyCollectCodecs(decoder_factory_->GetSupportedDecoders(),
+                          !payload_types_in_transport_trial_enabled_);
   for (const Codec& codec : recv_codecs_) {
     RTC_LOG(LS_VERBOSE) << ToString(codec);
   }
diff --git a/media/engine/webrtc_voice_engine.h b/media/engine/webrtc_voice_engine.h
index dd5b84b4d0..0fa1728a10 100644
--- a/media/engine/webrtc_voice_engine.h
+++ b/media/engine/webrtc_voice_engine.h
@@ -174,6 +174,7 @@ class WebRtcVoiceEngine final : public VoiceEngineInterface {
   int audio_jitter_buffer_min_delay_ms_ = 0;
 
   const bool minimized_remsampling_on_mobile_trial_enabled_;
+  const bool payload_types_in_transport_trial_enabled_;
 };
 
 class WebRtcVoiceSendChannel final : public MediaChannelUtil,
diff --git a/media/engine/webrtc_voice_engine_unittest.cc b/media/engine/webrtc_voice_engine_unittest.cc
index e15690db11..de86c738aa 100644
--- a/media/engine/webrtc_voice_engine_unittest.cc
+++ b/media/engine/webrtc_voice_engine_unittest.cc
@@ -10,37 +10,69 @@
 
 #include "media/engine/webrtc_voice_engine.h"
 
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <map>
 #include <memory>
 #include <optional>
+#include <set>
+#include <string>
 #include <utility>
+#include <vector>
 
-#include "absl/memory/memory.h"
 #include "absl/strings/match.h"
+#include "api/audio/audio_processing.h"
+#include "api/audio_codecs/audio_codec_pair_id.h"
+#include "api/audio_codecs/audio_format.h"
 #include "api/audio_codecs/builtin_audio_decoder_factory.h"
 #include "api/audio_codecs/builtin_audio_encoder_factory.h"
+#include "api/audio_options.h"
+#include "api/call/audio_sink.h"
+#include "api/crypto/crypto_options.h"
 #include "api/environment/environment.h"
 #include "api/environment/environment_factory.h"
+#include "api/make_ref_counted.h"
 #include "api/media_types.h"
+#include "api/priority.h"
+#include "api/ref_count.h"
+#include "api/rtc_error.h"
+#include "api/rtp_headers.h"
 #include "api/rtp_parameters.h"
 #include "api/scoped_refptr.h"
 #include "api/task_queue/default_task_queue_factory.h"
+#include "api/transport/bitrate_settings.h"
 #include "api/transport/field_trial_based_config.h"
+#include "api/transport/rtp/rtp_source.h"
+#include "call/audio_receive_stream.h"
+#include "call/audio_send_stream.h"
+#include "call/audio_state.h"
 #include "call/call.h"
+#include "call/call_config.h"
 #include "media/base/codec.h"
 #include "media/base/fake_media_engine.h"
 #include "media/base/fake_network_interface.h"
 #include "media/base/fake_rtp.h"
 #include "media/base/media_channel.h"
+#include "media/base/media_config.h"
 #include "media/base/media_constants.h"
+#include "media/base/media_engine.h"
+#include "media/base/stream_params.h"
 #include "media/engine/fake_webrtc_call.h"
 #include "modules/audio_device/include/mock_audio_device.h"
 #include "modules/audio_mixer/audio_mixer_impl.h"
 #include "modules/audio_processing/include/mock_audio_processing.h"
+#include "modules/rtp_rtcp/include/rtp_header_extension_map.h"
 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
 #include "rtc_base/arraysize.h"
 #include "rtc_base/byte_order.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/copy_on_write_buffer.h"
+#include "rtc_base/dscp.h"
 #include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/thread.h"
+#include "test/gmock.h"
 #include "test/gtest.h"
 #include "test/mock_audio_decoder_factory.h"
 #include "test/mock_audio_encoder_factory.h"
@@ -3994,3 +4026,91 @@ TEST(WebRtcVoiceEngineTest, CollectRecvCodecs) {
     EXPECT_GE(find_codec({"telephone-event", 48000, 1}), num_specs);
   }
 }
+
+TEST(WebRtcVoiceEngineTest, CollectRecvCodecsWithLatePtAssignment) {
+  webrtc::test::ScopedKeyValueConfig field_trials(
+      "WebRTC-PayloadTypesInTransport/Enabled/");
+
+  for (bool use_null_apm : {false, true}) {
+    std::vector<webrtc::AudioCodecSpec> specs;
+    webrtc::AudioCodecSpec spec1{{"codec1", 48000, 2, {{"param1", "value1"}}},
+                                 {48000, 2, 16000, 10000, 20000}};
+    spec1.info.allow_comfort_noise = false;
+    spec1.info.supports_network_adaption = true;
+    specs.push_back(spec1);
+    webrtc::AudioCodecSpec spec2{{"codec2", 32000, 1}, {32000, 1, 32000}};
+    spec2.info.allow_comfort_noise = false;
+    specs.push_back(spec2);
+    specs.push_back(webrtc::AudioCodecSpec{
+        {"codec3", 16000, 1, {{"param1", "value1b"}, {"param2", "value2"}}},
+        {16000, 1, 13300}});
+    specs.push_back(
+        webrtc::AudioCodecSpec{{"codec4", 8000, 1}, {8000, 1, 64000}});
+    specs.push_back(
+        webrtc::AudioCodecSpec{{"codec5", 8000, 2}, {8000, 1, 64000}});
+
+    std::unique_ptr<webrtc::TaskQueueFactory> task_queue_factory =
+        webrtc::CreateDefaultTaskQueueFactory();
+    rtc::scoped_refptr<webrtc::MockAudioEncoderFactory> unused_encoder_factory =
+        webrtc::MockAudioEncoderFactory::CreateUnusedFactory();
+    rtc::scoped_refptr<webrtc::MockAudioDecoderFactory> mock_decoder_factory =
+        rtc::make_ref_counted<webrtc::MockAudioDecoderFactory>();
+    EXPECT_CALL(*mock_decoder_factory.get(), GetSupportedDecoders())
+        .WillOnce(Return(specs));
+    rtc::scoped_refptr<webrtc::test::MockAudioDeviceModule> adm =
+        webrtc::test::MockAudioDeviceModule::CreateNice();
+
+    rtc::scoped_refptr<webrtc::AudioProcessing> apm =
+        use_null_apm ? nullptr : webrtc::AudioProcessingBuilder().Create();
+    cricket::WebRtcVoiceEngine engine(
+        task_queue_factory.get(), adm.get(), unused_encoder_factory,
+        mock_decoder_factory, nullptr, apm, nullptr, field_trials);
+    engine.Init();
+    auto codecs = engine.recv_codecs();
+    EXPECT_EQ(11u, codecs.size());
+
+    // Rather than just ASSERTing that there are enough codecs, ensure that we
+    // can check the actual values safely, to provide better test results.
+    auto get_codec = [&codecs](size_t index) -> const cricket::Codec& {
+      static const cricket::Codec missing_codec =
+          cricket::CreateAudioCodec(0, "<missing>", 0, 0);
+      if (codecs.size() > index)
+        return codecs[index];
+      return missing_codec;
+    };
+
+    // Ensure the general codecs are generated first and in order.
+    for (size_t i = 0; i != specs.size(); ++i) {
+      EXPECT_EQ(specs[i].format.name, get_codec(i).name);
+      EXPECT_EQ(specs[i].format.clockrate_hz, get_codec(i).clockrate);
+      EXPECT_EQ(specs[i].format.num_channels, get_codec(i).channels);
+      EXPECT_EQ(specs[i].format.parameters, get_codec(i).params);
+    }
+
+    // Find the index of a codec, or -1 if not found, so that we can easily
+    // check supplementary codecs are ordered after the general codecs.
+    auto find_codec = [&codecs](const webrtc::SdpAudioFormat& format) -> int {
+      for (size_t i = 0; i != codecs.size(); ++i) {
+        const cricket::Codec& codec = codecs[i];
+        if (absl::EqualsIgnoreCase(codec.name, format.name) &&
+            codec.clockrate == format.clockrate_hz &&
+            codec.channels == format.num_channels) {
+          return rtc::checked_cast<int>(i);
+        }
+      }
+      return -1;
+    };
+
+    // Ensure all supplementary codecs are generated last. Their internal
+    // ordering is not important. Without this cast, the comparison turned
+    // unsigned and, thus, failed for -1.
+    const int num_specs = static_cast<int>(specs.size());
+    EXPECT_GE(find_codec({"cn", 8000, 1}), num_specs);
+    EXPECT_GE(find_codec({"cn", 16000, 1}), num_specs);
+    EXPECT_EQ(find_codec({"cn", 32000, 1}), -1);
+    EXPECT_GE(find_codec({"telephone-event", 8000, 1}), num_specs);
+    EXPECT_GE(find_codec({"telephone-event", 16000, 1}), num_specs);
+    EXPECT_GE(find_codec({"telephone-event", 32000, 1}), num_specs);
+    EXPECT_GE(find_codec({"telephone-event", 48000, 1}), num_specs);
+  }
+}