Add a new constraint to set NetEq buffer capacity from PeerConnection

This change makes it possible to set a custom value for the maximum
capacity of the packet buffer in NetEq (the audio jitter buffer). The
default value is 50 packets, but any value can be set with the new
functionality.

R=jmarusic@webrtc.org, mflodman@webrtc.org, pthatcher@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/50869004

Cr-Commit-Position: refs/heads/master@{#9159}
This commit is contained in:
Henrik Lundin 2015-05-08 12:58:47 +02:00
parent 83b5c053b9
commit 208a2294cd
16 changed files with 124 additions and 34 deletions

View File

@@ -1304,6 +1304,9 @@ JOW(jlong, PeerConnectionFactory_nativeCreatePeerConnection)(
"Ljava/util/List;");
jobject j_ice_servers = GetObjectField(jni, j_rtc_config, j_ice_servers_id);
jfieldID j_audio_jitter_buffer_max_packets_id = GetFieldID(
jni, j_rtc_config_class, "audioJitterBufferMaxPackets",
"I");
PeerConnectionInterface::RTCConfiguration rtc_config;
rtc_config.type =
@@ -1312,6 +1315,8 @@ JOW(jlong, PeerConnectionFactory_nativeCreatePeerConnection)(
rtc_config.tcp_candidate_policy =
JavaTcpCandidatePolicyToNativeType(jni, j_tcp_candidate_policy);
JavaIceServersToJsepIceServers(jni, j_ice_servers, &rtc_config.servers);
rtc_config.audio_jitter_buffer_max_packets =
GetIntField(jni, j_rtc_config, j_audio_jitter_buffer_max_packets_id);
PCOJava* observer = reinterpret_cast<PCOJava*>(observer_p);
observer->SetConstraints(new ConstraintsWrapper(jni, j_constraints));

View File

@@ -128,12 +128,14 @@ public class PeerConnection {
public List<IceServer> iceServers;
public BundlePolicy bundlePolicy;
public TcpCandidatePolicy tcpCandidatePolicy;
public int audioJitterBufferMaxPackets;
// Constructs a configuration that uses the application-supplied ICE servers
// and defaults for every other field.
public RTCConfiguration(List<IceServer> iceServers) {
iceTransportsType = IceTransportsType.ALL;
bundlePolicy = BundlePolicy.BALANCED;
tcpCandidatePolicy = TcpCandidatePolicy.ENABLED;
this.iceServers = iceServers;
// 50 matches the default NetEq (audio jitter buffer) capacity in packets.
audioJitterBufferMaxPackets = 50;
}
};

View File

@@ -382,9 +382,7 @@ bool PeerConnection::Initialize(
// Initialize the WebRtcSession. It creates transport channels etc.
if (!session_->Initialize(factory_->options(), constraints,
dtls_identity_service,
configuration.type,
configuration.bundle_policy))
dtls_identity_service, configuration))
return false;
// Register PeerConnection as receiver of local ice candidates.

View File

@@ -211,11 +211,13 @@ class PeerConnectionInterface : public rtc::RefCountInterface {
IceServers servers;
BundlePolicy bundle_policy;
TcpCandidatePolicy tcp_candidate_policy;
int audio_jitter_buffer_max_packets;
RTCConfiguration()
: type(kAll),
bundle_policy(kBundlePolicyBalanced),
tcp_candidate_policy(kTcpCandidatePolicyEnabled) {}
tcp_candidate_policy(kTcpCandidatePolicyEnabled),
audio_jitter_buffer_max_packets(50) {}
};
struct RTCOfferAnswerOptions {

View File

@@ -521,9 +521,8 @@ bool WebRtcSession::Initialize(
const PeerConnectionFactoryInterface::Options& options,
const MediaConstraintsInterface* constraints,
DTLSIdentityServiceInterface* dtls_identity_service,
PeerConnectionInterface::IceTransportsType ice_transport_type,
PeerConnectionInterface::BundlePolicy bundle_policy) {
bundle_policy_ = bundle_policy;
const PeerConnectionInterface::RTCConfiguration& rtc_configuration) {
bundle_policy_ = rtc_configuration.bundle_policy;
// TODO(perkj): Take |constraints| into consideration. Return false if not all
// mandatory constraints can be fulfilled. Note that |constraints|
@@ -640,6 +639,9 @@ bool WebRtcSession::Initialize(
MediaConstraintsInterface::kCombinedAudioVideoBwe,
&audio_options_.combined_audio_video_bwe);
audio_options_.audio_jitter_buffer_max_packets.Set(
rtc_configuration.audio_jitter_buffer_max_packets);
const cricket::VideoCodec default_codec(
JsepSessionDescription::kDefaultVideoCodecId,
JsepSessionDescription::kDefaultVideoCodecName,
@@ -667,7 +669,7 @@ bool WebRtcSession::Initialize(
webrtc_session_desc_factory_->SetSdesPolicy(cricket::SEC_DISABLED);
}
port_allocator()->set_candidate_filter(
ConvertIceTransportTypeToCandidateFilter(ice_transport_type));
ConvertIceTransportTypeToCandidateFilter(rtc_configuration.type));
return true;
}

View File

@@ -117,11 +117,11 @@ class WebRtcSession : public cricket::BaseSession,
MediaStreamSignaling* mediastream_signaling);
virtual ~WebRtcSession();
bool Initialize(const PeerConnectionFactoryInterface::Options& options,
const MediaConstraintsInterface* constraints,
DTLSIdentityServiceInterface* dtls_identity_service,
PeerConnectionInterface::IceTransportsType ice_transport_type,
PeerConnectionInterface::BundlePolicy bundle_policy);
bool Initialize(
const PeerConnectionFactoryInterface::Options& options,
const MediaConstraintsInterface* constraints,
DTLSIdentityServiceInterface* dtls_identity_service,
const PeerConnectionInterface::RTCConfiguration& rtc_configuration);
// Deletes the voice, video and data channel and changes the session state
// to STATE_RECEIVEDTERMINATE.
void Terminate();

View File

@@ -156,6 +156,8 @@ static const char kSdpWithRtx[] =
"a=rtpmap:96 rtx/90000\r\n"
"a=fmtp:96 apt=0\r\n";
static const int kAudioJitterBufferMaxPackets = 50;
// Add some extra |newlines| to the |message| after |line|.
static void InjectAfter(const std::string& line,
const std::string& newlines,
@@ -383,8 +385,7 @@ class WebRtcSessionTest : public testing::Test {
void Init(
DTLSIdentityServiceInterface* identity_service,
PeerConnectionInterface::IceTransportsType ice_transport_type,
PeerConnectionInterface::BundlePolicy bundle_policy) {
const PeerConnectionInterface::RTCConfiguration& rtc_configuration) {
ASSERT_TRUE(session_.get() == NULL);
session_.reset(new WebRtcSessionForTest(
channel_manager_.get(), rtc::Thread::Current(),
@@ -398,33 +399,51 @@ class WebRtcSessionTest : public testing::Test {
observer_.ice_gathering_state_);
EXPECT_TRUE(session_->Initialize(options_, constraints_.get(),
identity_service, ice_transport_type,
bundle_policy));
identity_service, rtc_configuration));
session_->set_metrics_observer(&metrics_observer_);
}
void Init() {
Init(NULL, PeerConnectionInterface::kAll,
PeerConnectionInterface::kBundlePolicyBalanced);
PeerConnectionInterface::RTCConfiguration configuration;
configuration.type = PeerConnectionInterface::kAll;
configuration.bundle_policy =
PeerConnectionInterface::kBundlePolicyBalanced;
configuration.audio_jitter_buffer_max_packets =
kAudioJitterBufferMaxPackets;
Init(NULL, configuration);
}
void InitWithIceTransport(
PeerConnectionInterface::IceTransportsType ice_transport_type) {
Init(NULL, ice_transport_type,
PeerConnectionInterface::kBundlePolicyBalanced);
PeerConnectionInterface::RTCConfiguration configuration;
configuration.type = ice_transport_type;
configuration.bundle_policy =
PeerConnectionInterface::kBundlePolicyBalanced;
configuration.audio_jitter_buffer_max_packets =
kAudioJitterBufferMaxPackets;
Init(NULL, configuration);
}
void InitWithBundlePolicy(
PeerConnectionInterface::BundlePolicy bundle_policy) {
Init(NULL, PeerConnectionInterface::kAll, bundle_policy);
PeerConnectionInterface::RTCConfiguration configuration;
configuration.type = PeerConnectionInterface::kAll;
configuration.bundle_policy = bundle_policy;
configuration.audio_jitter_buffer_max_packets =
kAudioJitterBufferMaxPackets;
Init(NULL, configuration);
}
void InitWithDtls(bool identity_request_should_fail = false) {
FakeIdentityService* identity_service = new FakeIdentityService();
identity_service->set_should_fail(identity_request_should_fail);
Init(identity_service,
PeerConnectionInterface::kAll,
PeerConnectionInterface::kBundlePolicyBalanced);
PeerConnectionInterface::RTCConfiguration configuration;
configuration.type = PeerConnectionInterface::kAll;
configuration.bundle_policy =
PeerConnectionInterface::kBundlePolicyBalanced;
configuration.audio_jitter_buffer_max_packets =
kAudioJitterBufferMaxPackets;
Init(identity_service, configuration);
}
void InitWithDtmfCodec() {

View File

@@ -150,6 +150,8 @@ struct AudioOptions {
noise_suppression.SetFrom(change.noise_suppression);
highpass_filter.SetFrom(change.highpass_filter);
stereo_swapping.SetFrom(change.stereo_swapping);
audio_jitter_buffer_max_packets.SetFrom(
change.audio_jitter_buffer_max_packets);
typing_detection.SetFrom(change.typing_detection);
aecm_generate_comfort_noise.SetFrom(change.aecm_generate_comfort_noise);
conference_mode.SetFrom(change.conference_mode);
@@ -180,6 +182,7 @@ struct AudioOptions {
noise_suppression == o.noise_suppression &&
highpass_filter == o.highpass_filter &&
stereo_swapping == o.stereo_swapping &&
audio_jitter_buffer_max_packets == o.audio_jitter_buffer_max_packets &&
typing_detection == o.typing_detection &&
aecm_generate_comfort_noise == o.aecm_generate_comfort_noise &&
conference_mode == o.conference_mode &&
@@ -210,6 +213,8 @@ struct AudioOptions {
ost << ToStringIfSet("ns", noise_suppression);
ost << ToStringIfSet("hf", highpass_filter);
ost << ToStringIfSet("swap", stereo_swapping);
ost << ToStringIfSet("audio_jitter_buffer_max_packets",
audio_jitter_buffer_max_packets);
ost << ToStringIfSet("typing", typing_detection);
ost << ToStringIfSet("comfort_noise", aecm_generate_comfort_noise);
ost << ToStringIfSet("conference", conference_mode);
@@ -248,6 +253,8 @@ struct AudioOptions {
Settable<bool> highpass_filter;
// Audio processing to swap the left and right channels.
Settable<bool> stereo_swapping;
// Audio receiver jitter buffer (NetEq) max capacity in number of packets.
Settable<int> audio_jitter_buffer_max_packets;
// Audio processing to detect typing.
Settable<bool> typing_detection;
Settable<bool> aecm_generate_comfort_noise;

View File

@@ -41,6 +41,7 @@
#include "webrtc/base/checks.h"
#include "webrtc/base/gunit.h"
#include "webrtc/base/stringutils.h"
#include "webrtc/config.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
namespace cricket {
@@ -213,7 +214,8 @@ class FakeWebRtcVoiceEngine
send_audio_level_ext_(-1),
receive_audio_level_ext_(-1),
send_absolute_sender_time_ext_(-1),
receive_absolute_sender_time_ext_(-1) {
receive_absolute_sender_time_ext_(-1),
neteq_capacity(-1) {
memset(&send_codec, 0, sizeof(send_codec));
memset(&rx_agc_config, 0, sizeof(rx_agc_config));
}
@@ -249,6 +251,7 @@ class FakeWebRtcVoiceEngine
webrtc::CodecInst send_codec;
webrtc::PacketTime last_rtp_packet_time;
std::list<std::string> packets;
int neteq_capacity;
};
FakeWebRtcVoiceEngine(const cricket::AudioCodec* const* codecs,
@@ -391,7 +394,7 @@ class FakeWebRtcVoiceEngine
true);
}
}
int AddChannel() {
int AddChannel(const webrtc::Config& config) {
if (fail_create_channel_) {
return -1;
}
@@ -401,6 +404,9 @@ class FakeWebRtcVoiceEngine
GetCodec(i, codec);
ch->recv_codecs.push_back(codec);
}
if (config.Get<webrtc::NetEqCapacityConfig>().enabled) {
ch->neteq_capacity = config.Get<webrtc::NetEqCapacityConfig>().capacity;
}
channels_[++last_channel_] = ch;
return last_channel_;
}
@@ -447,10 +453,11 @@ class FakeWebRtcVoiceEngine
return &audio_processing_;
}
WEBRTC_FUNC(CreateChannel, ()) {
return AddChannel();
webrtc::Config empty_config;
return AddChannel(empty_config);
}
WEBRTC_FUNC(CreateChannel, (const webrtc::Config& /*config*/)) {
return AddChannel();
WEBRTC_FUNC(CreateChannel, (const webrtc::Config& config)) {
return AddChannel(config);
}
WEBRTC_FUNC(DeleteChannel, (int channel)) {
WEBRTC_CHECK_CHANNEL(channel);
@@ -1243,6 +1250,11 @@ class FakeWebRtcVoiceEngine
WEBRTC_STUB(GetAudioFrame, (int channel, int desired_sample_rate_hz,
webrtc::AudioFrame* frame));
WEBRTC_STUB(SetExternalMixing, (int channel, bool enable));
// Returns the NetEq capacity that was recorded for the most recently added
// channel (taken from the webrtc::Config passed to CreateChannel), or -1 if
// no NetEqCapacityConfig was enabled when that channel was created.
int GetNetEqCapacity() const {
auto ch = channels_.find(last_channel_);
ASSERT(ch != channels_.end());
return ch->second->neteq_capacity;
}
private:
int GetNumDevices(int& num) {

View File

@@ -353,6 +353,7 @@ static AudioOptions GetDefaultEngineOptions() {
options.noise_suppression.Set(true);
options.highpass_filter.Set(true);
options.stereo_swapping.Set(false);
options.audio_jitter_buffer_max_packets.Set(50);
options.typing_detection.Set(true);
options.conference_mode.Set(false);
options.adjust_agc_delta.Set(0);
@@ -955,6 +956,14 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) {
}
}
int audio_jitter_buffer_max_packets;
if (options.audio_jitter_buffer_max_packets.Get(
&audio_jitter_buffer_max_packets)) {
LOG(LS_INFO) << "NetEq capacity is " << audio_jitter_buffer_max_packets;
voe_config_.Set<webrtc::NetEqCapacityConfig>(
new webrtc::NetEqCapacityConfig(audio_jitter_buffer_max_packets));
}
bool typing_detection;
if (options.typing_detection.Get(&typing_detection)) {
LOG(LS_INFO) << "Typing detection is enabled? " << typing_detection;

View File

@@ -46,6 +46,7 @@
#include "webrtc/base/thread_checker.h"
#include "webrtc/call.h"
#include "webrtc/common.h"
#include "webrtc/config.h"
#if !defined(LIBPEERCONNECTION_LIB) && \
!defined(LIBPEERCONNECTION_IMPLEMENTATION)

View File

@@ -2882,6 +2882,7 @@ TEST_F(WebRtcVoiceEngineTestFake, SetAudioOptions) {
EXPECT_TRUE(typing_detection_enabled);
EXPECT_EQ(ec_mode, webrtc::kEcConference);
EXPECT_EQ(ns_mode, webrtc::kNsHighSuppression);
EXPECT_EQ(50, voe_.GetNetEqCapacity()); // From GetDefaultEngineOptions().
// Turn echo cancellation off
options.echo_cancellation.Set(false);

View File

@@ -113,6 +113,20 @@ struct VideoEncoderConfig {
int min_transmit_bitrate_bps;
};
// Selects the maximum number of packets the NetEq packet buffer may hold.
// If that limit would be exceeded, the buffer is flushed. In the general
// case the capacity does not affect audio delay, which is governed by the
// target buffer level computed from the jitter profile; only during severe
// network freezes can a larger capacity cause a transient delay increase.
struct NetEqCapacityConfig {
  NetEqCapacityConfig() = default;
  explicit NetEqCapacityConfig(int value) : enabled(true), capacity(value) {}
  bool enabled = false;  // True when a custom capacity has been requested.
  int capacity = 0;      // Maximum number of packets in the buffer.
};
} // namespace webrtc
#endif // WEBRTC_CONFIG_H_

View File

@@ -10,6 +10,7 @@
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
#include "webrtc/base/checks.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
#include "webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.h"
@@ -20,13 +21,20 @@ namespace webrtc {
// Create module
AudioCodingModule* AudioCodingModule::Create(int id) {
return Create(id, Clock::GetRealTimeClock());
Config config;
config.id = id;
config.clock = Clock::GetRealTimeClock();
return Create(config);
}
AudioCodingModule* AudioCodingModule::Create(int id, Clock* clock) {
AudioCodingModule::Config config;
Config config;
config.id = id;
config.clock = clock;
return Create(config);
}
AudioCodingModule* AudioCodingModule::Create(const Config& config) {
return new acm2::AudioCodingModuleImpl(config);
}

View File

@@ -99,6 +99,7 @@ class AudioCodingModule {
//
static AudioCodingModule* Create(int id);
static AudioCodingModule* Create(int id, Clock* clock);
static AudioCodingModule* Create(const Config& config);
virtual ~AudioCodingModule() {};
///////////////////////////////////////////////////////////////////////////

View File

@@ -10,9 +10,11 @@
#include "webrtc/voice_engine/channel.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/format_macros.h"
#include "webrtc/base/timeutils.h"
#include "webrtc/common.h"
#include "webrtc/config.h"
#include "webrtc/modules/audio_device/include/audio_device.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/interface/module_common_types.h"
@@ -757,8 +759,6 @@ Channel::Channel(int32_t channelId,
VoEModuleId(instanceId, channelId), Clock::GetRealTimeClock(), this,
this, this, rtp_payload_registry_.get())),
telephone_event_handler_(rtp_receiver_->GetTelephoneEventHandler()),
audio_coding_(AudioCodingModule::Create(
VoEModuleId(instanceId, channelId))),
_rtpDumpIn(*RtpDump::CreateRtpDump()),
_rtpDumpOut(*RtpDump::CreateRtpDump()),
_outputAudioLevel(),
@@ -828,6 +828,15 @@ Channel::Channel(int32_t channelId,
{
WEBRTC_TRACE(kTraceMemory, kTraceVoice, VoEId(_instanceId,_channelId),
"Channel::Channel() - ctor");
AudioCodingModule::Config acm_config;
acm_config.id = VoEModuleId(instanceId, channelId);
if (config.Get<NetEqCapacityConfig>().enabled) {
acm_config.neteq_config.max_packets_in_buffer =
config.Get<NetEqCapacityConfig>().capacity;
CHECK_GT(acm_config.neteq_config.max_packets_in_buffer, 0);
}
audio_coding_.reset(AudioCodingModule::Create(acm_config));
_inbandDtmfQueue.ResetDtmf();
_inbandDtmfGenerator.Init();
_outputAudioLevel.Clear();