From 208a2294cde839025318f1b3d57559cb0611a4e7 Mon Sep 17 00:00:00 2001 From: Henrik Lundin Date: Fri, 8 May 2015 12:58:47 +0200 Subject: [PATCH] Adding a new constraint to set NetEq buffer capacity from peerconnection This change makes it possible to set a custom value for the maximum capacity of the packet buffer in NetEq (the audio jitter buffer). The default value is 50 packets, but any value can be set with the new functionality. R=jmarusic@webrtc.org, mflodman@webrtc.org, pthatcher@webrtc.org Review URL: https://webrtc-codereview.appspot.com/50869004 Cr-Commit-Position: refs/heads/master@{#9159} --- .../app/webrtc/java/jni/peerconnection_jni.cc | 5 +++ .../java/src/org/webrtc/PeerConnection.java | 2 + talk/app/webrtc/peerconnection.cc | 4 +- talk/app/webrtc/peerconnectioninterface.h | 4 +- talk/app/webrtc/webrtcsession.cc | 10 +++-- talk/app/webrtc/webrtcsession.h | 10 ++--- talk/app/webrtc/webrtcsession_unittest.cc | 43 +++++++++++++------ talk/media/base/mediachannel.h | 7 +++ talk/media/webrtc/fakewebrtcvoiceengine.h | 22 +++++++--- talk/media/webrtc/webrtcvoiceengine.cc | 9 ++++ talk/media/webrtc/webrtcvoiceengine.h | 1 + .../webrtc/webrtcvoiceengine_unittest.cc | 1 + webrtc/config.h | 14 ++++++ .../main/acm2/audio_coding_module.cc | 12 +++++- .../main/interface/audio_coding_module.h | 1 + webrtc/voice_engine/channel.cc | 13 +++++- 16 files changed, 124 insertions(+), 34 deletions(-) diff --git a/talk/app/webrtc/java/jni/peerconnection_jni.cc b/talk/app/webrtc/java/jni/peerconnection_jni.cc index 460ad7efb3..f8f363ae38 100644 --- a/talk/app/webrtc/java/jni/peerconnection_jni.cc +++ b/talk/app/webrtc/java/jni/peerconnection_jni.cc @@ -1304,6 +1304,9 @@ JOW(jlong, PeerConnectionFactory_nativeCreatePeerConnection)( "Ljava/util/List;"); jobject j_ice_servers = GetObjectField(jni, j_rtc_config, j_ice_servers_id); + jfieldID j_audio_jitter_buffer_max_packets_id = GetFieldID( + jni, j_rtc_config_class, "audioJitterBufferMaxPackets", + "I"); PeerConnectionInterface::RTCConfiguration rtc_config; rtc_config.type = @@ -1312,6 +1315,8 @@ JOW(jlong, PeerConnectionFactory_nativeCreatePeerConnection)( rtc_config.tcp_candidate_policy = JavaTcpCandidatePolicyToNativeType(jni, j_tcp_candidate_policy); JavaIceServersToJsepIceServers(jni, j_ice_servers, &rtc_config.servers); + rtc_config.audio_jitter_buffer_max_packets = + GetIntField(jni, j_rtc_config, j_audio_jitter_buffer_max_packets_id); PCOJava* observer = reinterpret_cast(observer_p); observer->SetConstraints(new ConstraintsWrapper(jni, j_constraints)); diff --git a/talk/app/webrtc/java/src/org/webrtc/PeerConnection.java b/talk/app/webrtc/java/src/org/webrtc/PeerConnection.java index 8fcc975cc9..80e7bfe6c9 100644 --- a/talk/app/webrtc/java/src/org/webrtc/PeerConnection.java +++ b/talk/app/webrtc/java/src/org/webrtc/PeerConnection.java @@ -128,12 +128,14 @@ public class PeerConnection { public List iceServers; public BundlePolicy bundlePolicy; public TcpCandidatePolicy tcpCandidatePolicy; + public int audioJitterBufferMaxPackets; public RTCConfiguration(List iceServers) { iceTransportsType = IceTransportsType.ALL; bundlePolicy = BundlePolicy.BALANCED; tcpCandidatePolicy = TcpCandidatePolicy.ENABLED; this.iceServers = iceServers; + audioJitterBufferMaxPackets = 50; } }; diff --git a/talk/app/webrtc/peerconnection.cc b/talk/app/webrtc/peerconnection.cc index 39284c1289..52260413f7 100644 --- a/talk/app/webrtc/peerconnection.cc +++ b/talk/app/webrtc/peerconnection.cc @@ -382,9 +382,7 @@ bool PeerConnection::Initialize( // Initialize the WebRtcSession. It creates transport channels etc. if (!session_->Initialize(factory_->options(), constraints, - dtls_identity_service, - configuration.type, - configuration.bundle_policy)) + dtls_identity_service, configuration)) return false; // Register PeerConnection as receiver of local ice candidates. diff --git a/talk/app/webrtc/peerconnectioninterface.h b/talk/app/webrtc/peerconnectioninterface.h index ed4f5b3855..e32676e259 100644 --- a/talk/app/webrtc/peerconnectioninterface.h +++ b/talk/app/webrtc/peerconnectioninterface.h @@ -211,11 +211,13 @@ class PeerConnectionInterface : public rtc::RefCountInterface { IceServers servers; BundlePolicy bundle_policy; TcpCandidatePolicy tcp_candidate_policy; + int audio_jitter_buffer_max_packets; RTCConfiguration() : type(kAll), bundle_policy(kBundlePolicyBalanced), - tcp_candidate_policy(kTcpCandidatePolicyEnabled) {} + tcp_candidate_policy(kTcpCandidatePolicyEnabled), + audio_jitter_buffer_max_packets(50) {} }; struct RTCOfferAnswerOptions { diff --git a/talk/app/webrtc/webrtcsession.cc b/talk/app/webrtc/webrtcsession.cc index dd5d858ea7..e2a9d60f2c 100644 --- a/talk/app/webrtc/webrtcsession.cc +++ b/talk/app/webrtc/webrtcsession.cc @@ -521,9 +521,8 @@ bool WebRtcSession::Initialize( const PeerConnectionFactoryInterface::Options& options, const MediaConstraintsInterface* constraints, DTLSIdentityServiceInterface* dtls_identity_service, - PeerConnectionInterface::IceTransportsType ice_transport_type, - PeerConnectionInterface::BundlePolicy bundle_policy) { - bundle_policy_ = bundle_policy; + const PeerConnectionInterface::RTCConfiguration& rtc_configuration) { + bundle_policy_ = rtc_configuration.bundle_policy; // TODO(perkj): Take |constraints| into consideration. Return false if not all // mandatory constraints can be fulfilled. Note that |constraints| @@ -640,6 +639,9 @@ bool WebRtcSession::Initialize( MediaConstraintsInterface::kCombinedAudioVideoBwe, &audio_options_.combined_audio_video_bwe); + audio_options_.audio_jitter_buffer_max_packets.Set( + rtc_configuration.audio_jitter_buffer_max_packets); + const cricket::VideoCodec default_codec( JsepSessionDescription::kDefaultVideoCodecId, JsepSessionDescription::kDefaultVideoCodecName, @@ -667,7 +669,7 @@ bool WebRtcSession::Initialize( webrtc_session_desc_factory_->SetSdesPolicy(cricket::SEC_DISABLED); } port_allocator()->set_candidate_filter( - ConvertIceTransportTypeToCandidateFilter(ice_transport_type)); + ConvertIceTransportTypeToCandidateFilter(rtc_configuration.type)); return true; } diff --git a/talk/app/webrtc/webrtcsession.h b/talk/app/webrtc/webrtcsession.h index ce0fb0cb0f..aa1deb523c 100644 --- a/talk/app/webrtc/webrtcsession.h +++ b/talk/app/webrtc/webrtcsession.h @@ -117,11 +117,11 @@ class WebRtcSession : public cricket::BaseSession, MediaStreamSignaling* mediastream_signaling); virtual ~WebRtcSession(); - bool Initialize(const PeerConnectionFactoryInterface::Options& options, - const MediaConstraintsInterface* constraints, - DTLSIdentityServiceInterface* dtls_identity_service, - PeerConnectionInterface::IceTransportsType ice_transport_type, - PeerConnectionInterface::BundlePolicy bundle_policy); + bool Initialize( + const PeerConnectionFactoryInterface::Options& options, + const MediaConstraintsInterface* constraints, + DTLSIdentityServiceInterface* dtls_identity_service, + const PeerConnectionInterface::RTCConfiguration& rtc_configuration); // Deletes the voice, video and data channel and changes the session state // to STATE_RECEIVEDTERMINATE. void Terminate(); diff --git a/talk/app/webrtc/webrtcsession_unittest.cc b/talk/app/webrtc/webrtcsession_unittest.cc index 3efc112c6a..e4f39f822b 100644 --- a/talk/app/webrtc/webrtcsession_unittest.cc +++ b/talk/app/webrtc/webrtcsession_unittest.cc @@ -156,6 +156,8 @@ static const char kSdpWithRtx[] = "a=rtpmap:96 rtx/90000\r\n" "a=fmtp:96 apt=0\r\n"; +static const int kAudioJitterBufferMaxPackets = 50; + // Add some extra |newlines| to the |message| after |line|. static void InjectAfter(const std::string& line, const std::string& newlines, @@ -383,8 +385,7 @@ class WebRtcSessionTest : public testing::Test { void Init( DTLSIdentityServiceInterface* identity_service, - PeerConnectionInterface::IceTransportsType ice_transport_type, - PeerConnectionInterface::BundlePolicy bundle_policy) { + const PeerConnectionInterface::RTCConfiguration& rtc_configuration) { ASSERT_TRUE(session_.get() == NULL); session_.reset(new WebRtcSessionForTest( channel_manager_.get(), rtc::Thread::Current(), @@ -398,33 +399,51 @@ class WebRtcSessionTest : public testing::Test { observer_.ice_gathering_state_); EXPECT_TRUE(session_->Initialize(options_, constraints_.get(), - identity_service, ice_transport_type, - bundle_policy)); + identity_service, rtc_configuration)); session_->set_metrics_observer(&metrics_observer_); } void Init() { - Init(NULL, PeerConnectionInterface::kAll, - PeerConnectionInterface::kBundlePolicyBalanced); + PeerConnectionInterface::RTCConfiguration configuration; + configuration.type = PeerConnectionInterface::kAll; + configuration.bundle_policy = + PeerConnectionInterface::kBundlePolicyBalanced; + configuration.audio_jitter_buffer_max_packets = + kAudioJitterBufferMaxPackets; + Init(NULL, configuration); } void InitWithIceTransport( PeerConnectionInterface::IceTransportsType ice_transport_type) { - Init(NULL, ice_transport_type, - PeerConnectionInterface::kBundlePolicyBalanced); + PeerConnectionInterface::RTCConfiguration configuration; + configuration.type = ice_transport_type; + configuration.bundle_policy = + PeerConnectionInterface::kBundlePolicyBalanced; + configuration.audio_jitter_buffer_max_packets = + kAudioJitterBufferMaxPackets; + Init(NULL, configuration); } void InitWithBundlePolicy( PeerConnectionInterface::BundlePolicy bundle_policy) { - Init(NULL, PeerConnectionInterface::kAll, bundle_policy); + PeerConnectionInterface::RTCConfiguration configuration; + configuration.type = PeerConnectionInterface::kAll; + configuration.bundle_policy = bundle_policy; + configuration.audio_jitter_buffer_max_packets = + kAudioJitterBufferMaxPackets; + Init(NULL, configuration); } void InitWithDtls(bool identity_request_should_fail = false) { FakeIdentityService* identity_service = new FakeIdentityService(); identity_service->set_should_fail(identity_request_should_fail); - Init(identity_service, - PeerConnectionInterface::kAll, - PeerConnectionInterface::kBundlePolicyBalanced); + PeerConnectionInterface::RTCConfiguration configuration; + configuration.type = PeerConnectionInterface::kAll; + configuration.bundle_policy = + PeerConnectionInterface::kBundlePolicyBalanced; + configuration.audio_jitter_buffer_max_packets = + kAudioJitterBufferMaxPackets; + Init(identity_service, configuration); } void InitWithDtmfCodec() { diff --git a/talk/media/base/mediachannel.h b/talk/media/base/mediachannel.h index dd78f0e04e..d77ddbb248 100644 --- a/talk/media/base/mediachannel.h +++ b/talk/media/base/mediachannel.h @@ -150,6 +150,8 @@ struct AudioOptions { noise_suppression.SetFrom(change.noise_suppression); highpass_filter.SetFrom(change.highpass_filter); stereo_swapping.SetFrom(change.stereo_swapping); + audio_jitter_buffer_max_packets.SetFrom( + change.audio_jitter_buffer_max_packets); typing_detection.SetFrom(change.typing_detection); aecm_generate_comfort_noise.SetFrom(change.aecm_generate_comfort_noise); conference_mode.SetFrom(change.conference_mode); @@ -180,6 +182,7 @@ struct AudioOptions { noise_suppression == o.noise_suppression && highpass_filter == o.highpass_filter && stereo_swapping == o.stereo_swapping && + audio_jitter_buffer_max_packets == o.audio_jitter_buffer_max_packets && typing_detection == o.typing_detection && aecm_generate_comfort_noise == o.aecm_generate_comfort_noise && conference_mode == o.conference_mode && @@ -210,6 +213,8 @@ struct AudioOptions { ost << ToStringIfSet("ns", noise_suppression); ost << ToStringIfSet("hf", highpass_filter); ost << ToStringIfSet("swap", stereo_swapping); + ost << ToStringIfSet("audio_jitter_buffer_max_packets", + audio_jitter_buffer_max_packets); ost << ToStringIfSet("typing", typing_detection); ost << ToStringIfSet("comfort_noise", aecm_generate_comfort_noise); ost << ToStringIfSet("conference", conference_mode); @@ -248,6 +253,8 @@ struct AudioOptions { Settable highpass_filter; // Audio processing to swap the left and right channels. Settable stereo_swapping; + // Audio receiver jitter buffer (NetEq) max capacity in number of packets. + Settable audio_jitter_buffer_max_packets; // Audio processing to detect typing. Settable typing_detection; Settable aecm_generate_comfort_noise; diff --git a/talk/media/webrtc/fakewebrtcvoiceengine.h b/talk/media/webrtc/fakewebrtcvoiceengine.h index 24ef846058..5b86b538be 100644 --- a/talk/media/webrtc/fakewebrtcvoiceengine.h +++ b/talk/media/webrtc/fakewebrtcvoiceengine.h @@ -41,6 +41,7 @@ #include "webrtc/base/checks.h" #include "webrtc/base/gunit.h" #include "webrtc/base/stringutils.h" +#include "webrtc/config.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" namespace cricket { @@ -213,7 +214,8 @@ class FakeWebRtcVoiceEngine send_audio_level_ext_(-1), receive_audio_level_ext_(-1), send_absolute_sender_time_ext_(-1), - receive_absolute_sender_time_ext_(-1) { + receive_absolute_sender_time_ext_(-1), + neteq_capacity(-1) { memset(&send_codec, 0, sizeof(send_codec)); memset(&rx_agc_config, 0, sizeof(rx_agc_config)); } @@ -249,6 +251,7 @@ class FakeWebRtcVoiceEngine webrtc::CodecInst send_codec; webrtc::PacketTime last_rtp_packet_time; std::list packets; + int neteq_capacity; }; FakeWebRtcVoiceEngine(const cricket::AudioCodec* const* codecs, @@ -391,7 +394,7 @@ class FakeWebRtcVoiceEngine true); } } - int AddChannel() { + int AddChannel(const webrtc::Config& config) { if (fail_create_channel_) { return -1; } @@ -401,6 +404,9 @@ class FakeWebRtcVoiceEngine GetCodec(i, codec); ch->recv_codecs.push_back(codec); } + if (config.Get().enabled) { + ch->neteq_capacity = config.Get().capacity; + } channels_[++last_channel_] = ch; return last_channel_; } @@ -447,10 +453,11 @@ class FakeWebRtcVoiceEngine return &audio_processing_; } WEBRTC_FUNC(CreateChannel, ()) { - return AddChannel(); + webrtc::Config empty_config; + return AddChannel(empty_config); } - WEBRTC_FUNC(CreateChannel, (const webrtc::Config& /*config*/)) { - return AddChannel(); + WEBRTC_FUNC(CreateChannel, (const webrtc::Config& config)) { + return AddChannel(config); } WEBRTC_FUNC(DeleteChannel, (int channel)) { WEBRTC_CHECK_CHANNEL(channel); @@ -1243,6 +1250,11 @@ class FakeWebRtcVoiceEngine WEBRTC_STUB(GetAudioFrame, (int channel, int desired_sample_rate_hz, webrtc::AudioFrame* frame)); WEBRTC_STUB(SetExternalMixing, (int channel, bool enable)); + int GetNetEqCapacity() const { + auto ch = channels_.find(last_channel_); + ASSERT(ch != channels_.end()); + return ch->second->neteq_capacity; + } private: int GetNumDevices(int& num) { diff --git a/talk/media/webrtc/webrtcvoiceengine.cc b/talk/media/webrtc/webrtcvoiceengine.cc index baae3de3ef..fcf8ef754f 100644 --- a/talk/media/webrtc/webrtcvoiceengine.cc +++ b/talk/media/webrtc/webrtcvoiceengine.cc @@ -353,6 +353,7 @@ static AudioOptions GetDefaultEngineOptions() { options.noise_suppression.Set(true); options.highpass_filter.Set(true); options.stereo_swapping.Set(false); + options.audio_jitter_buffer_max_packets.Set(50); options.typing_detection.Set(true); options.conference_mode.Set(false); options.adjust_agc_delta.Set(0); @@ -955,6 +956,14 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { } } + int audio_jitter_buffer_max_packets; + if (options.audio_jitter_buffer_max_packets.Get( + &audio_jitter_buffer_max_packets)) { + LOG(LS_INFO) << "NetEq capacity is " << audio_jitter_buffer_max_packets; + voe_config_.Set( + new webrtc::NetEqCapacityConfig(audio_jitter_buffer_max_packets)); + } + bool typing_detection; if (options.typing_detection.Get(&typing_detection)) { LOG(LS_INFO) << "Typing detection is enabled? " << typing_detection; diff --git a/talk/media/webrtc/webrtcvoiceengine.h b/talk/media/webrtc/webrtcvoiceengine.h index 4596c4db8f..242467dd99 100644 --- a/talk/media/webrtc/webrtcvoiceengine.h +++ b/talk/media/webrtc/webrtcvoiceengine.h @@ -46,6 +46,7 @@ #include "webrtc/base/thread_checker.h" #include "webrtc/call.h" #include "webrtc/common.h" +#include "webrtc/config.h" #if !defined(LIBPEERCONNECTION_LIB) && \ !defined(LIBPEERCONNECTION_IMPLEMENTATION) diff --git a/talk/media/webrtc/webrtcvoiceengine_unittest.cc b/talk/media/webrtc/webrtcvoiceengine_unittest.cc index 7737f3413a..f995093648 100644 --- a/talk/media/webrtc/webrtcvoiceengine_unittest.cc +++ b/talk/media/webrtc/webrtcvoiceengine_unittest.cc @@ -2882,6 +2882,7 @@ TEST_F(WebRtcVoiceEngineTestFake, SetAudioOptions) { EXPECT_TRUE(typing_detection_enabled); EXPECT_EQ(ec_mode, webrtc::kEcConference); EXPECT_EQ(ns_mode, webrtc::kNsHighSuppression); + EXPECT_EQ(50, voe_.GetNetEqCapacity()); // From GetDefaultEngineOptions(). // Turn echo cancellation off options.echo_cancellation.Set(false); diff --git a/webrtc/config.h b/webrtc/config.h index 4e2faa30e2..09633ed492 100644 --- a/webrtc/config.h +++ b/webrtc/config.h @@ -113,6 +113,20 @@ struct VideoEncoderConfig { int min_transmit_bitrate_bps; }; +// Controls the capacity of the packet buffer in NetEq. The capacity is the +// maximum number of packets that the buffer can contain. If the limit is +// exceeded, the buffer will be flushed. The capacity does not affect the actual +// audio delay in the general case, since this is governed by the target buffer +// level (calculated from the jitter profile). It is only in the rare case of +// severe network freezes that a higher capacity will lead to a (transient) +// increase in audio delay. +struct NetEqCapacityConfig { + NetEqCapacityConfig() : enabled(false), capacity(0) {} + explicit NetEqCapacityConfig(int value) : enabled(true), capacity(value) {} + bool enabled; + int capacity; +}; + } // namespace webrtc #endif // WEBRTC_CONFIG_H_ diff --git a/webrtc/modules/audio_coding/main/acm2/audio_coding_module.cc b/webrtc/modules/audio_coding/main/acm2/audio_coding_module.cc index fa6349139c..51b9a78896 100644 --- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module.cc +++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module.cc @@ -10,6 +10,7 @@ #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h" +#include "webrtc/base/checks.h" #include "webrtc/common_types.h" #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h" #include "webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.h" @@ -20,13 +21,20 @@ namespace webrtc { // Create module AudioCodingModule* AudioCodingModule::Create(int id) { - return Create(id, Clock::GetRealTimeClock()); + Config config; + config.id = id; + config.clock = Clock::GetRealTimeClock(); + return Create(config); } AudioCodingModule* AudioCodingModule::Create(int id, Clock* clock) { - AudioCodingModule::Config config; + Config config; config.id = id; config.clock = clock; + return Create(config); +} + +AudioCodingModule* AudioCodingModule::Create(const Config& config) { return new acm2::AudioCodingModuleImpl(config); } diff --git a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h index 4d4a6f325b..4660589162 100644 --- a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h +++ b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h @@ -99,6 +99,7 @@ class AudioCodingModule { // static AudioCodingModule* Create(int id); static AudioCodingModule* Create(int id, Clock* clock); + static AudioCodingModule* Create(const Config& config); virtual ~AudioCodingModule() {}; /////////////////////////////////////////////////////////////////////////// diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index 40a974bd37..c477538aae 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -10,9 +10,11 @@ #include "webrtc/voice_engine/channel.h" +#include "webrtc/base/checks.h" #include "webrtc/base/format_macros.h" #include "webrtc/base/timeutils.h" #include "webrtc/common.h" +#include "webrtc/config.h" #include "webrtc/modules/audio_device/include/audio_device.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/interface/module_common_types.h" @@ -757,8 +759,6 @@ Channel::Channel(int32_t channelId, VoEModuleId(instanceId, channelId), Clock::GetRealTimeClock(), this, this, this, rtp_payload_registry_.get())), telephone_event_handler_(rtp_receiver_->GetTelephoneEventHandler()), - audio_coding_(AudioCodingModule::Create( - VoEModuleId(instanceId, channelId))), _rtpDumpIn(*RtpDump::CreateRtpDump()), _rtpDumpOut(*RtpDump::CreateRtpDump()), _outputAudioLevel(), @@ -828,6 +828,15 @@ Channel::Channel(int32_t channelId, { WEBRTC_TRACE(kTraceMemory, kTraceVoice, VoEId(_instanceId,_channelId), "Channel::Channel() - ctor"); + AudioCodingModule::Config acm_config; + acm_config.id = VoEModuleId(instanceId, channelId); + if (config.Get().enabled) { + acm_config.neteq_config.max_packets_in_buffer = + config.Get().capacity; + CHECK_GT(acm_config.neteq_config.max_packets_in_buffer, 0); + } + audio_coding_.reset(AudioCodingModule::Create(acm_config)); + _inbandDtmfQueue.ResetDtmf(); _inbandDtmfGenerator.Init(); _outputAudioLevel.Clear();