From 2779bab02a3c9e576cdb9927ef86d3fc40772bdc Mon Sep 17 00:00:00 2001 From: solenberg Date: Thu, 17 Nov 2016 04:45:19 -0800 Subject: [PATCH] Support receiving DTMF for multiple RTP clock rates. BUG=webrtc:2795 Review-Url: https://codereview.webrtc.org/2337473002 Cr-Commit-Position: refs/heads/master@{#15128} --- webrtc/media/engine/payload_type_mapper.cc | 10 +- .../engine/payload_type_mapper_unittest.cc | 11 ++ webrtc/media/engine/webrtcvoiceengine.cc | 53 ++++++--- .../engine/webrtcvoiceengine_unittest.cc | 110 +++++++++++------- .../audio_coding/acm2/acm_codec_database.cc | 10 ++ .../audio_coding/acm2/acm_receive_test.cc | 8 +- .../modules/audio_coding/acm2/acm_receiver.cc | 1 - .../modules/audio_coding/acm2/rent_a_codec.cc | 9 ++ .../modules/audio_coding/acm2/rent_a_codec.h | 6 + .../audio_coding/neteq/audio_decoder_impl.cc | 3 + .../neteq/audio_decoder_unittest.cc | 3 + .../audio_coding/neteq/decoder_database.h | 4 + .../modules/audio_coding/neteq/neteq_impl.cc | 2 +- .../audio_coding/neteq/neteq_impl_unittest.cc | 85 +++++++++----- .../audio_coding/neteq/tools/neteq_rtpplay.cc | 41 +++++-- webrtc/test/fuzzers/neteq_rtp_fuzzer.cc | 3 + 16 files changed, 266 insertions(+), 93 deletions(-) diff --git a/webrtc/media/engine/payload_type_mapper.cc b/webrtc/media/engine/payload_type_mapper.cc index a0d4e0cde7..477c7ab093 100644 --- a/webrtc/media/engine/payload_type_mapper.cc +++ b/webrtc/media/engine/payload_type_mapper.cc @@ -53,7 +53,8 @@ PayloadTypeMapper::PayloadTypeMapper() {{"G729", 8000, 1}, 18}, // Payload type assignments currently used by WebRTC. - // Includes video, to reduce collisions (and thus reassignments) + // Includes video and data to reduce collisions (and thus + // reassignments). // RTX codecs mapping to specific video payload types {{kRtxCodecName, 90000, 0, {{kCodecParamAssociatedPayloadType, @@ -74,17 +75,24 @@ PayloadTypeMapper::PayloadTypeMapper() // Other codecs {{kVp8CodecName, 90000, 0}, kDefaultVp8PlType}, {{kVp9CodecName, 90000, 0}, kDefaultVp9PlType}, + {{kGoogleRtpDataCodecName, 0, 0}, kGoogleRtpDataCodecPlType}, {{kIlbcCodecName, 8000, 1}, 102}, {{kIsacCodecName, 16000, 1}, 103}, {{kIsacCodecName, 32000, 1}, 104}, {{kCnCodecName, 16000, 1}, 105}, {{kCnCodecName, 32000, 1}, 106}, {{kH264CodecName, 90000, 0}, kDefaultH264PlType}, + {{kGoogleSctpDataCodecName, 0, 0}, kGoogleSctpDataCodecPlType}, {{kOpusCodecName, 48000, 2, {{"minptime", "10"}, {"useinbandfec", "1"}}}, 111}, {{kRedCodecName, 90000, 0}, kDefaultRedPlType}, {{kUlpfecCodecName, 90000, 0}, kDefaultUlpfecType}, {{kFlexfecCodecName, 90000, 0}, kDefaultFlexfecPlType}, + // TODO(solenberg): Remove the hard coded 16k,32k,48k DTMF once we + // assign payload types dynamically for send side as well. + {{kDtmfCodecName, 48000, 1}, 110}, + {{kDtmfCodecName, 32000, 1}, 112}, + {{kDtmfCodecName, 16000, 1}, 113}, {{kDtmfCodecName, 8000, 1}, 126}}) { // TODO(ossu): Try to keep this as change-proof as possible until we're able // to remove the payload type constants from everywhere in the code. diff --git a/webrtc/media/engine/payload_type_mapper_unittest.cc b/webrtc/media/engine/payload_type_mapper_unittest.cc index 5d6976e666..6ff8c0f658 100644 --- a/webrtc/media/engine/payload_type_mapper_unittest.cc +++ b/webrtc/media/engine/payload_type_mapper_unittest.cc @@ -82,6 +82,12 @@ TEST_F(PayloadTypeMapperTest, WebRTCPayloadTypes) { rtx_mapping(kDefaultH264PlType)); EXPECT_EQ(kDefaultRtxRedPlType, rtx_mapping(kDefaultRedPlType)); + auto data_mapping = [this] (const char *name) { + return FindMapping({name, 0, 0}); + }; + EXPECT_EQ(kGoogleRtpDataCodecPlType, data_mapping(kGoogleRtpDataCodecName)); + EXPECT_EQ(kGoogleSctpDataCodecPlType, data_mapping(kGoogleSctpDataCodecName)); + EXPECT_EQ(102, FindMapping({kIlbcCodecName, 8000, 1})); EXPECT_EQ(103, FindMapping({kIsacCodecName, 16000, 1})); EXPECT_EQ(104, FindMapping({kIsacCodecName, 32000, 1})); @@ -89,6 +95,11 @@ TEST_F(PayloadTypeMapperTest, WebRTCPayloadTypes) { EXPECT_EQ(106, FindMapping({kCnCodecName, 32000, 1})); EXPECT_EQ(111, FindMapping({kOpusCodecName, 48000, 2, {{"minptime", "10"}, {"useinbandfec", "1"}}})); + // TODO(solenberg): Remove 16k, 32k, 48k DTMF checks once these payload types + // are dynamically assigned. + EXPECT_EQ(110, FindMapping({kDtmfCodecName, 48000, 1})); + EXPECT_EQ(112, FindMapping({kDtmfCodecName, 32000, 1})); + EXPECT_EQ(113, FindMapping({kDtmfCodecName, 16000, 1})); EXPECT_EQ(126, FindMapping({kDtmfCodecName, 8000, 1})); } diff --git a/webrtc/media/engine/webrtcvoiceengine.cc b/webrtc/media/engine/webrtcvoiceengine.cc index 9f544c1826..53c8e6fd0d 100644 --- a/webrtc/media/engine/webrtcvoiceengine.cc +++ b/webrtc/media/engine/webrtcvoiceengine.cc @@ -424,7 +424,7 @@ class WebRtcVoiceCodecs final { // Select the preferred send codec (the first non-telephone-event/CN codec). for (const AudioCodec& codec : codecs) { if (IsCodec(codec, kDtmfCodecName) || IsCodec(codec, kCnCodecName)) { - // Skip telephone-event/CN codec, which will be handled later. + // Skip telephone-event/CN codecs - they will be handled later. continue; } @@ -453,7 +453,7 @@ class WebRtcVoiceCodecs final { int max_bitrate_bps; }; // Note: keep the supported packet sizes in ascending order. - static const CodecPref kCodecPrefs[11]; + static const CodecPref kCodecPrefs[14]; static int SelectPacketSize(const CodecPref& codec_pref, int ptime_ms) { int selected_packet_size_ms = codec_pref.packet_sizes_ms[0]; @@ -478,7 +478,7 @@ class WebRtcVoiceCodecs final { } }; -const WebRtcVoiceCodecs::CodecPref WebRtcVoiceCodecs::kCodecPrefs[11] = { +const WebRtcVoiceCodecs::CodecPref WebRtcVoiceCodecs::kCodecPrefs[14] = { {kOpusCodecName, 48000, 2, 111, true, {10, 20, 40, 60}, kOpusMaxBitrateBps}, {kIsacCodecName, 16000, 1, 103, true, {30, 60}, kIsacMaxBitrateBps}, {kIsacCodecName, 32000, 1, 104, true, {30}, kIsacMaxBitrateBps}, @@ -490,7 +490,11 @@ const WebRtcVoiceCodecs::CodecPref WebRtcVoiceCodecs::kCodecPrefs[11] = { {kCnCodecName, 32000, 1, 106, false, {}}, {kCnCodecName, 16000, 1, 105, false, {}}, {kCnCodecName, 8000, 1, 13, false, {}}, - {kDtmfCodecName, 8000, 1, 126, false, {}}}; + {kDtmfCodecName, 48000, 1, 110, false, {}}, + {kDtmfCodecName, 32000, 1, 112, false, {}}, + {kDtmfCodecName, 16000, 1, 113, false, {}}, + {kDtmfCodecName, 8000, 1, 126, false, {}} +}; rtc::Optional ComputeSendBitrate(int max_send_bitrate_bps, int rtp_max_bitrate_bps, @@ -1124,10 +1128,15 @@ AudioCodecs WebRtcVoiceEngine::CollectRecvCodecs() const { const std::vector& specs = decoder_factory_->GetSupportedDecoders(); - // Only generate CN payload types for these clockrates + // Only generate CN payload types for these clockrates: std::map> generate_cn = {{ 8000, false }, { 16000, false }, { 32000, false }}; + // Only generate telephone-event payload types for these clockrates: + std::map> generate_dtmf = {{ 8000, false }, + { 16000, false }, + { 32000, false }, + { 48000, false }}; auto map_format = [&mapper, &out] (const webrtc::SdpAudioFormat& format) { rtc::Optional opt_codec = mapper.ToAudioCodec(format); @@ -1148,25 +1157,37 @@ AudioCodecs WebRtcVoiceEngine::CollectRecvCodecs() const { }; for (const auto& spec : specs) { - if (map_format(spec.format) && spec.allow_comfort_noise) { - // Generate a CN entry if the decoder allows it and we support the - // clockrate. - auto cn = generate_cn.find(spec.format.clockrate_hz); - if (cn != generate_cn.end()) { - cn->second = true; + if (map_format(spec.format)) { + if (spec.allow_comfort_noise) { + // Generate a CN entry if the decoder allows it and we support the + // clockrate. + auto cn = generate_cn.find(spec.format.clockrate_hz); + if (cn != generate_cn.end()) { + cn->second = true; + } + } + + // Generate a telephone-event entry if we support the clockrate. + auto dtmf = generate_dtmf.find(spec.format.clockrate_hz); + if (dtmf != generate_dtmf.end()) { + dtmf->second = true; } } } - // Add CN codecs after "proper" audio codecs + // Add CN codecs after "proper" audio codecs. for (const auto& cn : generate_cn) { if (cn.second) { map_format({kCnCodecName, cn.first, 1}); } } - // Add telephone-event codec last - map_format({kDtmfCodecName, 8000, 1}); + // Add telephone-event codecs last. + for (const auto& dtmf : generate_dtmf) { + if (dtmf.second) { + map_format({kDtmfCodecName, dtmf.first, 1}); + } + } return out; } @@ -1794,6 +1815,10 @@ bool WebRtcVoiceMediaChannel::SetRecvCodecs( // already be receiving packets with that payload type. for (const AudioCodec& codec : codecs) { AudioCodec old_codec; + // TODO(solenberg): This isn't strictly correct. It should be possible to + // add an additional payload type for a codec. That would result in a new + // decoder object being allocated. What shouldn't work is to remove a PT + // mapping that was previously configured. if (FindCodec(recv_codecs_, codec, &old_codec)) { if (old_codec.id != codec.id) { LOG(LS_ERROR) << codec.name << " payload type changed."; diff --git a/webrtc/media/engine/webrtcvoiceengine_unittest.cc b/webrtc/media/engine/webrtcvoiceengine_unittest.cc index 6f0ff6334b..1b7b5697aa 100644 --- a/webrtc/media/engine/webrtcvoiceengine_unittest.cc +++ b/webrtc/media/engine/webrtcvoiceengine_unittest.cc @@ -42,11 +42,11 @@ const cricket::AudioCodec kG722CodecVoE(9, "G722", 16000, 64000, 1); const cricket::AudioCodec kG722CodecSdp(9, "G722", 8000, 64000, 1); const cricket::AudioCodec kCn8000Codec(13, "CN", 8000, 0, 1); const cricket::AudioCodec kCn16000Codec(105, "CN", 16000, 0, 1); -const cricket::AudioCodec kTelephoneEventCodec(106, - "telephone-event", - 8000, - 0, - 1); +const cricket::AudioCodec + kTelephoneEventCodec1(106, "telephone-event", 8000, 0, 1); +const cricket::AudioCodec + kTelephoneEventCodec2(107, "telephone-event", 32000, 0, 1); + const uint32_t kSsrc1 = 0x99; const uint32_t kSsrc2 = 2; const uint32_t kSsrc3 = 3; @@ -235,7 +235,7 @@ class WebRtcVoiceEngineTestFake : public testing::Test { SetSend(true); EXPECT_FALSE(channel_->CanInsertDtmf()); EXPECT_FALSE(channel_->InsertDtmf(ssrc, 1, 111)); - send_parameters_.codecs.push_back(kTelephoneEventCodec); + send_parameters_.codecs.push_back(kTelephoneEventCodec1); SetSendParameters(send_parameters_); EXPECT_TRUE(channel_->CanInsertDtmf()); @@ -255,7 +255,7 @@ class WebRtcVoiceEngineTestFake : public testing::Test { EXPECT_EQ(-1, telephone_event.payload_type); EXPECT_TRUE(channel_->InsertDtmf(ssrc, 2, 123)); telephone_event = GetSendStream(kSsrc1).GetLatestTelephoneEvent(); - EXPECT_EQ(kTelephoneEventCodec.id, telephone_event.payload_type); + EXPECT_EQ(kTelephoneEventCodec1.id, telephone_event.payload_type); EXPECT_EQ(2, telephone_event.event_code); EXPECT_EQ(123, telephone_event.duration_ms); } @@ -634,7 +634,10 @@ TEST_F(WebRtcVoiceEngineTestFake, FindCodec) { // Find ISAC with explicit clockrate and 0 bitrate. EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst(kIsacCodec, &codec_inst)); // Find telephone-event with explicit clockrate and 0 bitrate. - EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst(kTelephoneEventCodec, + EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst(kTelephoneEventCodec1, + &codec_inst)); + // Find telephone-event with explicit clockrate and 0 bitrate. + EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst(kTelephoneEventCodec2, &codec_inst)); // Find ISAC with a different payload id. codec = kIsacCodec; @@ -667,12 +670,14 @@ TEST_F(WebRtcVoiceEngineTestFake, SetRecvCodecs) { cricket::AudioRecvParameters parameters; parameters.codecs.push_back(kIsacCodec); parameters.codecs.push_back(kPcmuCodec); - parameters.codecs.push_back(kTelephoneEventCodec); - parameters.codecs[0].id = 106; // collide with existing telephone-event + parameters.codecs.push_back(kTelephoneEventCodec1); + parameters.codecs.push_back(kTelephoneEventCodec2); + parameters.codecs[0].id = 106; // collide with existing CN 32k parameters.codecs[2].id = 126; EXPECT_TRUE(channel_->SetRecvParameters(parameters)); EXPECT_TRUE(AddRecvStream(kSsrc1)); int channel_num = voe_.GetLastChannel(); + webrtc::CodecInst gcodec; rtc::strcpyn(gcodec.plname, arraysize(gcodec.plname), "ISAC"); gcodec.plfreq = 16000; @@ -680,11 +685,17 @@ TEST_F(WebRtcVoiceEngineTestFake, SetRecvCodecs) { EXPECT_EQ(0, voe_.GetRecPayloadType(channel_num, gcodec)); EXPECT_EQ(106, gcodec.pltype); EXPECT_STREQ("ISAC", gcodec.plname); + rtc::strcpyn(gcodec.plname, arraysize(gcodec.plname), "telephone-event"); gcodec.plfreq = 8000; EXPECT_EQ(0, voe_.GetRecPayloadType(channel_num, gcodec)); EXPECT_EQ(126, gcodec.pltype); EXPECT_STREQ("telephone-event", gcodec.plname); + + gcodec.plfreq = 32000; + EXPECT_EQ(0, voe_.GetRecPayloadType(channel_num, gcodec)); + EXPECT_EQ(107, gcodec.pltype); + EXPECT_STREQ("telephone-event", gcodec.plname); } // Test that we fail to set an unknown inbound codec. @@ -776,12 +787,14 @@ TEST_F(WebRtcVoiceEngineTestFake, SetRecvCodecsWithMultipleStreams) { cricket::AudioRecvParameters parameters; parameters.codecs.push_back(kIsacCodec); parameters.codecs.push_back(kPcmuCodec); - parameters.codecs.push_back(kTelephoneEventCodec); - parameters.codecs[0].id = 106; // collide with existing telephone-event + parameters.codecs.push_back(kTelephoneEventCodec1); + parameters.codecs.push_back(kTelephoneEventCodec2); + parameters.codecs[0].id = 106; // collide with existing CN 32k parameters.codecs[2].id = 126; EXPECT_TRUE(channel_->SetRecvParameters(parameters)); EXPECT_TRUE(AddRecvStream(kSsrc1)); int channel_num2 = voe_.GetLastChannel(); + webrtc::CodecInst gcodec; rtc::strcpyn(gcodec.plname, arraysize(gcodec.plname), "ISAC"); gcodec.plfreq = 16000; @@ -789,19 +802,25 @@ TEST_F(WebRtcVoiceEngineTestFake, SetRecvCodecsWithMultipleStreams) { EXPECT_EQ(0, voe_.GetRecPayloadType(channel_num2, gcodec)); EXPECT_EQ(106, gcodec.pltype); EXPECT_STREQ("ISAC", gcodec.plname); + rtc::strcpyn(gcodec.plname, arraysize(gcodec.plname), "telephone-event"); gcodec.plfreq = 8000; gcodec.channels = 1; EXPECT_EQ(0, voe_.GetRecPayloadType(channel_num2, gcodec)); EXPECT_EQ(126, gcodec.pltype); EXPECT_STREQ("telephone-event", gcodec.plname); + + gcodec.plfreq = 32000; + EXPECT_EQ(0, voe_.GetRecPayloadType(channel_num2, gcodec)); + EXPECT_EQ(107, gcodec.pltype); + EXPECT_STREQ("telephone-event", gcodec.plname); } TEST_F(WebRtcVoiceEngineTestFake, SetRecvCodecsAfterAddingStreams) { EXPECT_TRUE(SetupRecvStream()); cricket::AudioRecvParameters parameters; parameters.codecs.push_back(kIsacCodec); - parameters.codecs[0].id = 106; // collide with existing telephone-event + parameters.codecs[0].id = 106; // collide with existing CN 32k EXPECT_TRUE(channel_->SetRecvParameters(parameters)); int channel_num2 = voe_.GetLastChannel(); @@ -1849,7 +1868,7 @@ TEST_F(WebRtcVoiceEngineTestFake, SetSendCodecsNoCodecs) { TEST_F(WebRtcVoiceEngineTestFake, SetSendCodecsDTMFOnTop) { EXPECT_TRUE(SetupSendStream()); cricket::AudioSendParameters parameters; - parameters.codecs.push_back(kTelephoneEventCodec); + parameters.codecs.push_back(kTelephoneEventCodec1); parameters.codecs.push_back(kIsacCodec); parameters.codecs.push_back(kPcmuCodec); parameters.codecs[0].id = 98; // DTMF @@ -1865,7 +1884,7 @@ TEST_F(WebRtcVoiceEngineTestFake, SetSendCodecsDTMFOnTop) { TEST_F(WebRtcVoiceEngineTestFake, SetSendCodecsDTMFPayloadTypeOutOfRange) { EXPECT_TRUE(SetupSendStream()); cricket::AudioSendParameters parameters; - parameters.codecs.push_back(kTelephoneEventCodec); + parameters.codecs.push_back(kTelephoneEventCodec1); parameters.codecs.push_back(kIsacCodec); parameters.codecs[0].id = 0; // DTMF parameters.codecs[1].id = 96; @@ -1909,7 +1928,7 @@ TEST_F(WebRtcVoiceEngineTestFake, SetSendCodecsCNandDTMFAsCaller) { // TODO(juberti): cn 32000 parameters.codecs.push_back(kCn16000Codec); parameters.codecs.push_back(kCn8000Codec); - parameters.codecs.push_back(kTelephoneEventCodec); + parameters.codecs.push_back(kTelephoneEventCodec1); parameters.codecs[0].id = 96; parameters.codecs[2].id = 97; // wideband CN parameters.codecs[4].id = 98; // DTMF @@ -1933,7 +1952,7 @@ TEST_F(WebRtcVoiceEngineTestFake, SetSendCodecsCNandDTMFAsCallee) { // TODO(juberti): cn 32000 parameters.codecs.push_back(kCn16000Codec); parameters.codecs.push_back(kCn8000Codec); - parameters.codecs.push_back(kTelephoneEventCodec); + parameters.codecs.push_back(kTelephoneEventCodec1); parameters.codecs[0].id = 96; parameters.codecs[2].id = 97; // wideband CN parameters.codecs[4].id = 98; // DTMF @@ -2006,7 +2025,7 @@ TEST_F(WebRtcVoiceEngineTestFake, SetSendCodecsCaseInsensitive) { parameters.codecs.push_back(kPcmuCodec); parameters.codecs.push_back(kCn16000Codec); parameters.codecs.push_back(kCn8000Codec); - parameters.codecs.push_back(kTelephoneEventCodec); + parameters.codecs.push_back(kTelephoneEventCodec1); parameters.codecs[0].name = "iSaC"; parameters.codecs[0].id = 96; parameters.codecs[2].id = 97; // wideband CN @@ -3404,6 +3423,12 @@ TEST(WebRtcVoiceEngineTest, HasCorrectCodecs) { cricket::AudioCodec(96, "CN", 16000, 0, 1), nullptr)); EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst( cricket::AudioCodec(96, "telephone-event", 8000, 0, 1), nullptr)); + EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst( + cricket::AudioCodec(96, "telephone-event", 16000, 0, 1), nullptr)); + EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst( + cricket::AudioCodec(96, "telephone-event", 32000, 0, 1), nullptr)); + EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst( + cricket::AudioCodec(96, "telephone-event", 48000, 0, 1), nullptr)); // Check codecs with an id by id. EXPECT_TRUE(cricket::WebRtcVoiceEngine::ToCodecInst( cricket::AudioCodec(0, "", 8000, 0, 1), nullptr)); // PCMU @@ -3433,27 +3458,34 @@ TEST(WebRtcVoiceEngineTest, HasCorrectCodecs) { // type assignments checked here? It shouldn't really matter. cricket::WebRtcVoiceEngine engine( nullptr, webrtc::MockAudioDecoderFactory::CreateUnusedFactory()); - for (std::vector::const_iterator it = - engine.send_codecs().begin(); - it != engine.send_codecs().end(); ++it) { - if (it->name == "CN" && it->clockrate == 16000) { - EXPECT_EQ(105, it->id); - } else if (it->name == "CN" && it->clockrate == 32000) { - EXPECT_EQ(106, it->id); - } else if (it->name == "ISAC" && it->clockrate == 16000) { - EXPECT_EQ(103, it->id); - } else if (it->name == "ISAC" && it->clockrate == 32000) { - EXPECT_EQ(104, it->id); - } else if (it->name == "G722" && it->clockrate == 8000) { - EXPECT_EQ(9, it->id); - } else if (it->name == "telephone-event") { - EXPECT_EQ(126, it->id); - } else if (it->name == "opus") { - EXPECT_EQ(111, it->id); - ASSERT_TRUE(it->params.find("minptime") != it->params.end()); - EXPECT_EQ("10", it->params.find("minptime")->second); - ASSERT_TRUE(it->params.find("useinbandfec") != it->params.end()); - EXPECT_EQ("1", it->params.find("useinbandfec")->second); + for (const cricket::AudioCodec& codec : engine.send_codecs()) { + if (codec.name == "CN" && codec.clockrate == 16000) { + EXPECT_EQ(105, codec.id); + } else if (codec.name == "CN" && codec.clockrate == 32000) { + EXPECT_EQ(106, codec.id); + } else if (codec.name == "ISAC" && codec.clockrate == 16000) { + EXPECT_EQ(103, codec.id); + } else if (codec.name == "ISAC" && codec.clockrate == 32000) { + EXPECT_EQ(104, codec.id); + } else if (codec.name == "G722" && codec.clockrate == 8000) { + EXPECT_EQ(9, codec.id); + } else if (codec.name == "telephone-event" && codec.clockrate == 8000) { + EXPECT_EQ(126, codec.id); + // TODO(solenberg): 16k, 32k, 48k DTMF should be dynamically assigned. + // Remove these checks once both send and receive side assigns payload types + // dynamically. + } else if (codec.name == "telephone-event" && codec.clockrate == 16000) { + EXPECT_EQ(113, codec.id); + } else if (codec.name == "telephone-event" && codec.clockrate == 32000) { + EXPECT_EQ(112, codec.id); + } else if (codec.name == "telephone-event" && codec.clockrate == 48000) { + EXPECT_EQ(110, codec.id); + } else if (codec.name == "opus") { + EXPECT_EQ(111, codec.id); + ASSERT_TRUE(codec.params.find("minptime") != codec.params.end()); + EXPECT_EQ("10", codec.params.find("minptime")->second); + ASSERT_TRUE(codec.params.find("useinbandfec") != codec.params.end()); + EXPECT_EQ("1", codec.params.find("useinbandfec")->second); } } } diff --git a/webrtc/modules/audio_coding/acm2/acm_codec_database.cc b/webrtc/modules/audio_coding/acm2/acm_codec_database.cc index 5f3c07802b..4f5b263c24 100644 --- a/webrtc/modules/audio_coding/acm2/acm_codec_database.cc +++ b/webrtc/modules/audio_coding/acm2/acm_codec_database.cc @@ -102,6 +102,9 @@ const CodecInst ACMCodecDB::database_[] = { {100, "CN", 48000, 1440, 1, 0}, #endif {106, "telephone-event", 8000, 240, 1, 0}, + {114, "telephone-event", 16000, 240, 1, 0}, + {115, "telephone-event", 32000, 240, 1, 0}, + {116, "telephone-event", 48000, 240, 1, 0}, #ifdef WEBRTC_CODEC_RED {127, "red", 8000, 0, 1, 0}, #endif @@ -154,10 +157,14 @@ const ACMCodecDB::CodecSettings ACMCodecDB::codec_settings_[] = { {1, {240}, 240, 1}, {1, {480}, 480, 1}, {1, {960}, 960, 1}, +// TODO(solenberg): What is this flag? It is never set in the build files. #ifdef ENABLE_48000_HZ {1, {1440}, 1440, 1}, #endif {1, {240}, 240, 1}, + {1, {240}, 240, 1}, + {1, {240}, 240, 1}, + {1, {240}, 240, 1}, #ifdef WEBRTC_CODEC_RED {1, {0}, 0, 1}, #endif @@ -204,6 +211,9 @@ const NetEqDecoder ACMCodecDB::neteq_decoders_[] = { NetEqDecoder::kDecoderCNGswb48kHz, #endif NetEqDecoder::kDecoderAVT, + NetEqDecoder::kDecoderAVT16kHz, + NetEqDecoder::kDecoderAVT32kHz, + NetEqDecoder::kDecoderAVT48kHz, #ifdef WEBRTC_CODEC_RED NetEqDecoder::kDecoderRED, #endif diff --git a/webrtc/modules/audio_coding/acm2/acm_receive_test.cc b/webrtc/modules/audio_coding/acm2/acm_receive_test.cc index c2549323c7..88fe7c25e8 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receive_test.cc +++ b/webrtc/modules/audio_coding/acm2/acm_receive_test.cc @@ -81,8 +81,14 @@ bool RemapPltypeAndUseThisCodec(const char* plname, *pltype = 103; } else if (STR_CASE_CMP(plname, "ISAC") == 0 && plfreq == 32000) { *pltype = 104; - } else if (STR_CASE_CMP(plname, "telephone-event") == 0) { + } else if (STR_CASE_CMP(plname, "telephone-event") == 0 && plfreq == 8000) { *pltype = 106; + } else if (STR_CASE_CMP(plname, "telephone-event") == 0 && plfreq == 16000) { + *pltype = 114; + } else if (STR_CASE_CMP(plname, "telephone-event") == 0 && plfreq == 32000) { + *pltype = 115; + } else if (STR_CASE_CMP(plname, "telephone-event") == 0 && plfreq == 48000) { + *pltype = 116; } else if (STR_CASE_CMP(plname, "red") == 0) { *pltype = 117; } else if (STR_CASE_CMP(plname, "L16") == 0 && plfreq == 8000) { diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc index 73518b84fc..57ae527e88 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc @@ -102,7 +102,6 @@ int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header, RTC_DCHECK(last_audio_format_); last_packet_sample_rate_hz_ = rtc::Optional(ci->plfreq); } - } // |crit_sect_| is released. if (neteq_->InsertPacket(rtp_header, incoming_payload, receive_timestamp) < diff --git a/webrtc/modules/audio_coding/acm2/rent_a_codec.cc b/webrtc/modules/audio_coding/acm2/rent_a_codec.cc index 9468f0c921..6787690544 100644 --- a/webrtc/modules/audio_coding/acm2/rent_a_codec.cc +++ b/webrtc/modules/audio_coding/acm2/rent_a_codec.cc @@ -99,6 +99,15 @@ rtc::Optional RentACodec::NetEqDecoderToSdpAudioFormat( case NetEqDecoder::kDecoderAVT: return rtc::Optional( SdpAudioFormat("telephone-event", 8000, 1)); + case NetEqDecoder::kDecoderAVT16kHz: + return rtc::Optional( + SdpAudioFormat("telephone-event", 16000, 1)); + case NetEqDecoder::kDecoderAVT32kHz: + return rtc::Optional( + SdpAudioFormat("telephone-event", 32000, 1)); + case NetEqDecoder::kDecoderAVT48kHz: + return rtc::Optional( + SdpAudioFormat("telephone-event", 48000, 1)); case NetEqDecoder::kDecoderCNGnb: return rtc::Optional(SdpAudioFormat("cn", 8000, 1)); case NetEqDecoder::kDecoderCNGwb: diff --git a/webrtc/modules/audio_coding/acm2/rent_a_codec.h b/webrtc/modules/audio_coding/acm2/rent_a_codec.h index daa56a43a6..02a5dd9b64 100644 --- a/webrtc/modules/audio_coding/acm2/rent_a_codec.h +++ b/webrtc/modules/audio_coding/acm2/rent_a_codec.h @@ -72,6 +72,9 @@ class RentACodec { kCNFB, #endif kAVT, + kAVT16kHz, + kAVT32kHz, + kAVT48kHz, #ifdef WEBRTC_CODEC_RED kRED, #endif @@ -127,6 +130,9 @@ class RentACodec { kDecoderG722_2ch, kDecoderRED, kDecoderAVT, + kDecoderAVT16kHz, + kDecoderAVT32kHz, + kDecoderAVT48kHz, kDecoderCNGnb, kDecoderCNGwb, kDecoderCNGswb32kHz, diff --git a/webrtc/modules/audio_coding/neteq/audio_decoder_impl.cc b/webrtc/modules/audio_coding/neteq/audio_decoder_impl.cc index b3f307fa15..be35e5f568 100644 --- a/webrtc/modules/audio_coding/neteq/audio_decoder_impl.cc +++ b/webrtc/modules/audio_coding/neteq/audio_decoder_impl.cc @@ -69,6 +69,9 @@ bool CodecSupported(NetEqDecoder codec_type) { #endif case NetEqDecoder::kDecoderRED: case NetEqDecoder::kDecoderAVT: + case NetEqDecoder::kDecoderAVT16kHz: + case NetEqDecoder::kDecoderAVT32kHz: + case NetEqDecoder::kDecoderAVT48kHz: case NetEqDecoder::kDecoderCNGnb: case NetEqDecoder::kDecoderCNGwb: case NetEqDecoder::kDecoderCNGswb32kHz: diff --git a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc index 6b23a48180..3ff629ac89 100644 --- a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc @@ -683,6 +683,9 @@ TEST(AudioDecoder, CodecSupported) { EXPECT_EQ(has_g722, CodecSupported(NetEqDecoder::kDecoderG722_2ch)); EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderRED)); EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderAVT)); + EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderAVT16kHz)); + EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderAVT32kHz)); + EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderAVT48kHz)); EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderCNGnb)); EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderCNGwb)); EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderCNGswb32kHz)); diff --git a/webrtc/modules/audio_coding/neteq/decoder_database.h b/webrtc/modules/audio_coding/neteq/decoder_database.h index 6e172600b7..83789daa91 100644 --- a/webrtc/modules/audio_coding/neteq/decoder_database.h +++ b/webrtc/modules/audio_coding/neteq/decoder_database.h @@ -62,6 +62,10 @@ class DecoderDatabase { void DropDecoder() const { decoder_.reset(); } int SampleRateHz() const { + if (IsDtmf()) { + // DTMF has a 1:1 mapping between clock rate and sample rate. + return audio_format_.clockrate_hz; + } const AudioDecoder* decoder = GetDecoder(); RTC_DCHECK_EQ(1, !!decoder + !!cng_decoder_); return decoder ? decoder->SampleRateHz() : cng_decoder_->sample_rate_hz; diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index 8019e192dc..0fcd842817 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -480,7 +480,7 @@ rtc::Optional NetEqImpl::GetDecoder(int payload_type) const { ci.pltype = payload_type; std::strncpy(ci.plname, di->get_name().c_str(), sizeof(ci.plname)); ci.plname[sizeof(ci.plname) - 1] = '\0'; - ci.plfreq = di->IsRed() || di->IsDtmf() ? 8000 : di->SampleRateHz(); + ci.plfreq = di->IsRed() ? 8000 : di->SampleRateHz(); AudioDecoder* const decoder = di->GetDecoder(); ci.channels = decoder ? decoder->Channels() : 1; return rtc::Optional(ci); diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc index 5178938947..71893e5017 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -173,6 +173,52 @@ class NetEqImplTest : public ::testing::Test { } } + void TestDtmfPacket(NetEqDecoder decoder_type) { + const size_t kPayloadLength = 4; + const uint8_t kPayloadType = 110; + const uint32_t kReceiveTime = 17; + const int kSampleRateHz = 16000; + config_.sample_rate_hz = kSampleRateHz; + UseNoMocks(); + CreateInstance(); + // Event: 2, E bit, Volume: 17, Length: 4336. + uint8_t payload[kPayloadLength] = { 0x02, 0x80 + 0x11, 0x10, 0xF0 }; + WebRtcRTPHeader rtp_header; + rtp_header.header.payloadType = kPayloadType; + rtp_header.header.sequenceNumber = 0x1234; + rtp_header.header.timestamp = 0x12345678; + rtp_header.header.ssrc = 0x87654321; + + EXPECT_EQ(NetEq::kOK, neteq_->RegisterPayloadType( + decoder_type, "telephone-event", kPayloadType)); + + // Insert first packet. + EXPECT_EQ(NetEq::kOK, + neteq_->InsertPacket(rtp_header, payload, kReceiveTime)); + + // Pull audio once. + const size_t kMaxOutputSize = + static_cast(10 * kSampleRateHz / 1000); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + ASSERT_FALSE(muted); + ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + + // Verify first 64 samples of actual output. + const std::vector kOutput({ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1578, -2816, -3460, -3403, -2709, -1594, + -363, 671, 1269, 1328, 908, 202, -513, -964, -955, -431, 504, 1617, + 2602, 3164, 3101, 2364, 1073, -511, -2047, -3198, -3721, -3525, -2688, + -1440, -99, 1015, 1663, 1744, 1319, 588, -171, -680, -747, -315, 515, + 1512, 2378, 2828, 2674, 1877, 568, -986, -2446, -3482, -3864, -3516, + -2534, -1163 }); + ASSERT_GE(kMaxOutputSize, kOutput.size()); + EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data_)); + } + std::unique_ptr neteq_; NetEq::Config config_; TickTimer* tick_timer_ = nullptr; @@ -385,37 +431,20 @@ TEST_F(NetEqImplTest, InsertPacketsUntilBufferIsFull) { EXPECT_EQ(rtp_header.header.sequenceNumber, test_packet->sequence_number); } -TEST_F(NetEqImplTest, TestDtmfPacket) { - UseNoMocks(); - CreateInstance(); - const size_t kPayloadLength = 4; - const uint8_t kPayloadType = 110; - const uint32_t kReceiveTime = 17; - const int kSampleRateHz = 8000; - // Event: 2, E bit, Volume: 63, Length: 4176. - uint8_t payload[kPayloadLength] = { 0x02, 0x80 + 0x3F, 0x10, 0xF0 }; - WebRtcRTPHeader rtp_header; - rtp_header.header.payloadType = kPayloadType; - rtp_header.header.sequenceNumber = 0x1234; - rtp_header.header.timestamp = 0x12345678; - rtp_header.header.ssrc = 0x87654321; +TEST_F(NetEqImplTest, TestDtmfPacketAVT) { + TestDtmfPacket(NetEqDecoder::kDecoderAVT); +} - EXPECT_EQ(NetEq::kOK, neteq_->RegisterPayloadType( - NetEqDecoder::kDecoderAVT, "telephone-event", kPayloadType)); +TEST_F(NetEqImplTest, TestDtmfPacketAVT16kHz) { + TestDtmfPacket(NetEqDecoder::kDecoderAVT16kHz); +} - // Insert one packet. - EXPECT_EQ(NetEq::kOK, - neteq_->InsertPacket(rtp_header, payload, kReceiveTime)); +TEST_F(NetEqImplTest, TestDtmfPacketAVT32kHz) { + TestDtmfPacket(NetEqDecoder::kDecoderAVT32kHz); +} - // Pull audio once. - const size_t kMaxOutputSize = static_cast(10 * kSampleRateHz / 1000); - AudioFrame output; - bool muted; - EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); - ASSERT_FALSE(muted); - ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_); - EXPECT_EQ(1u, output.num_channels_); - EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); +TEST_F(NetEqImplTest, TestDtmfPacketAVT48kHz) { + TestDtmfPacket(NetEqDecoder::kDecoderAVT48kHz); } // This test verifies that timestamps propagate from the incoming packets diff --git a/webrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc b/webrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc index f22459332a..47edc334f5 100644 --- a/webrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc +++ b/webrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc @@ -116,9 +116,18 @@ const bool pcm16b_swb48_dummy = DEFINE_int32(g722, 9, "RTP payload type for G.722"); const bool g722_dummy = google::RegisterFlagValidator(&FLAGS_g722, &ValidatePayloadType); -DEFINE_int32(avt, 106, "RTP payload type for AVT/DTMF"); +DEFINE_int32(avt, 106, "RTP payload type for AVT/DTMF (8 kHz)"); const bool avt_dummy = google::RegisterFlagValidator(&FLAGS_avt, &ValidatePayloadType); +DEFINE_int32(avt_16, 114, "RTP payload type for AVT/DTMF (16 kHz)"); +const bool avt_16_dummy = + google::RegisterFlagValidator(&FLAGS_avt_16, &ValidatePayloadType); +DEFINE_int32(avt_32, 115, "RTP payload type for AVT/DTMF (32 kHz)"); +const bool avt_32_dummy = + google::RegisterFlagValidator(&FLAGS_avt_32, &ValidatePayloadType); +DEFINE_int32(avt_48, 116, "RTP payload type for AVT/DTMF (48 kHz)"); +const bool avt_48_dummy = + google::RegisterFlagValidator(&FLAGS_avt_48, &ValidatePayloadType); DEFINE_int32(red, 117, "RTP payload type for redundant audio (RED)"); const bool red_dummy = google::RegisterFlagValidator(&FLAGS_red, &ValidatePayloadType); @@ -179,7 +188,13 @@ std::string CodecName(NetEqDecoder codec) { case NetEqDecoder::kDecoderRED: return "redundant audio (RED)"; case NetEqDecoder::kDecoderAVT: - return "AVT/DTMF"; + return "AVT/DTMF (8 kHz)"; + case NetEqDecoder::kDecoderAVT16kHz: + return "AVT/DTMF (16 kHz)"; + case NetEqDecoder::kDecoderAVT32kHz: + return "AVT/DTMF (32 kHz)"; + case NetEqDecoder::kDecoderAVT48kHz: + return "AVT/DTMF (48 kHz)"; case NetEqDecoder::kDecoderCNGnb: return "comfort noise (8 kHz)"; case NetEqDecoder::kDecoderCNGwb: @@ -213,6 +228,9 @@ void PrintCodecMapping() { FLAGS_pcm16b_swb48); PrintCodecMappingEntry(NetEqDecoder::kDecoderG722, FLAGS_g722); PrintCodecMappingEntry(NetEqDecoder::kDecoderAVT, FLAGS_avt); + PrintCodecMappingEntry(NetEqDecoder::kDecoderAVT16kHz, FLAGS_avt_16); + PrintCodecMappingEntry(NetEqDecoder::kDecoderAVT32kHz, FLAGS_avt_32); + PrintCodecMappingEntry(NetEqDecoder::kDecoderAVT48kHz, FLAGS_avt_48); PrintCodecMappingEntry(NetEqDecoder::kDecoderRED, FLAGS_red); PrintCodecMappingEntry(NetEqDecoder::kDecoderCNGnb, FLAGS_cn_nb); PrintCodecMappingEntry(NetEqDecoder::kDecoderCNGwb, FLAGS_cn_wb); @@ -223,18 +241,19 @@ void PrintCodecMapping() { int CodecSampleRate(uint8_t payload_type) { if (payload_type == FLAGS_pcmu || payload_type == FLAGS_pcma || payload_type == FLAGS_ilbc || payload_type == FLAGS_pcm16b || - payload_type == FLAGS_cn_nb) + payload_type == FLAGS_cn_nb || payload_type == FLAGS_avt) return 8000; if (payload_type == FLAGS_isac || payload_type == FLAGS_pcm16b_wb || - payload_type == FLAGS_g722 || payload_type == FLAGS_cn_wb) + payload_type == FLAGS_g722 || payload_type == FLAGS_cn_wb || + payload_type == FLAGS_avt_16) return 16000; if (payload_type == FLAGS_isac_swb || payload_type == FLAGS_pcm16b_swb32 || - payload_type == FLAGS_cn_swb32) + payload_type == FLAGS_cn_swb32 || payload_type == FLAGS_avt_32) return 32000; if (payload_type == FLAGS_opus || payload_type == FLAGS_pcm16b_swb48 || - payload_type == FLAGS_cn_swb48) + payload_type == FLAGS_cn_swb48 || payload_type == FLAGS_avt_48) return 48000; - if (payload_type == FLAGS_avt || payload_type == FLAGS_red) + if (payload_type == FLAGS_red) return 0; return -1; } @@ -376,6 +395,11 @@ int RunTest(int argc, char* argv[]) { std::make_pair(NetEqDecoder::kDecoderPCM16Bswb48kHz, "pcm16-swb48")}, {FLAGS_g722, std::make_pair(NetEqDecoder::kDecoderG722, "g722")}, {FLAGS_avt, std::make_pair(NetEqDecoder::kDecoderAVT, "avt")}, + {FLAGS_avt_16, std::make_pair(NetEqDecoder::kDecoderAVT16kHz, "avt-16")}, + {FLAGS_avt_32, + std::make_pair(NetEqDecoder::kDecoderAVT32kHz, "avt-32")}, + {FLAGS_avt_48, + std::make_pair(NetEqDecoder::kDecoderAVT48kHz, "avt-48")}, {FLAGS_red, std::make_pair(NetEqDecoder::kDecoderRED, "red")}, {FLAGS_cn_nb, std::make_pair(NetEqDecoder::kDecoderCNGnb, "cng-nb")}, {FLAGS_cn_wb, std::make_pair(NetEqDecoder::kDecoderCNGwb, "cng-wb")}, @@ -407,7 +431,8 @@ int RunTest(int argc, char* argv[]) { std::set cn_types = std_set_int32_to_uint8( {FLAGS_cn_nb, FLAGS_cn_wb, FLAGS_cn_swb32, FLAGS_cn_swb48}); std::set forbidden_types = - std_set_int32_to_uint8({FLAGS_g722, FLAGS_red, FLAGS_avt}); + std_set_int32_to_uint8({FLAGS_g722, FLAGS_red, FLAGS_avt, + FLAGS_avt_16, FLAGS_avt_32, FLAGS_avt_48}); input.reset(new NetEqReplacementInput(std::move(input), replacement_pt, cn_types, forbidden_types)); diff --git a/webrtc/test/fuzzers/neteq_rtp_fuzzer.cc b/webrtc/test/fuzzers/neteq_rtp_fuzzer.cc index 55c5e13309..c0f250aeb6 100644 --- a/webrtc/test/fuzzers/neteq_rtp_fuzzer.cc +++ b/webrtc/test/fuzzers/neteq_rtp_fuzzer.cc @@ -141,6 +141,9 @@ void FuzzOneInputTest(const uint8_t* data, size_t size) { std::make_pair(NetEqDecoder::kDecoderPCM16Bswb48kHz, "pcm16-swb48"); codecs[9] = std::make_pair(NetEqDecoder::kDecoderG722, "g722"); codecs[106] = std::make_pair(NetEqDecoder::kDecoderAVT, "avt"); + codecs[114] = std::make_pair(NetEqDecoder::kDecoderAVT16kHz, "avt-16"); + codecs[115] = std::make_pair(NetEqDecoder::kDecoderAVT32kHz, "avt-32"); + codecs[116] = std::make_pair(NetEqDecoder::kDecoderAVT48kHz, "avt-48"); codecs[117] = std::make_pair(NetEqDecoder::kDecoderRED, "red"); codecs[13] = std::make_pair(NetEqDecoder::kDecoderCNGnb, "cng-nb"); codecs[98] = std::make_pair(NetEqDecoder::kDecoderCNGwb, "cng-wb");