diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h index f5f037de3a..02b5d3cab8 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h +++ b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h @@ -93,6 +93,15 @@ struct IsacFix { DCHECK_EQ(sample_rate_hz, kFixSampleRate); return 0; } + static inline void SetEncSampRateInDecoder(instance_type* inst, + uint16_t sample_rate_hz) { + DCHECK_EQ(sample_rate_hz, kFixSampleRate); + } + static inline void SetInitialBweBottleneck( + instance_type* inst, + int bottleneck_bits_per_second) { + WebRtcIsacfix_SetInitialBweBottleneck(inst, bottleneck_bits_per_second); + } static inline int16_t UpdateBwEstimate(instance_type* inst, const uint8_t* encoded, int32_t packet_size, diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h b/webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h index a205c6d641..68ffe65bc1 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h +++ b/webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h @@ -379,7 +379,8 @@ extern "C" { int16_t rate, int framesize); - + void WebRtcIsacfix_SetInitialBweBottleneck(ISACFIX_MainStruct* ISAC_main_inst, + int bottleneck_bits_per_second); /**************************************************************************** * WebRtcIsacfix_ControlBwe(...) diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c index 2441e41ccb..bdb807e10b 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c @@ -1110,6 +1110,13 @@ int16_t WebRtcIsacfix_Control(ISACFIX_MainStruct *ISAC_main_inst, return 0; } +void WebRtcIsacfix_SetInitialBweBottleneck(ISACFIX_MainStruct* ISAC_main_inst, + int bottleneck_bits_per_second) { + ISACFIX_SubStruct* inst = (ISACFIX_SubStruct*)ISAC_main_inst; + assert(bottleneck_bits_per_second >= 10000 && + bottleneck_bits_per_second <= 32000); + inst->bwestimator_obj.sendBwAvg = ((uint32_t)bottleneck_bits_per_second) << 7; +} /**************************************************************************** * WebRtcIsacfix_ControlBwe(...) diff --git a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h index 1fe5d312b8..27998923f0 100644 --- a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h +++ b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h @@ -91,6 +91,15 @@ struct IsacFloat { uint16_t sample_rate_hz) { return WebRtcIsac_SetEncSampRate(inst, sample_rate_hz); } + static inline void SetEncSampRateInDecoder(instance_type* inst, + uint16_t sample_rate_hz) { + WebRtcIsac_SetEncSampRateInDecoder(inst, sample_rate_hz); + } + static inline void SetInitialBweBottleneck( + instance_type* inst, + int bottleneck_bits_per_second) { + WebRtcIsac_SetInitialBweBottleneck(inst, bottleneck_bits_per_second); + } static inline int16_t UpdateBwEstimate(instance_type* inst, const uint8_t* encoded, int32_t packet_size, diff --git a/webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h b/webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h index 1fe11bcef0..429fc6b6bf 100644 --- a/webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h +++ b/webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h @@ -269,6 +269,8 @@ extern "C" { int32_t rate, int framesize); + void WebRtcIsac_SetInitialBweBottleneck(ISACStruct* ISAC_main_inst, + int bottleneck_bits_per_second); /****************************************************************************** * WebRtcIsac_ControlBwe(...) @@ -706,13 +708,18 @@ extern "C" { int16_t* decoded, int16_t* speechType); - /* Fills in an IsacBandwidthInfo struct. */ + /* Fills in an IsacBandwidthInfo struct. |inst| should be a decoder. */ void WebRtcIsac_GetBandwidthInfo(ISACStruct* inst, IsacBandwidthInfo* bwinfo); - /* Uses the values from an IsacBandwidthInfo struct. */ + /* Uses the values from an IsacBandwidthInfo struct. |inst| should be an + encoder. */ void WebRtcIsac_SetBandwidthInfo(ISACStruct* inst, const IsacBandwidthInfo* bwinfo); + /* If |inst| is a decoder but not an encoder: tell it what sample rate the + encoder is using, for bandwidth estimation purposes. */ + void WebRtcIsac_SetEncSampRateInDecoder(ISACStruct* inst, int sample_rate_hz); + #if defined(__cplusplus) } #endif diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/isac.c b/webrtc/modules/audio_coding/codecs/isac/main/source/isac.c index a19fd01167..7a51a1e292 100644 --- a/webrtc/modules/audio_coding/codecs/isac/main/source/isac.c +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/isac.c @@ -1578,6 +1578,13 @@ int16_t WebRtcIsac_Control(ISACStruct* ISAC_main_inst, return 0; } +void WebRtcIsac_SetInitialBweBottleneck(ISACStruct* ISAC_main_inst, + int bottleneck_bits_per_second) { + ISACMainStruct* instISAC = (ISACMainStruct*)ISAC_main_inst; + assert(bottleneck_bits_per_second >= 10000 && + bottleneck_bits_per_second <= 32000); + instISAC->bwestimator_obj.send_bw_avg = (float)bottleneck_bits_per_second; +} /**************************************************************************** * WebRtcIsac_ControlBwe(...) @@ -2399,3 +2406,12 @@ void WebRtcIsac_SetBandwidthInfo(ISACStruct* inst, assert(instISAC->initFlag & BIT_MASK_ENC_INIT); WebRtcIsacBw_SetBandwidthInfo(&instISAC->bwestimator_obj, bwinfo); } + +void WebRtcIsac_SetEncSampRateInDecoder(ISACStruct* inst, + int sample_rate_hz) { + ISACMainStruct* instISAC = (ISACMainStruct*)inst; + assert(instISAC->initFlag & BIT_MASK_DEC_INIT); + assert(!(instISAC->initFlag & BIT_MASK_ENC_INIT)); + assert(sample_rate_hz == 16000 || sample_rate_hz == 32000); + instISAC->encoderSamplingRateKHz = sample_rate_hz / 1000; +} diff --git a/webrtc/modules/audio_coding/codecs/isac/unittest.cc b/webrtc/modules/audio_coding/codecs/isac/unittest.cc index a80fd08bcf..d05ffa6e48 100644 --- a/webrtc/modules/audio_coding/codecs/isac/unittest.cc +++ b/webrtc/modules/audio_coding/codecs/isac/unittest.cc @@ -24,10 +24,11 @@ namespace webrtc { namespace { +const int kIsacNumberOfSamples = 32 * 60; // 60 ms at 32 kHz + std::vector LoadSpeechData() { webrtc::test::InputAudioFile input_file( webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm")); - static const int kIsacNumberOfSamples = 32 * 60; // 60 ms at 32 kHz std::vector speech_data(kIsacNumberOfSamples); input_file.Read(kIsacNumberOfSamples, speech_data.data()); return speech_data; @@ -41,32 +42,45 @@ IsacBandwidthInfo GetBwInfo(typename T::instance_type* inst) { return bi; } +// Encodes one packet. Returns the packet duration in milliseconds. template -rtc::Buffer EncodePacket(typename T::instance_type* inst, - const IsacBandwidthInfo* bi, - const int16_t* speech_data, - int framesize_ms) { - rtc::Buffer output(1000); - for (int i = 0;; ++i) { +int EncodePacket(typename T::instance_type* inst, + const IsacBandwidthInfo* bi, + const int16_t* speech_data, + rtc::Buffer* output) { + output->SetSize(1000); + for (int duration_ms = 10;; duration_ms += 10) { if (bi) T::SetBandwidthInfo(inst, bi); - int encoded_bytes = T::Encode(inst, speech_data, output.data()); - if (i + 1 == framesize_ms / 10) { + int encoded_bytes = T::Encode(inst, speech_data, output->data()); + if (encoded_bytes > 0 || duration_ms >= 60) { EXPECT_GT(encoded_bytes, 0); - EXPECT_LE(static_cast(encoded_bytes), output.size()); - output.SetSize(encoded_bytes); - return output; + EXPECT_LE(static_cast(encoded_bytes), output->size()); + output->SetSize(encoded_bytes); + return duration_ms; } - EXPECT_EQ(0, encoded_bytes); } } +template +std::vector DecodePacket(typename T::instance_type* inst, + const rtc::Buffer& encoded) { + std::vector decoded(kIsacNumberOfSamples); + int16_t speech_type; + int nsamples = T::DecodeInternal(inst, encoded.data(), encoded.size(), + &decoded.front(), &speech_type); + EXPECT_GT(nsamples, 0); + EXPECT_LE(static_cast(nsamples), decoded.size()); + decoded.resize(nsamples); + return decoded; +} + class BoundedCapacityChannel final { public: - BoundedCapacityChannel(int rate_bits_per_second) + BoundedCapacityChannel(int sample_rate_hz, int rate_bits_per_second) : current_time_rtp_(0), channel_rate_bytes_per_sample_(rate_bits_per_second / - (8.0 * kSamplesPerSecond)) {} + (8.0 * sample_rate_hz)) {} // Simulate sending the given number of bytes at the given RTP time. Returns // the new current RTP time after the sending is done. @@ -81,47 +95,6 @@ class BoundedCapacityChannel final { // The somewhat strange unit for channel rate, bytes per sample, is because // RTP time is measured in samples: const double channel_rate_bytes_per_sample_; - static const int kSamplesPerSecond = 16000; -}; - -template -struct TestParam {}; - -template <> -struct TestParam { - static const int time_to_settle = 200; - static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { - return rate_bits_per_second; - } -}; - -template <> -struct TestParam { - static const int time_to_settle = 350; - static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { - // For some reason, IsacFix fails to adapt to the channel's actual - // bandwidth. Instead, it settles on a few hundred packets at 10kbit/s, - // then a few hundred at 5kbit/s, then a few hundred at 10kbit/s, and so - // on. The 200 packets starting at 350 are in the middle of the first - // 10kbit/s run. - return 10000; - } -}; - -template <> -struct TestParam { - static const int time_to_settle = 0; - static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { - return 32000; - } -}; - -template <> -struct TestParam { - static const int time_to_settle = 0; - static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { - return 16000; - } }; // Test that the iSAC encoder produces identical output whether or not we use a @@ -129,143 +102,153 @@ struct TestParam { // communicate BW estimation info explicitly. template void TestGetSetBandwidthInfo(const int16_t* speech_data, - int rate_bits_per_second) { - using Param = TestParam; - const int framesize_ms = adaptive ? 60 : 30; + int rate_bits_per_second, + int sample_rate_hz, + int frame_size_ms) { + const int bit_rate = 32000; // Conjoined encoder/decoder pair: typename T::instance_type* encdec; ASSERT_EQ(0, T::Create(&encdec)); ASSERT_EQ(0, T::EncoderInit(encdec, adaptive ? 0 : 1)); ASSERT_EQ(0, T::DecoderInit(encdec)); + ASSERT_EQ(0, T::SetEncSampRate(encdec, sample_rate_hz)); + if (adaptive) + ASSERT_EQ(0, T::ControlBwe(encdec, bit_rate, frame_size_ms, false)); + else + ASSERT_EQ(0, T::Control(encdec, bit_rate, frame_size_ms)); // Disjoint encoder/decoder pair: typename T::instance_type* enc; ASSERT_EQ(0, T::Create(&enc)); ASSERT_EQ(0, T::EncoderInit(enc, adaptive ? 0 : 1)); + ASSERT_EQ(0, T::SetEncSampRate(enc, sample_rate_hz)); + if (adaptive) + ASSERT_EQ(0, T::ControlBwe(enc, bit_rate, frame_size_ms, false)); + else + ASSERT_EQ(0, T::Control(enc, bit_rate, frame_size_ms)); typename T::instance_type* dec; ASSERT_EQ(0, T::Create(&dec)); ASSERT_EQ(0, T::DecoderInit(dec)); + T::SetInitialBweBottleneck(dec, bit_rate); + T::SetEncSampRateInDecoder(dec, sample_rate_hz); // 0. Get initial BW info from decoder. auto bi = GetBwInfo(dec); - BoundedCapacityChannel channel1(rate_bits_per_second), - channel2(rate_bits_per_second); - std::vector packet_sizes; - for (int i = 0; i < Param::time_to_settle + 200; ++i) { + BoundedCapacityChannel channel1(sample_rate_hz, rate_bits_per_second), + channel2(sample_rate_hz, rate_bits_per_second); + + int elapsed_time_ms = 0; + for (int i = 0; elapsed_time_ms < 10000; ++i) { std::ostringstream ss; ss << " i = " << i; SCOPED_TRACE(ss.str()); - // 1. Encode 6 * 10 ms (adaptive) or 3 * 10 ms (nonadaptive). The separate - // encoder is given the BW info before each encode call. - auto bitstream1 = - EncodePacket(encdec, nullptr, speech_data, framesize_ms); - auto bitstream2 = EncodePacket(enc, &bi, speech_data, framesize_ms); + // 1. Encode 3 * 10 ms or 6 * 10 ms. The separate encoder is given the BW + // info before each encode call. + rtc::Buffer bitstream1, bitstream2; + int duration1_ms = + EncodePacket(encdec, nullptr, speech_data, &bitstream1); + int duration2_ms = EncodePacket(enc, &bi, speech_data, &bitstream2); + EXPECT_EQ(duration1_ms, duration2_ms); + if (adaptive) + EXPECT_TRUE(duration1_ms == 30 || duration1_ms == 60); + else + EXPECT_EQ(frame_size_ms, duration1_ms); + ASSERT_EQ(bitstream1.size(), bitstream2.size()); EXPECT_EQ(bitstream1, bitstream2); - if (i > Param::time_to_settle) - packet_sizes.push_back(bitstream1.size()); - // 2. Deliver the encoded data to the decoders (but don't actually ask them - // to decode it; that's not necessary). Then get new BW info from the - // separate decoder. - const int samples_per_packet = 16 * framesize_ms; - const int send_time = i * samples_per_packet; + // 2. Deliver the encoded data to the decoders. + const int send_time = elapsed_time_ms * (sample_rate_hz / 1000); EXPECT_EQ(0, T::UpdateBwEstimate( encdec, bitstream1.data(), bitstream1.size(), i, send_time, channel1.Send(send_time, bitstream1.size()))); EXPECT_EQ(0, T::UpdateBwEstimate( dec, bitstream2.data(), bitstream2.size(), i, send_time, channel2.Send(send_time, bitstream2.size()))); + + // 3. Decode, and get new BW info from the separate decoder. + ASSERT_EQ(0, T::SetDecSampRate(encdec, sample_rate_hz)); + ASSERT_EQ(0, T::SetDecSampRate(dec, sample_rate_hz)); + auto decoded1 = DecodePacket(encdec, bitstream1); + auto decoded2 = DecodePacket(dec, bitstream2); + EXPECT_EQ(decoded1, decoded2); bi = GetBwInfo(dec); + + elapsed_time_ms += duration1_ms; } EXPECT_EQ(0, T::Free(encdec)); EXPECT_EQ(0, T::Free(enc)); EXPECT_EQ(0, T::Free(dec)); - - // The average send bitrate is close to the channel's capacity. - double avg_size = - std::accumulate(packet_sizes.begin(), packet_sizes.end(), 0) / - static_cast(packet_sizes.size()); - double avg_rate_bits_per_second = 8.0 * avg_size / (framesize_ms * 1e-3); - double expected_rate_bits_per_second = - Param::ExpectedRateBitsPerSecond(rate_bits_per_second); - EXPECT_GT(avg_rate_bits_per_second / expected_rate_bits_per_second, 0.95); - EXPECT_LT(avg_rate_bits_per_second / expected_rate_bits_per_second, 1.06); - - // The largest packet isn't that large, and the smallest not that small. - size_t min_size = *std::min_element(packet_sizes.begin(), packet_sizes.end()); - size_t max_size = *std::max_element(packet_sizes.begin(), packet_sizes.end()); - double size_range = max_size - min_size; - EXPECT_LE(size_range / avg_size, 0.16); } +enum class IsacType { Fix, Float }; + +std::ostream& operator<<(std::ostream& os, IsacType t) { + os << (t == IsacType::Fix ? "fix" : "float"); + return os; +} + +struct IsacTestParam { + IsacType isac_type; + bool adaptive; + int channel_rate_bits_per_second; + int sample_rate_hz; + int frame_size_ms; + + friend std::ostream& operator<<(std::ostream& os, const IsacTestParam& itp) { + os << '{' << itp.isac_type << ',' + << (itp.adaptive ? "adaptive" : "nonadaptive") << ',' + << itp.channel_rate_bits_per_second << ',' << itp.sample_rate_hz << ',' + << itp.frame_size_ms << '}'; + return os; + } +}; + +class IsacCommonTest : public testing::TestWithParam {}; + } // namespace -TEST(IsacCommonTest, GetSetBandwidthInfoFloat12kAdaptive) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 12000); +TEST_P(IsacCommonTest, GetSetBandwidthInfo) { + auto p = GetParam(); + auto test_fun = [p] { + if (p.isac_type == IsacType::Fix) { + if (p.adaptive) + return TestGetSetBandwidthInfo; + else + return TestGetSetBandwidthInfo; + } else { + if (p.adaptive) + return TestGetSetBandwidthInfo; + else + return TestGetSetBandwidthInfo; + } + }(); + test_fun(LoadSpeechData().data(), p.channel_rate_bits_per_second, + p.sample_rate_hz, p.frame_size_ms); } -TEST(IsacCommonTest, GetSetBandwidthInfoFloat15kAdaptive) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 15000); +std::vector TestCases() { + static const IsacType types[] = {IsacType::Fix, IsacType::Float}; + static const bool adaptives[] = {true, false}; + static const int channel_rates[] = {12000, 15000, 19000, 22000}; + static const int sample_rates[] = {16000, 32000}; + static const int frame_sizes[] = {30, 60}; + std::vector cases; + for (IsacType type : types) + for (bool adaptive : adaptives) + for (int channel_rate : channel_rates) + for (int sample_rate : sample_rates) + if (!(type == IsacType::Fix && sample_rate == 32000)) + for (int frame_size : frame_sizes) + if (!(sample_rate == 32000 && frame_size == 60)) + cases.push_back( + {type, adaptive, channel_rate, sample_rate, frame_size}); + return cases; } -TEST(IsacCommonTest, GetSetBandwidthInfoFloat19kAdaptive) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 19000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFloat22kAdaptive) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 22000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFix12kAdaptive) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 12000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFix15kAdaptive) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 15000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFix19kAdaptive) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 19000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFix22kAdaptive) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 22000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFloat12k) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 12000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFloat15k) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 15000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFloat19k) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 19000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFloat22k) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 22000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFix12k) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 12000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFix15k) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 15000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFix19k) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 19000); -} - -TEST(IsacCommonTest, GetSetBandwidthInfoFix22k) { - TestGetSetBandwidthInfo(LoadSpeechData().data(), 22000); -} +INSTANTIATE_TEST_CASE_P(, IsacCommonTest, testing::ValuesIn(TestCases())); } // namespace webrtc