diff --git a/modules/audio_coding/BUILD.gn b/modules/audio_coding/BUILD.gn index da055bb7d2..b222ad716e 100644 --- a/modules/audio_coding/BUILD.gn +++ b/modules/audio_coding/BUILD.gn @@ -1143,14 +1143,10 @@ rtc_source_set("neteq_tools") { sources = [ "neteq/tools/fake_decode_from_file.cc", "neteq/tools/fake_decode_from_file.h", - "neteq/tools/input_audio_file.cc", - "neteq/tools/input_audio_file.h", "neteq/tools/neteq_delay_analyzer.cc", "neteq/tools/neteq_delay_analyzer.h", "neteq/tools/neteq_replacement_input.cc", "neteq/tools/neteq_replacement_input.h", - "neteq/tools/resample_input_audio_file.cc", - "neteq/tools/resample_input_audio_file.h", ] public_configs = [ ":neteq_tools_config" ] @@ -1165,16 +1161,31 @@ rtc_source_set("neteq_tools") { "../../api:array_view", "../../api:optional", "../../api/audio_codecs:audio_codecs_api", - "../../common_audio", "../../rtc_base:rtc_base_approved", "../rtp_rtcp", ] public_deps = [ + ":neteq_input_audio_tools", ":neteq_tools_minimal", ] } +rtc_source_set("neteq_input_audio_tools") { + sources = [ + "neteq/tools/input_audio_file.cc", + "neteq/tools/input_audio_file.h", + "neteq/tools/resample_input_audio_file.cc", + "neteq/tools/resample_input_audio_file.h", + ] + + deps = [ + "../..:webrtc_common", + "../../common_audio", + "../../rtc_base:rtc_base_approved", + ] +} + if (rtc_enable_protobuf) { rtc_static_library("rtc_event_log_source") { testonly = true diff --git a/modules/audio_coding/neteq/delay_manager.cc b/modules/audio_coding/neteq/delay_manager.cc index ab98a068b5..77a41fd887 100644 --- a/modules/audio_coding/neteq/delay_manager.cc +++ b/modules/audio_coding/neteq/delay_manager.cc @@ -14,12 +14,14 @@ #include #include // max, min +#include #include "common_audio/signal_processing/include/signal_processing_library.h" #include "modules/audio_coding/neteq/delay_peak_detector.h" #include "modules/include/module_common_types.h" #include "rtc_base/logging.h" #include "rtc_base/safe_conversions.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { @@ -31,7 +33,7 @@ DelayManager::DelayManager(size_t max_packets_in_buffer, iat_vector_(kMaxIat + 1, 0), iat_factor_(0), tick_timer_(tick_timer), - base_target_level_(4), // In Q0 domain. + base_target_level_(4), // In Q0 domain. target_level_(base_target_level_ << 8), // In Q8 domain. packet_len_ms_(0), streaming_mode_(false), @@ -43,7 +45,9 @@ DelayManager::DelayManager(size_t max_packets_in_buffer, iat_cumulative_sum_(0), max_iat_cumulative_sum_(0), peak_detector_(*peak_detector), - last_pack_cng_or_dtmf_(1) { + last_pack_cng_or_dtmf_(1), + frame_length_change_experiment_( + field_trial::IsEnabled("WebRTC-Audio-NetEqFramelengthExperiment")) { assert(peak_detector); // Should never be NULL. Reset(); } @@ -298,6 +302,10 @@ int DelayManager::SetPacketAudioLength(int length_ms) { LOG_F(LS_ERROR) << "length_ms = " << length_ms; return -1; } + if (frame_length_change_experiment_ && packet_len_ms_ != length_ms) { + iat_vector_ = ScaleHistogram(iat_vector_, packet_len_ms_, length_ms); + } + packet_len_ms_ = length_ms; peak_detector_.SetPacketAudioLength(packet_len_ms_); packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch(); @@ -378,6 +386,41 @@ void DelayManager::RegisterEmptyPacket() { ++last_seq_no_; } +DelayManager::IATVector DelayManager::ScaleHistogram(const IATVector& histogram, + int old_packet_length, + int new_packet_length) { + RTC_DCHECK_GT(new_packet_length, 0); + RTC_DCHECK_EQ(old_packet_length % 10, 0); + RTC_DCHECK_EQ(new_packet_length % 10, 0); + IATVector new_histogram(histogram.size(), 0); + int acc = 0; + int time_counter = 0; + size_t new_histogram_idx = 0; + for (size_t i = 0; i < histogram.size(); i++) { + acc += histogram[i]; + time_counter += old_packet_length; + // The bins should be scaled, to ensure the histogram still sums to one. + const int scaled_acc = acc * new_packet_length / time_counter; + int actually_used_acc = 0; + while (time_counter >= new_packet_length) { + actually_used_acc += scaled_acc; + new_histogram[new_histogram_idx] += scaled_acc; + new_histogram_idx = + std::min(new_histogram_idx + 1, new_histogram.size() - 1); + time_counter -= new_packet_length; + } + // Only subtract the part that was succesfully written to the new histogram. + acc -= actually_used_acc; + } + // If there is anything left in acc (due to rounding errors), add it to the + // last bin. + new_histogram[new_histogram_idx] += acc; + RTC_DCHECK_EQ(histogram.size(), new_histogram.size()); + RTC_DCHECK_EQ(accumulate(histogram.begin(), histogram.end(), 0), + accumulate(new_histogram.begin(), new_histogram.end(), 0)); + return new_histogram; +} + bool DelayManager::SetMinimumDelay(int delay_ms) { // Minimum delay shouldn't be more than maximum delay, if any maximum is set. // Also, if possible check |delay| to less than 75% of diff --git a/modules/audio_coding/neteq/delay_manager.h b/modules/audio_coding/neteq/delay_manager.h index b7cdab3d89..0de03fc869 100644 --- a/modules/audio_coding/neteq/delay_manager.h +++ b/modules/audio_coding/neteq/delay_manager.h @@ -100,6 +100,13 @@ class DelayManager { // packet will shift the sequence numbers for the following packets. virtual void RegisterEmptyPacket(); + // Apply compression or stretching to the IAT histogram, for a change in frame + // size. This returns an updated histogram. This function is public for + // testability. + static IATVector ScaleHistogram(const IATVector& histogram, + int old_packet_length, + int new_packet_length); + // Accessors and mutators. // Assuming |delay| is in valid range. virtual bool SetMinimumDelay(int delay_ms); @@ -166,6 +173,7 @@ class DelayManager { std::unique_ptr max_iat_stopwatch_; DelayPeakDetector& peak_detector_; int last_pack_cng_or_dtmf_; + const bool frame_length_change_experiment_; RTC_DISALLOW_COPY_AND_ASSIGN(DelayManager); }; diff --git a/modules/audio_coding/neteq/delay_manager_unittest.cc b/modules/audio_coding/neteq/delay_manager_unittest.cc index 6bdbc38cf1..6dad4db9f4 100644 --- a/modules/audio_coding/neteq/delay_manager_unittest.cc +++ b/modules/audio_coding/neteq/delay_manager_unittest.cc @@ -335,4 +335,80 @@ TEST_F(DelayManagerTest, Failures) { EXPECT_FALSE(dm_->SetMaximumDelay(60)); } +// Test if the histogram is stretched correctly if the packet size is decreased. +TEST(DelayManagerIATScalingTest, StretchTest) { + using IATVector = DelayManager::IATVector; + // Test a straightforward 60ms to 20ms change. + IATVector iat = {12, 0, 0, 0, 0, 0}; + IATVector expected_result = {4, 4, 4, 0, 0, 0}; + IATVector stretched_iat = DelayManager::ScaleHistogram(iat, 60, 20); + EXPECT_EQ(stretched_iat, expected_result); + + // Test an example where the last bin in the stretched histogram should + // contain the sum of the elements that don't fit into the new histogram. + iat = {18, 15, 12, 9, 6, 3, 0}; + expected_result = {6, 6, 6, 5, 5, 5, 30}; + stretched_iat = DelayManager::ScaleHistogram(iat, 60, 20); + EXPECT_EQ(stretched_iat, expected_result); + + // Test a 120ms to 60ms change. + iat = {18, 16, 14, 4, 0}; + expected_result = {9, 9, 8, 8, 18}; + stretched_iat = DelayManager::ScaleHistogram(iat, 120, 60); + EXPECT_EQ(stretched_iat, expected_result); + + // Test a 120ms to 20ms change. + iat = {19, 12, 0, 0, 0, 0, 0, 0}; + expected_result = {3, 3, 3, 3, 3, 3, 2, 11}; + stretched_iat = DelayManager::ScaleHistogram(iat, 120, 20); + EXPECT_EQ(stretched_iat, expected_result); + + // Test a 70ms to 40ms change. + iat = {13, 7, 5, 3, 1, 5, 12, 11, 3, 0, 0, 0}; + expected_result = {7, 5, 5, 3, 3, 2, 2, 1, 2, 2, 6, 22}; + stretched_iat = DelayManager::ScaleHistogram(iat, 70, 40); + EXPECT_EQ(stretched_iat, expected_result); + + // Test a 30ms to 20ms change. + iat = {13, 7, 5, 3, 1, 5, 12, 11, 3, 0, 0, 0}; + expected_result = {8, 6, 6, 3, 2, 2, 1, 3, 3, 8, 7, 11}; + stretched_iat = DelayManager::ScaleHistogram(iat, 30, 20); + EXPECT_EQ(stretched_iat, expected_result); +} + +// Test if the histogram is compressed correctly if the packet size is +// increased. +TEST(DelayManagerIATScalingTest, CompressionTest) { + using IATVector = DelayManager::IATVector; + // Test a 20 to 60 ms change. + IATVector iat = {12, 11, 10, 3, 2, 1}; + IATVector expected_result = {33, 6, 0, 0, 0, 0}; + IATVector compressed_iat = DelayManager::ScaleHistogram(iat, 20, 60); + EXPECT_EQ(compressed_iat, expected_result); + + // Test a 60ms to 120ms change. + iat = {18, 16, 14, 4, 1}; + expected_result = {34, 18, 1, 0, 0}; + compressed_iat = DelayManager::ScaleHistogram(iat, 60, 120); + EXPECT_EQ(compressed_iat, expected_result); + + // Test a 20ms to 120ms change. + iat = {18, 12, 5, 4, 4, 3, 5, 1}; + expected_result = {46, 6, 0, 0, 0, 0, 0, 0}; + compressed_iat = DelayManager::ScaleHistogram(iat, 20, 120); + EXPECT_EQ(compressed_iat, expected_result); + + // Test a 70ms to 80ms change. + iat = {13, 7, 5, 3, 1, 5, 12, 11, 3}; + expected_result = {11, 8, 6, 2, 5, 12, 13, 3, 0}; + compressed_iat = DelayManager::ScaleHistogram(iat, 70, 80); + EXPECT_EQ(compressed_iat, expected_result); + + // Test a 50ms to 110ms change. + iat = {13, 7, 5, 3, 1, 5, 12, 11, 3}; + expected_result = {18, 8, 16, 16, 2, 0, 0, 0, 0}; + compressed_iat = DelayManager::ScaleHistogram(iat, 50, 110); + EXPECT_EQ(compressed_iat, expected_result); +} + } // namespace webrtc diff --git a/modules/audio_coding/neteq/delay_peak_detector.cc b/modules/audio_coding/neteq/delay_peak_detector.cc index 16f41d3399..a6e7ef45da 100644 --- a/modules/audio_coding/neteq/delay_peak_detector.cc +++ b/modules/audio_coding/neteq/delay_peak_detector.cc @@ -14,6 +14,7 @@ #include "rtc_base/checks.h" #include "rtc_base/safe_conversions.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { @@ -29,7 +30,9 @@ DelayPeakDetector::~DelayPeakDetector() = default; DelayPeakDetector::DelayPeakDetector(const TickTimer* tick_timer) : peak_found_(false), peak_detection_threshold_(0), - tick_timer_(tick_timer) { + tick_timer_(tick_timer), + frame_length_change_experiment_( + field_trial::IsEnabled("WebRTC-Audio-NetEqFramelengthExperiment")) { RTC_DCHECK(!peak_period_stopwatch_); } @@ -42,7 +45,14 @@ void DelayPeakDetector::Reset() { // Calculates the threshold in number of packets. void DelayPeakDetector::SetPacketAudioLength(int length_ms) { if (length_ms > 0) { - peak_detection_threshold_ = kPeakHeightMs / length_ms; + if (frame_length_change_experiment_) { + peak_detection_threshold_ = std::max(2, kPeakHeightMs / length_ms); + } else { + peak_detection_threshold_ = kPeakHeightMs / length_ms; + } + } + if (frame_length_change_experiment_) { + peak_history_.clear(); } } diff --git a/modules/audio_coding/neteq/delay_peak_detector.h b/modules/audio_coding/neteq/delay_peak_detector.h index 2236ef21ba..9defca5ad0 100644 --- a/modules/audio_coding/neteq/delay_peak_detector.h +++ b/modules/audio_coding/neteq/delay_peak_detector.h @@ -66,6 +66,7 @@ class DelayPeakDetector { int peak_detection_threshold_; const TickTimer* tick_timer_; std::unique_ptr peak_period_stopwatch_; + const bool frame_length_change_experiment_; RTC_DISALLOW_COPY_AND_ASSIGN(DelayPeakDetector); }; diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index dd63e2f8b4..c222d1ddd9 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -563,7 +563,7 @@ if (rtc_include_tests) { "../../rtc_base:rtc_base_approved", "../../system_wrappers:system_wrappers", "../../test:test_support", - "../audio_coding:neteq_tools", + "../audio_coding:neteq_input_audio_tools", "aec_dump:mock_aec_dump_unittests", "test/conversational_speech:unittest", "//testing/gmock", @@ -802,7 +802,7 @@ if (rtc_include_tests) { "../../rtc_base:rtc_base_approved", "../../system_wrappers:system_wrappers", "../../test:test_support", - "../audio_coding:neteq_tools", + "../audio_coding:neteq_input_audio_tools", "//testing/gtest", ] }