From 385b10bbaade624470e9ee6cf4844cc5740febb7 Mon Sep 17 00:00:00 2001 From: Ivo Creusen Date: Fri, 13 Oct 2017 12:37:27 +0200 Subject: [PATCH] Added experiment to improve handling of frame length changes in NetEq. The field trial affects two things: after a frame length change the IAT histogram is scaled to prevent an immediate change in target buffer level. Also, the peak history in the delay peak detector is cleared, because the size of the peaks is stored in number of packets (which will be incorrect after a frame length change). Bug: webrtc:8381 Change-Id: I214b990f6e5959b655b6542884a7f75da181a0d8 Reviewed-on: https://webrtc-review.googlesource.com/8101 Reviewed-by: Henrik Lundin Commit-Queue: Ivo Creusen Cr-Commit-Position: refs/heads/master@{#20284} --- modules/audio_coding/BUILD.gn | 21 +++-- modules/audio_coding/neteq/delay_manager.cc | 47 +++++++++++- modules/audio_coding/neteq/delay_manager.h | 8 ++ .../neteq/delay_manager_unittest.cc | 76 +++++++++++++++++++ .../audio_coding/neteq/delay_peak_detector.cc | 14 +++- .../audio_coding/neteq/delay_peak_detector.h | 1 + modules/audio_processing/BUILD.gn | 4 +- 7 files changed, 160 insertions(+), 11 deletions(-) diff --git a/modules/audio_coding/BUILD.gn b/modules/audio_coding/BUILD.gn index da055bb7d2..b222ad716e 100644 --- a/modules/audio_coding/BUILD.gn +++ b/modules/audio_coding/BUILD.gn @@ -1143,14 +1143,10 @@ rtc_source_set("neteq_tools") { sources = [ "neteq/tools/fake_decode_from_file.cc", "neteq/tools/fake_decode_from_file.h", - "neteq/tools/input_audio_file.cc", - "neteq/tools/input_audio_file.h", "neteq/tools/neteq_delay_analyzer.cc", "neteq/tools/neteq_delay_analyzer.h", "neteq/tools/neteq_replacement_input.cc", "neteq/tools/neteq_replacement_input.h", - "neteq/tools/resample_input_audio_file.cc", - "neteq/tools/resample_input_audio_file.h", ] public_configs = [ ":neteq_tools_config" ] @@ -1165,16 +1161,31 @@ rtc_source_set("neteq_tools") { "../../api:array_view", "../../api:optional", 
"../../api/audio_codecs:audio_codecs_api", - "../../common_audio", "../../rtc_base:rtc_base_approved", "../rtp_rtcp", ] public_deps = [ + ":neteq_input_audio_tools", ":neteq_tools_minimal", ] } +rtc_source_set("neteq_input_audio_tools") { + sources = [ + "neteq/tools/input_audio_file.cc", + "neteq/tools/input_audio_file.h", + "neteq/tools/resample_input_audio_file.cc", + "neteq/tools/resample_input_audio_file.h", + ] + + deps = [ + "../..:webrtc_common", + "../../common_audio", + "../../rtc_base:rtc_base_approved", + ] +} + if (rtc_enable_protobuf) { rtc_static_library("rtc_event_log_source") { testonly = true diff --git a/modules/audio_coding/neteq/delay_manager.cc b/modules/audio_coding/neteq/delay_manager.cc index ab98a068b5..77a41fd887 100644 --- a/modules/audio_coding/neteq/delay_manager.cc +++ b/modules/audio_coding/neteq/delay_manager.cc @@ -14,12 +14,14 @@ #include #include // max, min +#include #include "common_audio/signal_processing/include/signal_processing_library.h" #include "modules/audio_coding/neteq/delay_peak_detector.h" #include "modules/include/module_common_types.h" #include "rtc_base/logging.h" #include "rtc_base/safe_conversions.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { @@ -31,7 +33,7 @@ DelayManager::DelayManager(size_t max_packets_in_buffer, iat_vector_(kMaxIat + 1, 0), iat_factor_(0), tick_timer_(tick_timer), - base_target_level_(4), // In Q0 domain. + base_target_level_(4), // In Q0 domain. target_level_(base_target_level_ << 8), // In Q8 domain. packet_len_ms_(0), streaming_mode_(false), @@ -43,7 +45,9 @@ DelayManager::DelayManager(size_t max_packets_in_buffer, iat_cumulative_sum_(0), max_iat_cumulative_sum_(0), peak_detector_(*peak_detector), - last_pack_cng_or_dtmf_(1) { + last_pack_cng_or_dtmf_(1), + frame_length_change_experiment_( + field_trial::IsEnabled("WebRTC-Audio-NetEqFramelengthExperiment")) { assert(peak_detector); // Should never be NULL. 
Reset(); } @@ -298,6 +302,10 @@ int DelayManager::SetPacketAudioLength(int length_ms) { LOG_F(LS_ERROR) << "length_ms = " << length_ms; return -1; } + if (frame_length_change_experiment_ && packet_len_ms_ != length_ms) { + iat_vector_ = ScaleHistogram(iat_vector_, packet_len_ms_, length_ms); + } + packet_len_ms_ = length_ms; peak_detector_.SetPacketAudioLength(packet_len_ms_); packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch(); @@ -378,6 +386,41 @@ void DelayManager::RegisterEmptyPacket() { ++last_seq_no_; } +DelayManager::IATVector DelayManager::ScaleHistogram(const IATVector& histogram, + int old_packet_length, + int new_packet_length) { + RTC_DCHECK_GT(new_packet_length, 0); + RTC_DCHECK_EQ(old_packet_length % 10, 0); + RTC_DCHECK_EQ(new_packet_length % 10, 0); + IATVector new_histogram(histogram.size(), 0); + int acc = 0; + int time_counter = 0; + size_t new_histogram_idx = 0; + for (size_t i = 0; i < histogram.size(); i++) { + acc += histogram[i]; + time_counter += old_packet_length; + // The bins should be scaled, to ensure the histogram still sums to one. + const int scaled_acc = acc * new_packet_length / time_counter; + int actually_used_acc = 0; + while (time_counter >= new_packet_length) { + actually_used_acc += scaled_acc; + new_histogram[new_histogram_idx] += scaled_acc; + new_histogram_idx = + std::min(new_histogram_idx + 1, new_histogram.size() - 1); + time_counter -= new_packet_length; + } + // Only subtract the part that was succesfully written to the new histogram. + acc -= actually_used_acc; + } + // If there is anything left in acc (due to rounding errors), add it to the + // last bin. 
+ new_histogram[new_histogram_idx] += acc; + RTC_DCHECK_EQ(histogram.size(), new_histogram.size()); + RTC_DCHECK_EQ(accumulate(histogram.begin(), histogram.end(), 0), + accumulate(new_histogram.begin(), new_histogram.end(), 0)); + return new_histogram; +} + bool DelayManager::SetMinimumDelay(int delay_ms) { // Minimum delay shouldn't be more than maximum delay, if any maximum is set. // Also, if possible check |delay| to less than 75% of diff --git a/modules/audio_coding/neteq/delay_manager.h b/modules/audio_coding/neteq/delay_manager.h index b7cdab3d89..0de03fc869 100644 --- a/modules/audio_coding/neteq/delay_manager.h +++ b/modules/audio_coding/neteq/delay_manager.h @@ -100,6 +100,13 @@ class DelayManager { // packet will shift the sequence numbers for the following packets. virtual void RegisterEmptyPacket(); + // Apply compression or stretching to the IAT histogram, for a change in frame + // size. This returns an updated histogram. This function is public for + // testability. + static IATVector ScaleHistogram(const IATVector& histogram, + int old_packet_length, + int new_packet_length); + // Accessors and mutators. // Assuming |delay| is in valid range. virtual bool SetMinimumDelay(int delay_ms); @@ -166,6 +173,7 @@ class DelayManager { std::unique_ptr max_iat_stopwatch_; DelayPeakDetector& peak_detector_; int last_pack_cng_or_dtmf_; + const bool frame_length_change_experiment_; RTC_DISALLOW_COPY_AND_ASSIGN(DelayManager); }; diff --git a/modules/audio_coding/neteq/delay_manager_unittest.cc b/modules/audio_coding/neteq/delay_manager_unittest.cc index 6bdbc38cf1..6dad4db9f4 100644 --- a/modules/audio_coding/neteq/delay_manager_unittest.cc +++ b/modules/audio_coding/neteq/delay_manager_unittest.cc @@ -335,4 +335,80 @@ TEST_F(DelayManagerTest, Failures) { EXPECT_FALSE(dm_->SetMaximumDelay(60)); } +// Test if the histogram is stretched correctly if the packet size is decreased. 
+TEST(DelayManagerIATScalingTest, StretchTest) {
+  using IATVector = DelayManager::IATVector;
+  // Test a straightforward 60ms to 20ms change: one bin splits into three.
+  IATVector iat = {12, 0, 0, 0, 0, 0};
+  IATVector expected_result = {4, 4, 4, 0, 0, 0};
+  IATVector stretched_iat = DelayManager::ScaleHistogram(iat, 60, 20);
+  EXPECT_EQ(stretched_iat, expected_result);
+
+  // Test an example where the last bin in the stretched histogram should
+  // contain the sum of the elements that don't fit into the new histogram.
+  iat = {18, 15, 12, 9, 6, 3, 0};
+  expected_result = {6, 6, 6, 5, 5, 5, 30};
+  stretched_iat = DelayManager::ScaleHistogram(iat, 60, 20);
+  EXPECT_EQ(stretched_iat, expected_result);
+
+  // Test a 120ms to 60ms change; the overflow (14 + 4) lands in the last bin.
+  iat = {18, 16, 14, 4, 0};
+  expected_result = {9, 9, 8, 8, 18};
+  stretched_iat = DelayManager::ScaleHistogram(iat, 120, 60);
+  EXPECT_EQ(stretched_iat, expected_result);
+
+  // Test a 120ms to 20ms change.
+  iat = {19, 12, 0, 0, 0, 0, 0, 0};
+  expected_result = {3, 3, 3, 3, 3, 3, 2, 11};
+  stretched_iat = DelayManager::ScaleHistogram(iat, 120, 20);
+  EXPECT_EQ(stretched_iat, expected_result);
+
+  // Test a 70ms to 40ms change.
+  iat = {13, 7, 5, 3, 1, 5, 12, 11, 3, 0, 0, 0};
+  expected_result = {7, 5, 5, 3, 3, 2, 2, 1, 2, 2, 6, 22};
+  stretched_iat = DelayManager::ScaleHistogram(iat, 70, 40);
+  EXPECT_EQ(stretched_iat, expected_result);
+
+  // Test a 30ms to 20ms change.
+  iat = {13, 7, 5, 3, 1, 5, 12, 11, 3, 0, 0, 0};
+  expected_result = {8, 6, 6, 3, 2, 2, 1, 3, 3, 8, 7, 11};
+  stretched_iat = DelayManager::ScaleHistogram(iat, 30, 20);
+  EXPECT_EQ(stretched_iat, expected_result);
+}
+
+// Test if the histogram is compressed correctly if the packet size is
+// increased.
+TEST(DelayManagerIATScalingTest, CompressionTest) {
+  using IATVector = DelayManager::IATVector;
+  // Test a 20ms to 60ms change: three old bins merge into one new bin.
+  IATVector iat = {12, 11, 10, 3, 2, 1};
+  IATVector expected_result = {33, 6, 0, 0, 0, 0};
+  IATVector compressed_iat = DelayManager::ScaleHistogram(iat, 20, 60);
+  EXPECT_EQ(compressed_iat, expected_result);
+
+  // Test a 60ms to 120ms change.
+  iat = {18, 16, 14, 4, 1};
+  expected_result = {34, 18, 1, 0, 0};
+  compressed_iat = DelayManager::ScaleHistogram(iat, 60, 120);
+  EXPECT_EQ(compressed_iat, expected_result);
+
+  // Test a 20ms to 120ms change.
+  iat = {18, 12, 5, 4, 4, 3, 5, 1};
+  expected_result = {46, 6, 0, 0, 0, 0, 0, 0};
+  compressed_iat = DelayManager::ScaleHistogram(iat, 20, 120);
+  EXPECT_EQ(compressed_iat, expected_result);
+
+  // Test a 70ms to 80ms change.
+  iat = {13, 7, 5, 3, 1, 5, 12, 11, 3};
+  expected_result = {11, 8, 6, 2, 5, 12, 13, 3, 0};
+  compressed_iat = DelayManager::ScaleHistogram(iat, 70, 80);
+  EXPECT_EQ(compressed_iat, expected_result);
+
+  // Test a 50ms to 110ms change.
+  iat = {13, 7, 5, 3, 1, 5, 12, 11, 3};
+  expected_result = {18, 8, 16, 16, 2, 0, 0, 0, 0};
+  compressed_iat = DelayManager::ScaleHistogram(iat, 50, 110);
+  EXPECT_EQ(compressed_iat, expected_result);
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_coding/neteq/delay_peak_detector.cc b/modules/audio_coding/neteq/delay_peak_detector.cc
index 16f41d3399..a6e7ef45da 100644
--- a/modules/audio_coding/neteq/delay_peak_detector.cc
+++ b/modules/audio_coding/neteq/delay_peak_detector.cc
@@ -14,6 +14,7 @@
 
 #include "rtc_base/checks.h"
 #include "rtc_base/safe_conversions.h"
+#include "system_wrappers/include/field_trial.h"
 
 namespace webrtc {
 
@@ -29,7 +30,9 @@ DelayPeakDetector::~DelayPeakDetector() = default;
 DelayPeakDetector::DelayPeakDetector(const TickTimer* tick_timer)
     : peak_found_(false),
       peak_detection_threshold_(0),
-      tick_timer_(tick_timer) {
+      tick_timer_(tick_timer),
+      frame_length_change_experiment_(
+          field_trial::IsEnabled("WebRTC-Audio-NetEqFramelengthExperiment")) {
   RTC_DCHECK(!peak_period_stopwatch_);
 }
 
@@ -42,7 +45,14 @@ void DelayPeakDetector::Reset() {
 // Calculates the threshold in number of packets.
 void DelayPeakDetector::SetPacketAudioLength(int length_ms) {
   if (length_ms > 0) {
-    peak_detection_threshold_ = kPeakHeightMs / length_ms;
+    if (frame_length_change_experiment_) {  // Floor the threshold at 2 packets.
+      peak_detection_threshold_ = std::max(2, kPeakHeightMs / length_ms);
+    } else {
+      peak_detection_threshold_ = kPeakHeightMs / length_ms;
+    }
+  }
+  if (frame_length_change_experiment_) {  // Peak sizes are stored in packets.
+    peak_history_.clear();  // Stale once the packet length may have changed.
   }
 }
 
diff --git a/modules/audio_coding/neteq/delay_peak_detector.h b/modules/audio_coding/neteq/delay_peak_detector.h
index 2236ef21ba..9defca5ad0 100644
--- a/modules/audio_coding/neteq/delay_peak_detector.h
+++ b/modules/audio_coding/neteq/delay_peak_detector.h
@@ -66,6 +66,7 @@ class DelayPeakDetector {
   int peak_detection_threshold_;
   const TickTimer* tick_timer_;
   std::unique_ptr peak_period_stopwatch_;
+  const bool frame_length_change_experiment_;  // Read from the field trial.
 
   RTC_DISALLOW_COPY_AND_ASSIGN(DelayPeakDetector);
 };
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index dd63e2f8b4..c222d1ddd9 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -563,7 +563,7 @@ if (rtc_include_tests) {
       "../../rtc_base:rtc_base_approved",
       "../../system_wrappers:system_wrappers",
       "../../test:test_support",
-      "../audio_coding:neteq_tools",
+      "../audio_coding:neteq_input_audio_tools",
       "aec_dump:mock_aec_dump_unittests",
       "test/conversational_speech:unittest",
       "//testing/gmock",
@@ -802,7 +802,7 @@ if (rtc_include_tests) {
       "../../rtc_base:rtc_base_approved",
       "../../system_wrappers:system_wrappers",
       "../../test:test_support",
-      "../audio_coding:neteq_tools",
+      "../audio_coding:neteq_input_audio_tools",
       "//testing/gtest",
     ]
   }