From df6416aa502d5a9694875a22fcdf286c10f836ea Mon Sep 17 00:00:00 2001 From: aluebs Date: Wed, 16 Mar 2016 18:26:35 -0700 Subject: [PATCH] Dont always downsample to 16kHz in the reverse stream in APM TBR=tina.legrand@webrtc.org Review URL: https://codereview.webrtc.org/1773173002 Cr-Commit-Position: refs/heads/master@{#12024} --- data/audio_processing/output_data_float.pb | Bin 2054 -> 2036 bytes data/audio_processing/output_data_mac.pb | Bin 2054 -> 2036 bytes .../audio_processing/audio_processing_impl.cc | 120 +++++++++--------- .../audio_processing/audio_processing_impl.h | 11 +- .../test/audio_processing_unittest.cc | 8 +- 5 files changed, 71 insertions(+), 68 deletions(-) diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb index f8697920326b26d24b178c565e10ae446fc26f5c..97f871625089c2b08fc3a7285dc96cf91876a159 100644 GIT binary patch delta 483 zcmZn@_`=Whfqf#AzW|HCoc~b3D8Z`0GBGiNT}D7gLPkMm;sHfwF$t;38(5Slvoorq z3QGcoWhUFRvg?HF$=g>ja20Ye3NT7ADl~j&GU#Nq_{!_>lF4HiQvgGhGZzO3iji== zOuS8--!m#RPF}&}s+0Oz4rERS&>SWSCa^gjj22r3LFOC;nPUYsM^->qLRLX`@_Hsk zISHU68I(9U7zDT^xD^fS+M+_~KytW~aDjK&3(A z3PK=aK7^nc68slL5WzywQbP~x( zZO-Ckdno-*TGOQ)=vLdiK<5t3gMGJTZYkWdUZbV%k1C=R)ZpLl3pOC|`5HytudqOO z2H+J65j6MJ+xw5gGSmoUHjW?gz(d|pi9XHP_4Hb7A znlGz@xMy(kqbB2iEgNwe;*R`?MIwT@opZm!s={qmBhIl9;+DaCY_OBEYI%?^eOCT6 zD+DP&!pW<;R9do1u^+Qo#D7!@pq?X@#m_v4$B`Rk$AYRa*|n?%una L1^LJ>3@ZE^2d@C+ diff --git a/data/audio_processing/output_data_mac.pb b/data/audio_processing/output_data_mac.pb index 2070170802db3a85fe73e05ed18383321115b8ac..c05f37097345d6a5bec4a5d392c1be5045ea9799 100644 GIT binary patch delta 483 zcmZn@_`=Whfqf#AzW|HCoc~b3D8Z`0GBGiNT}D7gLPkMm;sHfwF$t;38(5Slvoorq z3QGcoWhUFRvg?HF$=g>ja20Ye3NT7ADl~j&GU#Nq_{!_>lF4HiQvgGhGZzO3iji== zOuS8--!m#RPF}&}s+0Oz4rERS&>SWSCa^gjj22r3LFOC;nPUYsM^->qLRLX`@_Hsk zISHU68I(9U7zDT^xD^+GPr1yeARfhvg1ruCI(Fy5KU*9%PrQe1We!7cu ziN+NJtw=$!)y)qUqP+7URf=bv;u=RxGJait^b0QOR#ax%|34|44cd*GsOA zy7b$>aXIxQEyARD* zRYBaBaQ3CG;(n?caRA~@Si%#TK-}IrYw!u_x4IEiJchVc@ByD3q^w&WmJ9E-|H27D z%5yk-+)+vkPAO$6Pi6W?r4Z`bQ0df)R#N9lTY_+*(~H{EvJ2NM{qW%`PkyUC8Kp63 zoVj{@qEDtF+= min_proc_rate) { + return rate; + } + } + return AudioProcessing::kMaxNativeSampleRateHz; +} + } // namespace // Throughout webrtc, it's assumed that success is represented by zero. @@ -104,20 +134,6 @@ struct AudioProcessingImpl::ApmPrivateSubmodules { std::unique_ptr agc_manager; }; -const int AudioProcessing::kNativeSampleRatesHz[] = { - AudioProcessing::kSampleRate8kHz, - AudioProcessing::kSampleRate16kHz, -#ifdef WEBRTC_ARCH_ARM_FAMILY - AudioProcessing::kSampleRate32kHz}; -#else - AudioProcessing::kSampleRate32kHz, - AudioProcessing::kSampleRate48kHz}; -#endif // WEBRTC_ARCH_ARM_FAMILY -const size_t AudioProcessing::kNumNativeSampleRates = - arraysize(AudioProcessing::kNativeSampleRatesHz); -const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing:: - kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1]; - AudioProcessing* AudioProcessing::Create() { Config config; return Create(config, nullptr); @@ -346,32 +362,19 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { formats_.api_format = config; - // We process at the closest native rate >= min(input rate, output rate). - const int min_proc_rate = - std::min(formats_.api_format.input_stream().sample_rate_hz(), - formats_.api_format.output_stream().sample_rate_hz()); - int fwd_proc_rate; - for (size_t i = 0; i < kNumNativeSampleRates; ++i) { - fwd_proc_rate = kNativeSampleRatesHz[i]; - if (fwd_proc_rate >= min_proc_rate) { - break; - } - } + capture_nonlocked_.fwd_proc_format = StreamConfig(ClosestNativeRate(std::min( + formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.output_stream().sample_rate_hz()))); - capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate); - - // We normally process the reverse stream at 16 kHz. Unless... - int rev_proc_rate = kSampleRate16kHz; + int rev_proc_rate = ClosestNativeRate(std::min( + formats_.api_format.reverse_input_stream().sample_rate_hz(), + formats_.api_format.reverse_output_stream().sample_rate_hz())); + // If the forward sample rate is 8 kHz, the reverse stream is also processed + // at this rate. if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) { - // ...the forward stream is at 8 kHz. rev_proc_rate = kSampleRate8kHz; } else { - if (formats_.api_format.reverse_input_stream().sample_rate_hz() == - kSampleRate32kHz) { - // ...or the input is at 32 kHz, in which case we use the splitting - // filter rather than the resampler. - rev_proc_rate = kSampleRate32kHz; - } + rev_proc_rate = std::max(rev_proc_rate, static_cast(kSampleRate16kHz)); } // Always downmix the reverse stream to mono for analysis. This has been @@ -627,8 +630,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { capture_.capture_audio->DeinterleaveFrom(frame); RETURN_ON_ERR(ProcessStreamLocked()); - capture_.capture_audio->InterleaveTo(frame, - output_copy_needed(is_data_processed())); + capture_.capture_audio->InterleaveTo(frame, output_copy_needed()); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_dump_.debug_file->Open()) { @@ -674,8 +676,7 @@ int AudioProcessingImpl::ProcessStreamLocked() { capture_nonlocked_.fwd_proc_format.num_frames()); } - bool data_processed = is_data_processed(); - if (analysis_needed(data_processed)) { + if (fwd_analysis_needed()) { ca->SplitIntoFrequencyBands(); } @@ -733,7 +734,7 @@ int AudioProcessingImpl::ProcessStreamLocked() { RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio( ca, echo_cancellation()->stream_has_echo())); - if (synthesis_needed(data_processed)) { + if (fwd_synthesis_needed()) { ca->MergeFrequencyBands(); } @@ -903,7 +904,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { int AudioProcessingImpl::ProcessReverseStreamLocked() { AudioBuffer* ra = render_.render_audio.get(); // For brevity. - if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz) { + if (rev_analysis_needed()) { ra->SplitIntoFrequencyBands(); } @@ -920,8 +921,7 @@ int AudioProcessingImpl::ProcessReverseStreamLocked() { RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra)); } - if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz && - is_rev_processed()) { + if (rev_synthesis_needed()) { ra->MergeFrequencyBands(); } @@ -1128,31 +1128,26 @@ bool AudioProcessingImpl::is_data_processed() const { return false; } -bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const { +bool AudioProcessingImpl::output_copy_needed() const { // Check if we've upmixed or downmixed the audio. return ((formats_.api_format.output_stream().num_channels() != formats_.api_format.input_stream().num_channels()) || - is_data_processed || capture_.transient_suppressor_enabled); + is_data_processed() || capture_.transient_suppressor_enabled); } -bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const { - return (is_data_processed && - (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == - kSampleRate32kHz || - capture_nonlocked_.fwd_proc_format.sample_rate_hz() == - kSampleRate48kHz)); +bool AudioProcessingImpl::fwd_synthesis_needed() const { + return (is_data_processed() && + is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz())); } -bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { - if (!is_data_processed && +bool AudioProcessingImpl::fwd_analysis_needed() const { + if (!is_data_processed() && !public_submodules_->voice_detection->is_enabled() && !capture_.transient_suppressor_enabled) { // Only public_submodules_->level_estimator is enabled. return false; - } else if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == - kSampleRate32kHz || - capture_nonlocked_.fwd_proc_format.sample_rate_hz() == - kSampleRate48kHz) { + } else if (is_multi_band( + capture_nonlocked_.fwd_proc_format.sample_rate_hz())) { // Something besides public_submodules_->level_estimator is enabled, and we // have super-wb. return true; @@ -1164,6 +1159,15 @@ bool AudioProcessingImpl::is_rev_processed() const { return constants_.intelligibility_enabled; } +bool AudioProcessingImpl::rev_synthesis_needed() const { + return (is_rev_processed() && + is_multi_band(formats_.rev_proc_format.sample_rate_hz())); +} + +bool AudioProcessingImpl::rev_analysis_needed() const { + return is_multi_band(formats_.rev_proc_format.sample_rate_hz()); +} + bool AudioProcessingImpl::render_check_rev_conversion_needed() const { return rev_conversion_needed(); } diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index 20ca3a4c74..fe4da57086 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -208,13 +208,10 @@ class AudioProcessingImpl : public AudioProcessing { // Capture-side exclusive methods possibly running APM in a multi-threaded // manner that are called with the render lock already acquired. int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); - bool output_copy_needed(bool is_data_processed) const - EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + bool output_copy_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); bool is_data_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); - bool synthesis_needed(bool is_data_processed) const - EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); - bool analysis_needed(bool is_data_processed) const - EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + bool fwd_synthesis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + bool fwd_analysis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); // Render-side exclusive methods possibly running APM in a multi-threaded @@ -225,6 +222,8 @@ class AudioProcessingImpl : public AudioProcessing { const StreamConfig& output_config) EXCLUSIVE_LOCKS_REQUIRED(crit_render_); bool is_rev_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + bool rev_synthesis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + bool rev_analysis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_); int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_render_); // Debug dump methods that are internal and called without locks. diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc index da695ec038..00cd187ad9 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc +++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc @@ -2649,8 +2649,8 @@ INSTANTIATE_TEST_CASE_P( CommonFormats, AudioProcessingTest, testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0), - std::tr1::make_tuple(48000, 48000, 32000, 48000, 40, 30), - std::tr1::make_tuple(48000, 48000, 16000, 48000, 40, 20), + std::tr1::make_tuple(48000, 48000, 32000, 48000, 35, 30), + std::tr1::make_tuple(48000, 48000, 16000, 48000, 35, 20), std::tr1::make_tuple(48000, 44100, 48000, 44100, 20, 20), std::tr1::make_tuple(48000, 44100, 32000, 44100, 20, 15), std::tr1::make_tuple(48000, 44100, 16000, 44100, 20, 15), @@ -2697,7 +2697,7 @@ INSTANTIATE_TEST_CASE_P( std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0), std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20), std::tr1::make_tuple(16000, 16000, 48000, 16000, 40, 20), - std::tr1::make_tuple(16000, 16000, 32000, 16000, 50, 20), + std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20), std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0))); #elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) @@ -2753,7 +2753,7 @@ INSTANTIATE_TEST_CASE_P( std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0), std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20), std::tr1::make_tuple(16000, 16000, 48000, 16000, 35, 20), - std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20), + std::tr1::make_tuple(16000, 16000, 32000, 16000, 35, 20), std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0))); #endif