Reland "Reduce complexity in the APM pipeline when the output is not used"
This is a reland of aa6adffba325f4b698a1e94aeab020bfdc47adec.

What changed in the reland is that the merging of the bands is excluded
from the code that is skipped when the output is not used, i.e., the
merging is always done. This is important because some clients may apply
muting before APM and still flag to APM that the signal is muted. If the
merging were not always done, those clients would get nonzero output from
APM during muting.

Original change's description:
> Reduce complexity in the APM pipeline when the output is not used
>
> This CL selectively turns off parts of the audio processing when
> the output of APM is not used. The parts turned off are those that
> do not need to be trained continuously, but rather can be
> temporarily deactivated.
>
> The purpose of this CL is to allow CPU usage to be reduced when the
> client is muted.
>
> The CL will be followed by additional CLs adding similar functionality
> to the echo canceller and the noise suppressor.
>
> Bug: b/177830919
> Change-Id: I72d24505197a53872562c0955f3e7b670c43df6b
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/209703
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#33431}

Bug: b/177830919
Change-Id: Ib74dd1cefa173d45101e26c4f2b931860abc6d08
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/211760
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33478}
parent 15179a9986
commit 19775cbd29
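As an illustration of the client behaviour this reland protects, here is a minimal C++ sketch (not part of the change; the helper name ProcessMutedCapture and the apm/config/capture parameters are assumed): the client zeroes its capture audio itself and then flags to APM that the output will not be used. Because the band merging always runs, ProcessStream() keeps returning the muted, all-zero signal rather than stale band contents.

#include <algorithm>
#include <cstddef>

#include "modules/audio_processing/include/audio_processing.h"

// Illustrative helper (not from this CL): `apm` is an already configured
// webrtc::AudioProcessing instance; `capture` holds one 10 ms frame of
// deinterleaved float samples matching `config`.
void ProcessMutedCapture(webrtc::AudioProcessing* apm,
                         const webrtc::StreamConfig& config,
                         float* const* capture) {
  // Tell APM that its output will not be used; with this CL, APM can then
  // skip the processing steps whose output would be discarded.
  apm->set_output_will_be_muted(true);

  // Client-side muting applied before APM, as described in the commit message.
  for (size_t ch = 0; ch < config.num_channels(); ++ch) {
    std::fill_n(capture[ch], config.num_frames(), 0.f);
  }

  // Band merging still runs, so the in-place output stays all-zero.
  apm->ProcessStream(capture, config, config, capture);
}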
modules/audio_processing/audio_processing_impl.cc

@@ -114,6 +114,10 @@ GainControl::Mode Agc1ConfigModeToInterfaceMode(
   RTC_CHECK_NOTREACHED();
 }
 
+bool MinimizeProcessingForUnusedOutput() {
+  return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch");
+}
+
 // Maximum lengths that frame of samples being passed from the render side to
 // the capture side can have (does not apply to AEC3).
 static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
@@ -266,7 +270,9 @@ AudioProcessingImpl::AudioProcessingImpl(
                     "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
                 !field_trial::IsEnabled(
                     "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
-                EnforceSplitBandHpf()),
+                EnforceSplitBandHpf(),
+                MinimizeProcessingForUnusedOutput()),
+      capture_(),
       capture_nonlocked_() {
   RTC_LOG(LS_INFO) << "Injected APM submodules:"
                       "\nEcho control factory: "
@@ -670,7 +676,9 @@ void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
 
 void AudioProcessingImpl::HandleCaptureOutputUsedSetting(
     bool capture_output_used) {
-  capture_.capture_output_used = capture_output_used;
+  capture_.capture_output_used =
+      capture_output_used || !constants_.minimize_processing_for_unused_output;
+
   if (submodules_.agc_manager.get()) {
     submodules_.agc_manager->HandleCaptureOutputUsedChange(
         capture_.capture_output_used);
@@ -912,11 +920,7 @@ void AudioProcessingImpl::HandleCaptureRuntimeSettings() {
 void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
   // Fall back to a safe state for the case when a setting for capture output
   // usage setting has been missed.
-  capture_.capture_output_used = true;
-  if (submodules_.echo_controller) {
-    submodules_.echo_controller->SetCaptureOutputUsage(
-        capture_.capture_output_used);
-  }
+  HandleCaptureOutputUsedSetting(/*capture_output_used=*/true);
 }
 
 void AudioProcessingImpl::HandleRenderRuntimeSettings() {
@@ -1283,81 +1287,95 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
     capture_buffer->MergeFrequencyBands();
   }
 
-  if (capture_.capture_fullband_audio) {
-    const auto& ec = submodules_.echo_controller;
-    bool ec_active = ec ? ec->ActiveProcessing() : false;
-    // Only update the fullband buffer if the multiband processing has changed
-    // the signal. Keep the original signal otherwise.
-    if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
-      capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
-    }
-    capture_buffer = capture_.capture_fullband_audio.get();
-  }
-
-  if (config_.residual_echo_detector.enabled) {
-    RTC_DCHECK(submodules_.echo_detector);
-    submodules_.echo_detector->AnalyzeCaptureAudio(rtc::ArrayView<const float>(
-        capture_buffer->channels()[0], capture_buffer->num_frames()));
-  }
-
-  // TODO(aluebs): Investigate if the transient suppression placement should be
-  // before or after the AGC.
-  if (submodules_.transient_suppressor) {
-    float voice_probability = submodules_.agc_manager.get()
-                                  ? submodules_.agc_manager->voice_probability()
-                                  : 1.f;
-
-    submodules_.transient_suppressor->Suppress(
-        capture_buffer->channels()[0], capture_buffer->num_frames(),
-        capture_buffer->num_channels(),
-        capture_buffer->split_bands_const(0)[kBand0To8kHz],
-        capture_buffer->num_frames_per_band(),
-        capture_.keyboard_info.keyboard_data,
-        capture_.keyboard_info.num_keyboard_frames, voice_probability,
-        capture_.key_pressed);
-  }
-
-  // Experimental APM sub-module that analyzes |capture_buffer|.
-  if (submodules_.capture_analyzer) {
-    submodules_.capture_analyzer->Analyze(capture_buffer);
-  }
-
-  if (submodules_.gain_controller2) {
-    submodules_.gain_controller2->NotifyAnalogLevel(
-        recommended_stream_analog_level_locked());
-    submodules_.gain_controller2->Process(capture_buffer);
-  }
-
-  if (submodules_.capture_post_processor) {
-    submodules_.capture_post_processor->Process(capture_buffer);
-  }
-
-  // The level estimator operates on the recombined data.
-  if (config_.level_estimation.enabled) {
-    submodules_.output_level_estimator->ProcessStream(*capture_buffer);
-    capture_.stats.output_rms_dbfs = submodules_.output_level_estimator->RMS();
-  } else {
-    capture_.stats.output_rms_dbfs = absl::nullopt;
-  }
-
-  capture_output_rms_.Analyze(rtc::ArrayView<const float>(
-      capture_buffer->channels_const()[0],
-      capture_nonlocked_.capture_processing_format.num_frames()));
-  if (log_rms) {
-    RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
-    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelAverageRms",
-                                levels.average, 1, RmsLevel::kMinLevelDb, 64);
-    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
-                                levels.peak, 1, RmsLevel::kMinLevelDb, 64);
-  }
-
-  if (submodules_.agc_manager) {
-    int level = recommended_stream_analog_level_locked();
-    data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
-                          &level);
-  }
-
-  // Compute echo-related stats.
+  capture_.stats.output_rms_dbfs = absl::nullopt;
+  if (capture_.capture_output_used) {
+    if (capture_.capture_fullband_audio) {
+      const auto& ec = submodules_.echo_controller;
+      bool ec_active = ec ? ec->ActiveProcessing() : false;
+      // Only update the fullband buffer if the multiband processing has changed
+      // the signal. Keep the original signal otherwise.
+      if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
+        capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
+      }
+      capture_buffer = capture_.capture_fullband_audio.get();
+    }
+
+    if (config_.residual_echo_detector.enabled) {
+      RTC_DCHECK(submodules_.echo_detector);
+      submodules_.echo_detector->AnalyzeCaptureAudio(
+          rtc::ArrayView<const float>(capture_buffer->channels()[0],
+                                      capture_buffer->num_frames()));
+    }
+
+    // TODO(aluebs): Investigate if the transient suppression placement should
+    // be before or after the AGC.
+    if (submodules_.transient_suppressor) {
+      float voice_probability =
+          submodules_.agc_manager.get()
+              ? submodules_.agc_manager->voice_probability()
+              : 1.f;
+
+      submodules_.transient_suppressor->Suppress(
+          capture_buffer->channels()[0], capture_buffer->num_frames(),
+          capture_buffer->num_channels(),
+          capture_buffer->split_bands_const(0)[kBand0To8kHz],
+          capture_buffer->num_frames_per_band(),
+          capture_.keyboard_info.keyboard_data,
+          capture_.keyboard_info.num_keyboard_frames, voice_probability,
+          capture_.key_pressed);
+    }
+
+    // Experimental APM sub-module that analyzes |capture_buffer|.
+    if (submodules_.capture_analyzer) {
+      submodules_.capture_analyzer->Analyze(capture_buffer);
+    }
+
+    if (submodules_.gain_controller2) {
+      submodules_.gain_controller2->NotifyAnalogLevel(
+          recommended_stream_analog_level_locked());
+      submodules_.gain_controller2->Process(capture_buffer);
+    }
+
+    if (submodules_.capture_post_processor) {
+      submodules_.capture_post_processor->Process(capture_buffer);
+    }
+
+    // The level estimator operates on the recombined data.
+    if (config_.level_estimation.enabled) {
+      submodules_.output_level_estimator->ProcessStream(*capture_buffer);
+      capture_.stats.output_rms_dbfs =
+          submodules_.output_level_estimator->RMS();
+    }
+
+    capture_output_rms_.Analyze(rtc::ArrayView<const float>(
+        capture_buffer->channels_const()[0],
+        capture_nonlocked_.capture_processing_format.num_frames()));
+    if (log_rms) {
+      RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
+      RTC_HISTOGRAM_COUNTS_LINEAR(
+          "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1,
+          RmsLevel::kMinLevelDb, 64);
+      RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
+                                  levels.peak, 1, RmsLevel::kMinLevelDb, 64);
+    }
+
+    if (submodules_.agc_manager) {
+      int level = recommended_stream_analog_level_locked();
+      data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
+                            &level);
+    }
+
+    // Compute echo-detector stats.
+    if (config_.residual_echo_detector.enabled) {
+      RTC_DCHECK(submodules_.echo_detector);
+      auto ed_metrics = submodules_.echo_detector->GetMetrics();
+      capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
+      capture_.stats.residual_echo_likelihood_recent_max =
+          ed_metrics.echo_likelihood_recent_max;
+    }
+  }
+
+  // Compute echo-controller stats.
   if (submodules_.echo_controller) {
     auto ec_metrics = submodules_.echo_controller->GetMetrics();
     capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
@@ -1365,13 +1383,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
         ec_metrics.echo_return_loss_enhancement;
     capture_.stats.delay_ms = ec_metrics.delay_ms;
   }
-  if (config_.residual_echo_detector.enabled) {
-    RTC_DCHECK(submodules_.echo_detector);
-    auto ed_metrics = submodules_.echo_detector->GetMetrics();
-    capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
-    capture_.stats.residual_echo_likelihood_recent_max =
-        ed_metrics.echo_likelihood_recent_max;
-  }
 
   // Pass stats for reporting.
   stats_reporter_.UpdateStatistics(capture_.stats);

modules/audio_processing/audio_processing_impl.h

@@ -421,13 +421,17 @@ class AudioProcessingImpl : public AudioProcessing {
   const struct ApmConstants {
     ApmConstants(bool multi_channel_render_support,
                  bool multi_channel_capture_support,
-                 bool enforce_split_band_hpf)
+                 bool enforce_split_band_hpf,
+                 bool minimize_processing_for_unused_output)
         : multi_channel_render_support(multi_channel_render_support),
           multi_channel_capture_support(multi_channel_capture_support),
-          enforce_split_band_hpf(enforce_split_band_hpf) {}
+          enforce_split_band_hpf(enforce_split_band_hpf),
+          minimize_processing_for_unused_output(
+              minimize_processing_for_unused_output) {}
     bool multi_channel_render_support;
     bool multi_channel_capture_support;
     bool enforce_split_band_hpf;
+    bool minimize_processing_for_unused_output;
   } constants_;
 
   struct ApmCaptureState {
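The reduced processing is guarded by the WebRTC-MutedStateKillSwitch field trial checked in MinimizeProcessingForUnusedOutput() above. A minimal sketch of how an embedder could opt out by enabling the kill switch before any AudioProcessing instance is created (the helper name DisableMutedStateOptimization is assumed):

#include "system_wrappers/include/field_trial.h"

// The trial string must stay valid for the lifetime of the process, since
// InitFieldTrialsFromString() keeps a pointer to it rather than copying it.
constexpr char kFieldTrials[] = "WebRTC-MutedStateKillSwitch/Enabled/";

// Illustrative helper: call once at startup, before constructing APM, to
// restore the previous always-process behaviour.
void DisableMutedStateOptimization() {
  webrtc::field_trial::InitFieldTrialsFromString(kFieldTrials);
}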