diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn index f9e2895b31..73b2beb568 100644 --- a/modules/audio_processing/agc2/BUILD.gn +++ b/modules/audio_processing/agc2/BUILD.gn @@ -49,7 +49,7 @@ rtc_library("adaptive_digital_gain_controller") { ":common", ":gain_applier", "..:apm_logging", - "..:audio_frame_view", + "../../../api/audio:audio_frame_api", "../../../api/audio:audio_processing", "../../../common_audio", "../../../rtc_base:checks", @@ -174,7 +174,7 @@ rtc_library("gain_applier") { deps = [ ":common", "..:audio_frame_view", - "../../../api:array_view", + "../../../api/audio:audio_frame_api", "../../../rtc_base:safe_minmax", ] } @@ -232,8 +232,7 @@ rtc_library("noise_level_estimator") { deps = [ ":biquad_filter", "..:apm_logging", - "..:audio_frame_view", - "../../../api:array_view", + "../../../api/audio:audio_frame_api", "../../../rtc_base:checks", "../../../system_wrappers", ] @@ -266,8 +265,7 @@ rtc_library("vad_wrapper") { deps = [ ":common", ":cpu_features", - "..:audio_frame_view", - "../../../api:array_view", + "../../../api/audio:audio_frame_api", "../../../common_audio", "../../../rtc_base:checks", "rnn_vad", @@ -335,7 +333,7 @@ rtc_library("gain_applier_unittest") { deps = [ ":gain_applier", ":test_utils", - "..:audio_frame_view", + "../../../api/audio:audio_frame_api", "../../../rtc_base:gunit_helpers", "../../../test:test_support", ] @@ -435,9 +433,8 @@ rtc_library("noise_estimator_unittests") { ":noise_level_estimator", ":test_utils", "..:apm_logging", - "..:audio_frame_view", - "../../../api:array_view", "../../../api:function_view", + "../../../api/audio:audio_frame_api", "../../../rtc_base:checks", "../../../rtc_base:gunit_helpers", ] @@ -449,7 +446,7 @@ rtc_library("vad_wrapper_unittests") { deps = [ ":common", ":vad_wrapper", - "..:audio_frame_view", + "../../../api/audio:audio_frame_api", "../../../rtc_base:checks", "../../../rtc_base:gunit_helpers", "../../../rtc_base:safe_compare", diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc index e8edab602c..5f924cbbcf 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc @@ -124,7 +124,7 @@ AdaptiveDigitalGainController::AdaptiveDigitalGainController( } void AdaptiveDigitalGainController::Process(const FrameInfo& info, - AudioFrameView frame) { + DeinterleavedView frame) { RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f); RTC_DCHECK_GE(frame.num_channels(), 1); RTC_DCHECK( diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h index 9ae74a2dc8..d464dc6b2c 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h @@ -14,8 +14,8 @@ #include #include "api/audio/audio_processing.h" +#include "api/audio/audio_view.h" #include "modules/audio_processing/agc2/gain_applier.h" -#include "modules/audio_processing/include/audio_frame_view.h" namespace webrtc { @@ -46,7 +46,7 @@ class AdaptiveDigitalGainController { // Analyzes `info`, updates the digital gain and applies it to a 10 ms // `frame`. Supports any sample rate supported by APM. - void Process(const FrameInfo& info, AudioFrameView frame); + void Process(const FrameInfo& info, DeinterleavedView frame); private: ApmDataDumper* const apm_data_dumper_; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc index 88fb792f2d..39e175d403 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc @@ -83,7 +83,7 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, // Make one call with reasonable audio level values and settings. VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig), - fake_audio.float_frame_view()); + fake_audio.view()); } // Checks that the maximum allowed gain is applied. @@ -103,7 +103,7 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, MaxGainApplied) { float applied_gain; for (int i = 0; i < kNumFramesToAdapt; ++i) { VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f); - helper.gain_applier->Process(info, fake_audio.float_frame_view()); + helper.gain_applier->Process(info, fake_audio.view()); applied_gain = fake_audio.float_frame_view().channel(0)[0]; } const float applied_gain_db = 20.0f * std::log10f(applied_gain); @@ -129,8 +129,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) { AdaptiveDigitalGainController::FrameInfo info = GetFrameInfoToNotAdapt(kDefaultConfig); info.speech_level_dbfs = initial_level_dbfs; - helper.gain_applier->Process(info, fake_audio.float_frame_view()); - float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; + helper.gain_applier->Process(info, fake_audio.view()); + float current_gain_linear = fake_audio.view()[0][0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), max_change_per_frame_linear); last_gain_linear = current_gain_linear; @@ -143,8 +143,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) { AdaptiveDigitalGainController::FrameInfo info = GetFrameInfoToNotAdapt(kDefaultConfig); info.speech_level_dbfs = 0.f; - helper.gain_applier->Process(info, fake_audio.float_frame_view()); - float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; + helper.gain_applier->Process(info, fake_audio.view()); + float current_gain_linear = fake_audio.view()[0][0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), max_change_per_frame_linear); last_gain_linear = current_gain_linear; @@ -160,10 +160,10 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) { AdaptiveDigitalGainController::FrameInfo info = GetFrameInfoToNotAdapt(kDefaultConfig); info.speech_level_dbfs = initial_level_dbfs; - helper.gain_applier->Process(info, fake_audio.float_frame_view()); + helper.gain_applier->Process(info, fake_audio.view()); float maximal_difference = 0.0f; float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db); - for (const auto& x : fake_audio.float_frame_view().channel(0)) { + for (const auto& x : fake_audio.view()[0]) { const float difference = std::abs(x - current_value); maximal_difference = std::max(maximal_difference, difference); current_value = x; @@ -195,13 +195,13 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) { GetFrameInfoToNotAdapt(kDefaultConfig); info.speech_level_dbfs = initial_level_dbfs; info.noise_rms_dbfs = kWithNoiseDbfs; - helper.gain_applier->Process(info, fake_audio.float_frame_view()); + auto fake_view = fake_audio.view(); + helper.gain_applier->Process(info, fake_view); // Wait so that the adaptive gain applier has time to lower the gain. if (i > num_initial_frames) { const float maximal_ratio = - *std::max_element(fake_audio.float_frame_view().channel(0).begin(), - fake_audio.float_frame_view().channel(0).end()); + *std::max_element(fake_view[0].begin(), fake_view[0].end()); EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f); } @@ -217,7 +217,7 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, AdaptiveDigitalGainController::FrameInfo info = GetFrameInfoToNotAdapt(kDefaultConfig); info.speech_level_dbfs = 5.0f; - helper.gain_applier->Process(info, fake_audio.float_frame_view()); + helper.gain_applier->Process(info, fake_audio.view()); } TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) { @@ -239,13 +239,13 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) { info.speech_level_dbfs = initial_level_dbfs; info.limiter_envelope_dbfs = 1.0f; info.speech_level_reliable = false; - helper.gain_applier->Process(info, fake_audio.float_frame_view()); + auto fake_view = fake_audio.view(); + helper.gain_applier->Process(info, fake_view); // Wait so that the adaptive gain applier has time to lower the gain. if (i > num_initial_frames) { const float maximal_ratio = - *std::max_element(fake_audio.float_frame_view().channel(0).begin(), - fake_audio.float_frame_view().channel(0).end()); + *std::max_element(fake_view[0].begin(), fake_view[0].end()); EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f); } @@ -271,8 +271,8 @@ TEST_P(AdaptiveDigitalGainControllerParametrizedTest, for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) { SCOPED_TRACE(i); VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f); - helper.gain_applier->Process(info, audio.float_frame_view()); - const float gain = audio.float_frame_view().channel(0)[0]; + helper.gain_applier->Process(info, audio.view()); + const float gain = audio.view()[0][0]; if (i > 0) { EXPECT_EQ(prev_gain, gain); // No gain increase applied. } @@ -293,16 +293,16 @@ TEST_P(AdaptiveDigitalGainControllerParametrizedTest, for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) { SCOPED_TRACE(i); VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f); - helper.gain_applier->Process(info, audio.float_frame_view()); - prev_gain = audio.float_frame_view().channel(0)[0]; + helper.gain_applier->Process(info, audio.view()); + prev_gain = audio.view()[0][0]; } // Process one more speech frame. VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f); - helper.gain_applier->Process(info, audio.float_frame_view()); + helper.gain_applier->Process(info, audio.view()); // An increased gain has been applied. - EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain); + EXPECT_GT(audio.view()[0][0], prev_gain); } INSTANTIATE_TEST_SUITE_P( diff --git a/modules/audio_processing/agc2/gain_applier.cc b/modules/audio_processing/agc2/gain_applier.cc index f9e276d3a8..f833ad1fbe 100644 --- a/modules/audio_processing/agc2/gain_applier.cc +++ b/modules/audio_processing/agc2/gain_applier.cc @@ -10,7 +10,7 @@ #include "modules/audio_processing/agc2/gain_applier.h" -#include "api/array_view.h" +#include "api/audio/audio_view.h" #include "modules/audio_processing/agc2/agc2_common.h" #include "rtc_base/numerics/safe_minmax.h" @@ -24,9 +24,9 @@ bool GainCloseToOne(float gain_factor) { gain_factor <= 1.f + 1.f / kMaxFloatS16Value; } -void ClipSignal(AudioFrameView signal) { - for (int k = 0; k < signal.num_channels(); ++k) { - rtc::ArrayView channel_view = signal.channel(k); +void ClipSignal(DeinterleavedView signal) { + for (size_t k = 0; k < signal.num_channels(); ++k) { + MonoView channel_view = signal[k]; for (auto& sample : channel_view) { sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value); } @@ -36,7 +36,7 @@ void ClipSignal(AudioFrameView signal) { void ApplyGainWithRamping(float last_gain_linear, float gain_at_end_of_frame_linear, float inverse_samples_per_channel, - AudioFrameView float_frame) { + DeinterleavedView float_frame) { // Do not modify the signal. if (last_gain_linear == gain_at_end_of_frame_linear && GainCloseToOne(gain_at_end_of_frame_linear)) { @@ -45,8 +45,8 @@ void ApplyGainWithRamping(float last_gain_linear, // Gain is constant and different from 1. if (last_gain_linear == gain_at_end_of_frame_linear) { - for (int k = 0; k < float_frame.num_channels(); ++k) { - rtc::ArrayView channel_view = float_frame.channel(k); + for (size_t k = 0; k < float_frame.num_channels(); ++k) { + MonoView channel_view = float_frame[k]; for (auto& sample : channel_view) { sample *= gain_at_end_of_frame_linear; } @@ -57,12 +57,12 @@ void ApplyGainWithRamping(float last_gain_linear, // The gain changes. We have to change slowly to avoid discontinuities. const float increment = (gain_at_end_of_frame_linear - last_gain_linear) * inverse_samples_per_channel; - float gain = last_gain_linear; - for (int i = 0; i < float_frame.samples_per_channel(); ++i) { - for (int ch = 0; ch < float_frame.num_channels(); ++ch) { - float_frame.channel(ch)[i] *= gain; + for (size_t ch = 0; ch < float_frame.num_channels(); ++ch) { + float gain = last_gain_linear; + for (float& sample : float_frame[ch]) { + sample *= gain; + gain += increment; } - gain += increment; } } @@ -73,7 +73,7 @@ GainApplier::GainApplier(bool hard_clip_samples, float initial_gain_factor) last_gain_factor_(initial_gain_factor), current_gain_factor_(initial_gain_factor) {} -void GainApplier::ApplyGain(AudioFrameView signal) { +void GainApplier::ApplyGain(DeinterleavedView signal) { if (static_cast(signal.samples_per_channel()) != samples_per_channel_) { Initialize(signal.samples_per_channel()); } diff --git a/modules/audio_processing/agc2/gain_applier.h b/modules/audio_processing/agc2/gain_applier.h index ba8a4a4cd2..82ae82eeef 100644 --- a/modules/audio_processing/agc2/gain_applier.h +++ b/modules/audio_processing/agc2/gain_applier.h @@ -13,6 +13,7 @@ #include +#include "api/audio/audio_view.h" #include "modules/audio_processing/include/audio_frame_view.h" namespace webrtc { @@ -20,10 +21,15 @@ class GainApplier { public: GainApplier(bool hard_clip_samples, float initial_gain_factor); - void ApplyGain(AudioFrameView signal); + void ApplyGain(DeinterleavedView signal); void SetGainFactor(float gain_factor); float GetGainFactor() const { return current_gain_factor_; } + [[deprecated("Use DeinterleavedView<> version")]] void ApplyGain( + AudioFrameView signal) { + ApplyGain(signal.view()); + } + private: void Initialize(int samples_per_channel); diff --git a/modules/audio_processing/agc2/gain_applier_unittest.cc b/modules/audio_processing/agc2/gain_applier_unittest.cc index 3296345e62..7548faa61b 100644 --- a/modules/audio_processing/agc2/gain_applier_unittest.cc +++ b/modules/audio_processing/agc2/gain_applier_unittest.cc @@ -15,6 +15,7 @@ #include #include +#include "api/audio/audio_view.h" #include "modules/audio_processing/agc2/vector_float_frame.h" #include "rtc_base/gunit.h" @@ -25,9 +26,9 @@ TEST(AutomaticGainController2GainApplier, InitialGainIsRespected) { VectorFloatFrame fake_audio(1, 1, initial_signal_level); GainApplier gain_applier(true, gain_factor); - gain_applier.ApplyGain(fake_audio.float_frame_view()); - EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0], - initial_signal_level * gain_factor, 0.1f); + auto fake_view = fake_audio.view(); + gain_applier.ApplyGain(fake_audio.view()); + EXPECT_NEAR(fake_view[0][0], initial_signal_level * gain_factor, 0.1f); } TEST(AutomaticGainController2GainApplier, ClippingIsDone) { @@ -36,9 +37,9 @@ TEST(AutomaticGainController2GainApplier, ClippingIsDone) { VectorFloatFrame fake_audio(1, 1, initial_signal_level); GainApplier gain_applier(true, gain_factor); - gain_applier.ApplyGain(fake_audio.float_frame_view()); - EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0], - std::numeric_limits::max(), 0.1f); + gain_applier.ApplyGain(fake_audio.view()); + EXPECT_NEAR(fake_audio.view()[0][0], std::numeric_limits::max(), + 0.1f); } TEST(AutomaticGainController2GainApplier, ClippingIsNotDone) { @@ -47,10 +48,10 @@ TEST(AutomaticGainController2GainApplier, ClippingIsNotDone) { VectorFloatFrame fake_audio(1, 1, initial_signal_level); GainApplier gain_applier(false, gain_factor); - gain_applier.ApplyGain(fake_audio.float_frame_view()); + gain_applier.ApplyGain(fake_audio.view()); - EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0], - initial_signal_level * gain_factor, 0.1f); + EXPECT_NEAR(fake_audio.view()[0][0], initial_signal_level * gain_factor, + 0.1f); } TEST(AutomaticGainController2GainApplier, RampingIsDone) { @@ -64,13 +65,13 @@ TEST(AutomaticGainController2GainApplier, RampingIsDone) { GainApplier gain_applier(false, initial_gain_factor); gain_applier.SetGainFactor(target_gain_factor); - gain_applier.ApplyGain(fake_audio.float_frame_view()); + gain_applier.ApplyGain(fake_audio.view()); // The maximal gain change should be close to that in linear interpolation. for (size_t channel = 0; channel < num_channels; ++channel) { float max_signal_change = 0.f; float last_signal_level = initial_signal_level; - for (const auto sample : fake_audio.float_frame_view().channel(channel)) { + for (const auto sample : fake_audio.view()[channel]) { const float current_change = fabs(last_signal_level - sample); max_signal_change = std::max(max_signal_change, current_change); last_signal_level = sample; @@ -84,10 +85,10 @@ TEST(AutomaticGainController2GainApplier, RampingIsDone) { // Next frame should have the desired level. VectorFloatFrame next_fake_audio_frame(num_channels, samples_per_channel, initial_signal_level); - gain_applier.ApplyGain(next_fake_audio_frame.float_frame_view()); + gain_applier.ApplyGain(next_fake_audio_frame.view()); // The last sample should have the new gain. - EXPECT_NEAR(next_fake_audio_frame.float_frame_view().channel(0)[0], + EXPECT_NEAR(next_fake_audio_frame.view()[0][0], initial_signal_level * target_gain_factor, 0.1f); } } // namespace webrtc diff --git a/modules/audio_processing/agc2/noise_level_estimator.cc b/modules/audio_processing/agc2/noise_level_estimator.cc index 691513b509..c43738aad3 100644 --- a/modules/audio_processing/agc2/noise_level_estimator.cc +++ b/modules/audio_processing/agc2/noise_level_estimator.cc @@ -16,7 +16,7 @@ #include #include -#include "api/array_view.h" +#include "api/audio/audio_view.h" #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" @@ -25,11 +25,12 @@ namespace { constexpr int kFramesPerSecond = 100; -float FrameEnergy(const AudioFrameView& audio) { +float FrameEnergy(DeinterleavedView audio) { float energy = 0.0f; - for (int k = 0; k < audio.num_channels(); ++k) { + for (size_t k = 0; k < audio.num_channels(); ++k) { + MonoView ch = audio[k]; float channel_energy = - std::accumulate(audio.channel(k).begin(), audio.channel(k).end(), 0.0f, + std::accumulate(ch.begin(), ch.end(), 0.0f, [](float a, float b) -> float { return a + b * b; }); energy = std::max(channel_energy, energy); } @@ -81,7 +82,7 @@ class NoiseFloorEstimator : public NoiseLevelEstimator { NoiseFloorEstimator& operator=(const NoiseFloorEstimator&) = delete; ~NoiseFloorEstimator() = default; - float Analyze(const AudioFrameView& frame) override { + float Analyze(DeinterleavedView frame) override { // Detect sample rate changes. const int sample_rate_hz = static_cast(frame.samples_per_channel() * kFramesPerSecond); diff --git a/modules/audio_processing/agc2/noise_level_estimator.h b/modules/audio_processing/agc2/noise_level_estimator.h index 9f3b957486..8df4cbc93d 100644 --- a/modules/audio_processing/agc2/noise_level_estimator.h +++ b/modules/audio_processing/agc2/noise_level_estimator.h @@ -13,7 +13,7 @@ #include -#include "modules/audio_processing/include/audio_frame_view.h" +#include "api/audio/audio_view.h" namespace webrtc { class ApmDataDumper; @@ -24,7 +24,7 @@ class NoiseLevelEstimator { virtual ~NoiseLevelEstimator() = default; // Analyzes a 10 ms `frame`, updates the noise level estimation and returns // the value for the latter in dBFS. - virtual float Analyze(const AudioFrameView& frame) = 0; + virtual float Analyze(DeinterleavedView frame) = 0; }; // Creates a noise level estimator based on noise floor detection. diff --git a/modules/audio_processing/agc2/noise_level_estimator_unittest.cc b/modules/audio_processing/agc2/noise_level_estimator_unittest.cc index 8168c5a229..9d42bfc0fb 100644 --- a/modules/audio_processing/agc2/noise_level_estimator_unittest.cc +++ b/modules/audio_processing/agc2/noise_level_estimator_unittest.cc @@ -15,6 +15,7 @@ #include #include +#include "api/audio/audio_view.h" #include "api/function_view.h" #include "modules/audio_processing/agc2/agc2_testing_common.h" #include "modules/audio_processing/agc2/vector_float_frame.h" @@ -36,13 +37,13 @@ float RunEstimator(rtc::FunctionView sample_generator, rtc::CheckedDivExact(sample_rate_hz, kFramesPerSecond); VectorFloatFrame signal(1, samples_per_channel, 0.0f); for (int i = 0; i < kNumIterations; ++i) { - AudioFrameView frame_view = signal.float_frame_view(); + DeinterleavedView frame_view = signal.view(); for (int j = 0; j < samples_per_channel; ++j) { - frame_view.channel(0)[j] = sample_generator(); + frame_view[0][j] = sample_generator(); } estimator.Analyze(frame_view); } - return estimator.Analyze(signal.float_frame_view()); + return estimator.Analyze(signal.view()); } class NoiseEstimatorParametrization : public ::testing::TestWithParam { diff --git a/modules/audio_processing/agc2/vad_wrapper.cc b/modules/audio_processing/agc2/vad_wrapper.cc index 3bafdc7c56..8de8abd315 100644 --- a/modules/audio_processing/agc2/vad_wrapper.cc +++ b/modules/audio_processing/agc2/vad_wrapper.cc @@ -13,7 +13,6 @@ #include #include -#include "api/array_view.h" #include "common_audio/resampler/include/push_resampler.h" #include "modules/audio_processing/agc2/agc2_common.h" #include "modules/audio_processing/agc2/rnn_vad/common.h" @@ -36,7 +35,7 @@ class MonoVadImpl : public VoiceActivityDetectorWrapper::MonoVad { int SampleRateHz() const override { return rnn_vad::kSampleRate24kHz; } void Reset() override { rnn_vad_.Reset(); } - float Analyze(rtc::ArrayView frame) override { + float Analyze(MonoView frame) override { RTC_DCHECK_EQ(frame.size(), rnn_vad::kFrameSize10ms24kHz); std::array feature_vector; const bool is_silence = features_extractor_.CheckSilenceComputeFeatures( @@ -87,7 +86,8 @@ VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( VoiceActivityDetectorWrapper::~VoiceActivityDetectorWrapper() = default; -float VoiceActivityDetectorWrapper::Analyze(AudioFrameView frame) { +float VoiceActivityDetectorWrapper::Analyze( + DeinterleavedView frame) { // Periodically reset the VAD. time_to_vad_reset_--; if (time_to_vad_reset_ <= 0) { @@ -98,7 +98,7 @@ float VoiceActivityDetectorWrapper::Analyze(AudioFrameView frame) { // Resample the first channel of `frame`. RTC_DCHECK_EQ(frame.samples_per_channel(), frame_size_); MonoView dst(resampled_buffer_.data(), resampled_buffer_.size()); - resampler_.Resample(frame.channel(0), dst); + resampler_.Resample(frame[0], dst); return vad_->Analyze(resampled_buffer_); } diff --git a/modules/audio_processing/agc2/vad_wrapper.h b/modules/audio_processing/agc2/vad_wrapper.h index 8751dbaf75..025a48ef22 100644 --- a/modules/audio_processing/agc2/vad_wrapper.h +++ b/modules/audio_processing/agc2/vad_wrapper.h @@ -14,10 +14,9 @@ #include #include -#include "api/array_view.h" +#include "api/audio/audio_view.h" #include "common_audio/resampler/include/push_resampler.h" #include "modules/audio_processing/agc2/cpu_features.h" -#include "modules/audio_processing/include/audio_frame_view.h" namespace webrtc { @@ -37,7 +36,7 @@ class VoiceActivityDetectorWrapper { // Resets the internal state. virtual void Reset() = 0; // Analyzes an audio frame and returns the speech probability. - virtual float Analyze(rtc::ArrayView frame) = 0; + virtual float Analyze(MonoView frame) = 0; }; // Ctor. Uses `cpu_features` to instantiate the default VAD. @@ -63,7 +62,7 @@ class VoiceActivityDetectorWrapper { // Analyzes the first channel of `frame` and returns the speech probability. // `frame` must be a 10 ms frame with the sample rate specified in the last // `Initialize()` call. - float Analyze(AudioFrameView frame); + float Analyze(DeinterleavedView frame); private: const int vad_reset_period_frames_; diff --git a/modules/audio_processing/agc2/vad_wrapper_unittest.cc b/modules/audio_processing/agc2/vad_wrapper_unittest.cc index 6f66560060..9d8761d23e 100644 --- a/modules/audio_processing/agc2/vad_wrapper_unittest.cc +++ b/modules/audio_processing/agc2/vad_wrapper_unittest.cc @@ -16,8 +16,8 @@ #include #include +#include "api/audio/audio_view.h" #include "modules/audio_processing/agc2/agc2_common.h" -#include "modules/audio_processing/include/audio_frame_view.h" #include "rtc_base/checks.h" #include "rtc_base/gunit.h" #include "rtc_base/numerics/safe_compare.h" @@ -85,11 +85,9 @@ struct FrameWithView { explicit FrameWithView(int sample_rate_hz) : samples(rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond), 0.0f), - channel0(samples.data()), - view(&channel0, /*num_channels=*/1, samples.size()) {} + view(samples.data(), samples.size(), /*num_channels=*/1) {} std::vector samples; - const float* const channel0; - const AudioFrameView view; + const DeinterleavedView view; }; // Checks that the expected speech probabilities are returned. diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index f0625c5dc2..2fb021c7c3 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -1473,8 +1473,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { absl::optional voice_probability; if (!!submodules_.voice_activity_detector) { - voice_probability = submodules_.voice_activity_detector->Analyze( - AudioFrameView(capture_buffer->view())); + voice_probability = + submodules_.voice_activity_detector->Analyze(capture_buffer->view()); } if (submodules_.transient_suppressor) { diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 9b19bc8517..9bfae527d7 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -64,11 +64,11 @@ struct SpeechLevel { }; // Computes the audio levels for the first channel in `frame`. -AudioLevels ComputeAudioLevels(AudioFrameView frame, +AudioLevels ComputeAudioLevels(DeinterleavedView frame, ApmDataDumper& data_dumper) { float peak = 0.0f; float rms = 0.0f; - for (const auto& x : frame.channel(0)) { + for (const auto& x : frame[0]) { peak = std::max(std::fabs(x), peak); rms += x * x; } @@ -182,8 +182,8 @@ void GainController2::Process(absl::optional speech_probability, saturation_protector_->Reset(); } - AudioFrameView float_frame(audio->channels(), audio->num_channels(), - audio->num_frames()); + DeinterleavedView float_frame = audio->view(); + // Compute speech probability. if (vad_) { // When the VAD component runs, `speech_probability` should not be specified @@ -258,7 +258,7 @@ void GainController2::Process(absl::optional speech_probability, // computation in `limiter_`. fixed_gain_applier_.ApplyGain(float_frame); - limiter_.Process(float_frame.view()); + limiter_.Process(float_frame); // Periodically log limiter stats. if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) { diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc index 71642dc38c..bccab8d060 100644 --- a/modules/audio_processing/gain_controller2_unittest.cc +++ b/modules/audio_processing/gain_controller2_unittest.cc @@ -16,7 +16,6 @@ #include #include -#include "api/array_view.h" #include "modules/audio_processing/agc2/agc2_testing_common.h" #include "modules/audio_processing/audio_buffer.h" #include "modules/audio_processing/test/audio_buffer_tools.h" @@ -596,9 +595,7 @@ TEST(GainController2, agc2_reference.Process(absl::nullopt, /*input_volume_changed=*/false, &audio_buffer_reference); test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer); - float speech_probability = vad.Analyze(AudioFrameView( - audio_buffer.channels(), audio_buffer.num_channels(), - audio_buffer.num_frames())); + float speech_probability = vad.Analyze(audio_buffer.view()); agc2.Process(speech_probability, /*input_volume_changed=*/false, &audio_buffer); // Check the output buffer.