Remove PushResampler<T>::InitializeIfNeeded
This switches PushResampler from accepting sample rates, which were converted internally to per-channel buffer sizes, to accepting the per-channel buffer sizes directly.

Instead of InitializeIfNeeded:
* Offer a way to explicitly initialize PushResampler via the ctor (needed for VoiceActivityDetectorWrapper).
* Implicitly check for the right configuration from within Resample(). (All calls to Resample() were preceded by a call to Initialize.)

As part of this, refactor VoiceActivityDetectorWrapper (VADW):
* VADW is now fully initialized in the constructor and is more const.
* Remove VADW::Initialize() and instead reconstruct VADW when needed.

Add constants for the max sample rate and samples per channel to audio_util.h. In many places these values were hard-coded, which has led to some inconsistency.

Bug: chromium:335805780
Change-Id: Iead0d52eb1b261a8d64e93f51401147c8fba32f0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/353360
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Tomas Gunnarsson <tommi@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42587}
parent 5e49544a76
commit d6ef33e59b
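A minimal usage sketch of the reworked API (not part of the commit; the include path and the 48 kHz to 16 kHz stereo figures are illustrative assumptions). The resampler can now be configured explicitly through its constructor with 10 ms per-channel buffer sizes, or left default-constructed and configured lazily by Resample(), which calls EnsureInitialized() with the dimensions of the views it is handed.

// Illustrative sketch only; values and include path are assumptions.
#include "common_audio/resampler/include/push_resampler.h"

// Option 1: configure explicitly via the constructor, using 10 ms per-channel
// buffer sizes instead of sample rates (this is what the reworked
// VoiceActivityDetectorWrapper does).
webrtc::PushResampler<int16_t> configured_resampler(
    /*src_samples_per_channel=*/480,  // 10 ms at 48 kHz
    /*dst_samples_per_channel=*/160,  // 10 ms at 16 kHz
    /*num_channels=*/2);

// Option 2: default-construct and let Resample() configure lazily; it calls
// EnsureInitialized() with the dimensions of the views it receives, which is
// a no-op when nothing has changed.
webrtc::PushResampler<int16_t> lazy_resampler;

int ResampleFrame(webrtc::InterleavedView<const int16_t> src,
                  webrtc::InterleavedView<int16_t> dst) {
  // Returns the total number of samples written to `dst`.
  return lazy_resampler.Resample(src, dst);
}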
@@ -81,9 +81,6 @@ int Resample(const AudioFrame& frame,
   RTC_CHECK_EQ(destination.data().size(),
                frame.num_channels_ * target_number_of_samples_per_channel);
 
-  resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
-                                static_cast<int>(frame.num_channels()));
-
   // TODO(yujo): Add special case handling of muted frames.
   return resampler->Resample(frame.data_view(), destination);
 }
@@ -61,14 +61,6 @@ void RemixAndResample(InterleavedView<const int16_t> src_data,
     src_data = downmixed;
   }
 
-  if (resampler->InitializeIfNeeded(sample_rate_hz, dst_frame->sample_rate_hz_,
-                                    src_data.num_channels()) == -1) {
-    RTC_FATAL() << "InitializeIfNeeded failed: sample_rate_hz = "
-                << sample_rate_hz << ", dst_frame->sample_rate_hz_ = "
-                << dst_frame->sample_rate_hz_
-                << ", num_channels = " << src_data.num_channels();
-  }
-
   // TODO(yujo): for muted input frames, don't resample. Either 1) allow
   // resampler to return output length without doing the resample, so we know
   // how much to zero here; or 2) make resampler accept a hint that the input is
@@ -25,6 +25,17 @@ namespace webrtc {
 
 typedef std::numeric_limits<int16_t> limits_int16;
 
+// TODO(tommi, peah): Move these constants to their own header, e.g.
+// `audio_constants.h`. Also consider if they should be in api/.
+
+// Absolute highest acceptable sample rate supported for audio processing,
+// capture and codecs. Note that for some components some cases a lower limit
+// applies which typically is 48000 but in some cases is lower.
+constexpr int kMaxSampleRateHz = 384000;
+
+// Number of samples per channel for 10ms of audio at the highest sample rate.
+constexpr size_t kMaxSamplesPerChannel10ms = kMaxSampleRateHz / 100u;
+
 // The conversion functions use the following naming convention:
 // S16: int16_t [-32768, 32767]
 // Float: float [-1.0, 1.0]
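For context, a hedged sketch of how the new constants are meant to replace locally hard-coded limits when sizing 10 ms scratch buffers; the helper name is hypothetical, and the pattern mirrors the AudioBuffer changes further down.

#include <algorithm>
#include <array>

#include "common_audio/include/audio_util.h"

// Hypothetical helper: a stack buffer sized from the shared constant covers
// 10 ms at every supported rate (384000 / 100 = 3840 samples per channel).
void CopyTenMsMono(const float* samples, int sample_rate_hz, float* out) {
  std::array<float, webrtc::kMaxSamplesPerChannel10ms> scratch;
  const size_t samples_per_channel = static_cast<size_t>(sample_rate_hz / 100);
  std::copy(samples, samples + samples_per_channel, scratch.begin());
  // ... process scratch[0, samples_per_channel) in place ...
  std::copy(scratch.begin(), scratch.begin() + samples_per_channel, out);
}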
@@ -23,16 +23,13 @@ class PushSincResampler;
 // Wraps PushSincResampler to provide stereo support.
 // Note: This implementation assumes 10ms buffer sizes throughout.
 template <typename T>
-class PushResampler {
+class PushResampler final {
  public:
   PushResampler();
-  virtual ~PushResampler();
-
-  // Must be called whenever the parameters change. Free to be called at any
-  // time as it is a no-op if parameters have not changed since the last call.
-  int InitializeIfNeeded(int src_sample_rate_hz,
-                         int dst_sample_rate_hz,
-                         size_t num_channels);
+  PushResampler(size_t src_samples_per_channel,
+                size_t dst_samples_per_channel,
+                size_t num_channels);
+  ~PushResampler();
 
   // Returns the total number of samples provided in destination (e.g. 32 kHz,
   // 2 channel audio gives 640 samples).
@@ -42,6 +39,12 @@ class PushResampler {
   int Resample(MonoView<const T> src, MonoView<T> dst);
 
  private:
+  // Ensures that source and destination buffers for deinterleaving are
+  // correctly configured prior to resampling that requires deinterleaving.
+  void EnsureInitialized(size_t src_samples_per_channel,
+                         size_t dst_samples_per_channel,
+                         size_t num_channels);
+
   // Buffers used for when a deinterleaving step is necessary.
   std::unique_ptr<T[]> source_;
   std::unique_ptr<T[]> destination_;
@@ -22,55 +22,66 @@
 
 namespace webrtc {
 
+namespace {
+// Maximum concurrent number of channels for `PushResampler<>`.
+// Note that this may be different from what the maximum is for audio codecs.
+constexpr int kMaxNumberOfChannels = 8;
+}  // namespace
+
 template <typename T>
 PushResampler<T>::PushResampler() = default;
 
+template <typename T>
+PushResampler<T>::PushResampler(size_t src_samples_per_channel,
+                                size_t dst_samples_per_channel,
+                                size_t num_channels) {
+  EnsureInitialized(src_samples_per_channel, dst_samples_per_channel,
+                    num_channels);
+}
+
 template <typename T>
 PushResampler<T>::~PushResampler() = default;
 
 template <typename T>
-int PushResampler<T>::InitializeIfNeeded(int src_sample_rate_hz,
-                                         int dst_sample_rate_hz,
-                                         size_t num_channels) {
-  // These checks used to be factored out of this template function due to
-  // Windows debug build issues with clang. http://crbug.com/615050
-  RTC_CHECK_GT(src_sample_rate_hz, 0);
-  RTC_CHECK_GT(dst_sample_rate_hz, 0);
-  RTC_CHECK_GT(num_channels, 0);
-
-  const size_t src_size_10ms_mono =
-      static_cast<size_t>(src_sample_rate_hz / 100);
-  const size_t dst_size_10ms_mono =
-      static_cast<size_t>(dst_sample_rate_hz / 100);
-
-  if (src_size_10ms_mono == SamplesPerChannel(source_view_) &&
-      dst_size_10ms_mono == SamplesPerChannel(destination_view_) &&
+void PushResampler<T>::EnsureInitialized(size_t src_samples_per_channel,
+                                         size_t dst_samples_per_channel,
+                                         size_t num_channels) {
+  RTC_DCHECK_GT(src_samples_per_channel, 0);
+  RTC_DCHECK_GT(dst_samples_per_channel, 0);
+  RTC_DCHECK_GT(num_channels, 0);
+  RTC_DCHECK_LE(src_samples_per_channel, kMaxSamplesPerChannel10ms);
+  RTC_DCHECK_LE(dst_samples_per_channel, kMaxSamplesPerChannel10ms);
+  RTC_DCHECK_LE(num_channels, kMaxNumberOfChannels);
+
+  if (src_samples_per_channel == SamplesPerChannel(source_view_) &&
+      dst_samples_per_channel == SamplesPerChannel(destination_view_) &&
       num_channels == NumChannels(source_view_)) {
     // No-op if settings haven't changed.
-    return 0;
+    return;
   }
 
   // Allocate two buffers for all source and destination channels.
   // Then organize source and destination views together with an array of
   // resamplers for each channel in the deinterlaved buffers.
-  source_.reset(new T[src_size_10ms_mono * num_channels]);
-  destination_.reset(new T[dst_size_10ms_mono * num_channels]);
-  source_view_ =
-      DeinterleavedView<T>(source_.get(), src_size_10ms_mono, num_channels);
-  destination_view_ = DeinterleavedView<T>(destination_.get(),
-                                           dst_size_10ms_mono, num_channels);
+  source_.reset(new T[src_samples_per_channel * num_channels]);
+  destination_.reset(new T[dst_samples_per_channel * num_channels]);
+  source_view_ = DeinterleavedView<T>(source_.get(), src_samples_per_channel,
+                                      num_channels);
+  destination_view_ = DeinterleavedView<T>(
+      destination_.get(), dst_samples_per_channel, num_channels);
   resamplers_.resize(num_channels);
   for (size_t i = 0; i < num_channels; ++i) {
-    resamplers_[i] = std::make_unique<PushSincResampler>(src_size_10ms_mono,
-                                                         dst_size_10ms_mono);
+    resamplers_[i] = std::make_unique<PushSincResampler>(
+        src_samples_per_channel, dst_samples_per_channel);
   }
-
-  return 0;
 }
 
 template <typename T>
 int PushResampler<T>::Resample(InterleavedView<const T> src,
                                InterleavedView<T> dst) {
+  EnsureInitialized(SamplesPerChannel(src), SamplesPerChannel(dst),
+                    NumChannels(src));
+
   RTC_DCHECK_EQ(NumChannels(src), NumChannels(source_view_));
   RTC_DCHECK_EQ(NumChannels(dst), NumChannels(destination_view_));
   RTC_DCHECK_EQ(SamplesPerChannel(src), SamplesPerChannel(source_view_));
@@ -19,29 +19,24 @@
 namespace webrtc {
 
 TEST(PushResamplerTest, VerifiesInputParameters) {
-  PushResampler<int16_t> resampler;
-  EXPECT_EQ(0, resampler.InitializeIfNeeded(16000, 16000, 1));
-  EXPECT_EQ(0, resampler.InitializeIfNeeded(16000, 16000, 2));
-  EXPECT_EQ(0, resampler.InitializeIfNeeded(16000, 16000, 8));
+  PushResampler<int16_t> resampler1(160, 160, 1);
+  PushResampler<int16_t> resampler2(160, 160, 2);
+  PushResampler<int16_t> resampler3(160, 160, 8);
 }
 
 #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
 TEST(PushResamplerDeathTest, VerifiesBadInputParameters1) {
-  PushResampler<int16_t> resampler;
-  RTC_EXPECT_DEATH(resampler.InitializeIfNeeded(-1, 16000, 1),
-                   "src_sample_rate_hz");
+  RTC_EXPECT_DEATH(PushResampler<int16_t>(-1, 160, 1),
+                   "src_samples_per_channel");
 }
 
 TEST(PushResamplerDeathTest, VerifiesBadInputParameters2) {
-  PushResampler<int16_t> resampler;
-  RTC_EXPECT_DEATH(resampler.InitializeIfNeeded(16000, -1, 1),
-                   "dst_sample_rate_hz");
+  RTC_EXPECT_DEATH(PushResampler<int16_t>(160, -1, 1),
+                   "dst_samples_per_channel");
 }
 
 TEST(PushResamplerDeathTest, VerifiesBadInputParameters3) {
-  PushResampler<int16_t> resampler;
-  RTC_EXPECT_DEATH(resampler.InitializeIfNeeded(16000, 16000, 0),
-                   "num_channels");
+  RTC_EXPECT_DEATH(PushResampler<int16_t>(160, 16000, 0), "num_channels");
 }
 #endif
 
@@ -44,14 +44,6 @@ int ACMResampler::Resample10Msec(const int16_t* in_audio,
     return static_cast<int>(dst.samples_per_channel());
   }
 
-  if (resampler_.InitializeIfNeeded(in_freq_hz, out_freq_hz,
-                                    num_audio_channels) != 0) {
-    RTC_LOG(LS_ERROR) << "InitializeIfNeeded(" << in_freq_hz << ", "
-                      << out_freq_hz << ", " << num_audio_channels
-                      << ") failed.";
-    return -1;
-  }
-
   int out_length = resampler_.Resample(src, dst);
   if (out_length == -1) {
     RTC_LOG(LS_ERROR) << "Resample(" << in_audio << ", " << src.data().size()
@@ -73,28 +73,20 @@ VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
     int sample_rate_hz)
     : vad_reset_period_frames_(
           rtc::CheckedDivExact(vad_reset_period_ms, kFrameDurationMs)),
+      frame_size_(rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond)),
       time_to_vad_reset_(vad_reset_period_frames_),
-      vad_(std::move(vad)) {
-  RTC_DCHECK(vad_);
+      vad_(std::move(vad)),
+      resampled_buffer_(
+          rtc::CheckedDivExact(vad_->SampleRateHz(), kNumFramesPerSecond)),
+      resampler_(frame_size_,
+                 resampled_buffer_.size(),
+                 /*num_channels=*/1) {
   RTC_DCHECK_GT(vad_reset_period_frames_, 1);
-  resampled_buffer_.resize(
-      rtc::CheckedDivExact(vad_->SampleRateHz(), kNumFramesPerSecond));
-  Initialize(sample_rate_hz);
+  vad_->Reset();
 }
 
 VoiceActivityDetectorWrapper::~VoiceActivityDetectorWrapper() = default;
 
-void VoiceActivityDetectorWrapper::Initialize(int sample_rate_hz) {
-  RTC_DCHECK_GT(sample_rate_hz, 0);
-  frame_size_ = rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond);
-  int status =
-      resampler_.InitializeIfNeeded(sample_rate_hz, vad_->SampleRateHz(),
-                                    /*num_channels=*/1);
-  constexpr int kStatusOk = 0;
-  RTC_DCHECK_EQ(status, kStatusOk);
-  vad_->Reset();
-}
-
 float VoiceActivityDetectorWrapper::Analyze(AudioFrameView<const float> frame) {
   // Periodically reset the VAD.
   time_to_vad_reset_--;
@@ -60,9 +60,6 @@ class VoiceActivityDetectorWrapper {
       delete;
   ~VoiceActivityDetectorWrapper();
 
-  // Initializes the VAD wrapper.
-  void Initialize(int sample_rate_hz);
-
   // Analyzes the first channel of `frame` and returns the speech probability.
   // `frame` must be a 10 ms frame with the sample rate specified in the last
   // `Initialize()` call.
@@ -70,11 +67,11 @@ class VoiceActivityDetectorWrapper {
 
  private:
   const int vad_reset_period_frames_;
-  int frame_size_;
+  const int frame_size_;
   int time_to_vad_reset_;
-  PushResampler<float> resampler_;
   std::unique_ptr<MonoVad> vad_;
   std::vector<float> resampled_buffer_;
+  PushResampler<float> resampler_;
 };
 
 }  // namespace webrtc
@@ -50,7 +50,7 @@ class MockVad : public VoiceActivityDetectorWrapper::MonoVad {
 TEST(GainController2VoiceActivityDetectorWrapper, CtorAndInitReadSampleRate) {
   auto vad = std::make_unique<MockVad>();
   EXPECT_CALL(*vad, SampleRateHz)
-      .Times(2)
+      .Times(1)
       .WillRepeatedly(Return(kSampleRate8kHz));
   EXPECT_CALL(*vad, Reset).Times(AnyNumber());
   auto vad_wrapper = std::make_unique<VoiceActivityDetectorWrapper>(
@@ -15,7 +15,6 @@
 #include <cstdint>
 
 #include "common_audio/channel_buffer.h"
-#include "common_audio/include/audio_util.h"
 #include "common_audio/resampler/push_sinc_resampler.h"
 #include "modules/audio_processing/splitting_filter.h"
 #include "rtc_base/checks.h"
@@ -25,7 +24,6 @@ namespace {
 
 constexpr size_t kSamplesPer32kHzChannel = 320;
 constexpr size_t kSamplesPer48kHzChannel = 480;
-constexpr size_t kMaxSamplesPerChannel = AudioBuffer::kMaxSampleRate / 100;
 
 size_t NumBandsFromFramesPerChannel(size_t num_frames) {
   if (num_frames == kSamplesPer32kHzChannel) {
@@ -110,9 +108,9 @@ void AudioBuffer::CopyFrom(const float* const* stacked_data,
   const bool resampling_needed = input_num_frames_ != buffer_num_frames_;
 
   if (downmix_needed) {
-    RTC_DCHECK_GE(kMaxSamplesPerChannel, input_num_frames_);
+    RTC_DCHECK_GE(kMaxSamplesPerChannel10ms, input_num_frames_);
 
-    std::array<float, kMaxSamplesPerChannel> downmix;
+    std::array<float, kMaxSamplesPerChannel10ms> downmix;
     if (downmix_by_averaging_) {
       const float kOneByNumChannels = 1.f / input_num_channels_;
       for (size_t i = 0; i < input_num_frames_; ++i) {
@@ -230,7 +228,7 @@ void AudioBuffer::CopyFrom(const int16_t* const interleaved_data,
   if (num_channels_ == 1) {
     if (input_num_channels_ == 1) {
       if (resampling_required) {
-        std::array<float, kMaxSamplesPerChannel> float_buffer;
+        std::array<float, kMaxSamplesPerChannel10ms> float_buffer;
         S16ToFloatS16(interleaved, input_num_frames_, float_buffer.data());
         input_resamplers_[0]->Resample(float_buffer.data(), input_num_frames_,
                                        data_->channels()[0],
@@ -239,7 +237,7 @@ void AudioBuffer::CopyFrom(const int16_t* const interleaved_data,
         S16ToFloatS16(interleaved, input_num_frames_, data_->channels()[0]);
       }
     } else {
-      std::array<float, kMaxSamplesPerChannel> float_buffer;
+      std::array<float, kMaxSamplesPerChannel10ms> float_buffer;
       float* downmixed_data =
           resampling_required ? float_buffer.data() : data_->channels()[0];
       if (downmix_by_averaging_) {
@@ -274,7 +272,7 @@
     };
 
     if (resampling_required) {
-      std::array<float, kMaxSamplesPerChannel> float_buffer;
+      std::array<float, kMaxSamplesPerChannel10ms> float_buffer;
       for (size_t i = 0; i < num_channels_; ++i) {
         deinterleave_channel(i, num_channels_, input_num_frames_, interleaved,
                              float_buffer.data());
@@ -302,7 +300,7 @@ void AudioBuffer::CopyTo(const StreamConfig& stream_config,
 
   int16_t* interleaved = interleaved_data;
   if (num_channels_ == 1) {
-    std::array<float, kMaxSamplesPerChannel> float_buffer;
+    std::array<float, kMaxSamplesPerChannel10ms> float_buffer;
 
     if (resampling_required) {
       output_resamplers_[0]->Resample(data_->channels()[0], buffer_num_frames_,
@@ -335,7 +333,7 @@
 
   if (resampling_required) {
     for (size_t i = 0; i < num_channels_; ++i) {
-      std::array<float, kMaxSamplesPerChannel> float_buffer;
+      std::array<float, kMaxSamplesPerChannel10ms> float_buffer;
       output_resamplers_[i]->Resample(data_->channels()[i],
                                       buffer_num_frames_, float_buffer.data(),
                                       output_num_frames_);
@@ -19,6 +19,7 @@
 
 #include "api/audio/audio_processing.h"
 #include "common_audio/channel_buffer.h"
+#include "common_audio/include/audio_util.h"
 
 namespace webrtc {
 
@@ -32,7 +33,8 @@ enum Band { kBand0To8kHz = 0, kBand8To16kHz = 1, kBand16To24kHz = 2 };
 class AudioBuffer {
  public:
   static const int kSplitBandSize = 160;
-  static const int kMaxSampleRate = 384000;
+  // TODO(tommi): Remove this (`AudioBuffer::kMaxSampleRate`) constant.
+  static const int kMaxSampleRate = webrtc::kMaxSampleRateHz;
   AudioBuffer(size_t input_rate,
               size_t input_num_channels,
               size_t buffer_rate,
@@ -2157,17 +2157,11 @@ void AudioProcessingImpl::InitializeVoiceActivityDetector() {
     return;
   }
 
-  if (!submodules_.voice_activity_detector) {
-    RTC_DCHECK(!!submodules_.gain_controller2);
-    // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
-    submodules_.voice_activity_detector =
-        std::make_unique<VoiceActivityDetectorWrapper>(
-            submodules_.gain_controller2->GetCpuFeatures(),
-            proc_fullband_sample_rate_hz());
-  } else {
-    submodules_.voice_activity_detector->Initialize(
-        proc_fullband_sample_rate_hz());
-  }
+  // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
+  submodules_.voice_activity_detector =
+      std::make_unique<VoiceActivityDetectorWrapper>(
+          submodules_.gain_controller2->GetCpuFeatures(),
+          proc_fullband_sample_rate_hz());
 }
 
 void AudioProcessingImpl::InitializeNoiseSuppressor() {
@@ -2173,8 +2173,8 @@ TEST_P(AudioProcessingTest, Formats) {
       // don't match.
       std::unique_ptr<float[]> cmp_data(new float[ref_length]);
 
-      PushResampler<float> resampler;
-      resampler.InitializeIfNeeded(out_rate, ref_rate, out_num);
+      PushResampler<float> resampler(out_samples_per_channel,
+                                     ref_samples_per_channel, out_num);
 
       // Compute the resampling delay of the output relative to the reference,
       // to find the region over which we should search for the best SNR.