Remove all AudioBuffer code that is not related to storing audio data

This CL moves/removes all code from the AudioBuffer that:
-Is not directly handling audio data (e.g., keytaps, VAD decisions).
-Is caching aggregated versions of the rest of the audio data.
-Is not used (or only used in testing)

Bug: webrtc:10882
Change-Id: I737deb3f692748eff30f46ad806b2c6f6292802c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/149072
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28866}
This commit is contained in:
Per Åhgren 2019-08-15 12:15:46 +02:00 committed by Commit Bot
parent 6e4791fe49
commit a1351271e6
10 changed files with 125 additions and 224 deletions

View File

@ -27,15 +27,6 @@ const size_t kSamplesPer16kHzChannel = 160;
const size_t kSamplesPer32kHzChannel = 320;
const size_t kSamplesPer48kHzChannel = 480;
int KeyboardChannelIndex(const StreamConfig& stream_config) {
if (!stream_config.has_keyboard()) {
RTC_NOTREACHED();
return 0;
}
return stream_config.num_channels();
}
size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
size_t num_bands = 1;
if (num_frames == kSamplesPer32kHzChannel ||
@ -60,10 +51,6 @@ AudioBuffer::AudioBuffer(size_t input_num_frames,
num_channels_(num_process_channels),
num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
mixed_low_pass_valid_(false),
reference_copied_(false),
activity_(AudioFrame::kVadUnknown),
keyboard_data_(NULL),
data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)),
output_buffer_(new IFChannelBuffer(output_num_frames_, num_channels_)) {
RTC_DCHECK_GT(input_num_frames_, 0);
@ -118,10 +105,6 @@ void AudioBuffer::CopyFrom(const float* const* data,
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
if (stream_config.has_keyboard()) {
keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
}
// Downmix.
const float* const* data_ptr = data;
if (need_to_downmix) {
@ -179,10 +162,6 @@ void AudioBuffer::CopyTo(const StreamConfig& stream_config,
}
void AudioBuffer::InitForNewData() {
keyboard_data_ = NULL;
mixed_low_pass_valid_ = false;
reference_copied_ = false;
activity_ = AudioFrame::kVadUnknown;
num_channels_ = num_proc_channels_;
data_->set_num_channels(num_proc_channels_);
if (split_data_.get()) {
@ -195,7 +174,6 @@ const int16_t* const* AudioBuffer::channels_const() const {
}
int16_t* const* AudioBuffer::channels() {
mixed_low_pass_valid_ = false;
return data_->ibuf()->channels();
}
@ -205,7 +183,6 @@ const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
}
int16_t* const* AudioBuffer::split_bands(size_t channel) {
mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->ibuf()->bands(channel)
: data_->ibuf()->bands(channel);
}
@ -218,39 +195,11 @@ const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
}
}
int16_t* const* AudioBuffer::split_channels(Band band) {
mixed_low_pass_valid_ = false;
if (split_data_.get()) {
return split_data_->ibuf()->channels(band);
} else {
return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
}
}
ChannelBuffer<int16_t>* AudioBuffer::data() {
mixed_low_pass_valid_ = false;
return data_->ibuf();
}
const ChannelBuffer<int16_t>* AudioBuffer::data() const {
return data_->ibuf_const();
}
ChannelBuffer<int16_t>* AudioBuffer::split_data() {
mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
}
const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
}
const float* const* AudioBuffer::channels_const_f() const {
return data_->fbuf_const()->channels();
}
float* const* AudioBuffer::channels_f() {
mixed_low_pass_valid_ = false;
return data_->fbuf()->channels();
}
@ -260,85 +209,10 @@ const float* const* AudioBuffer::split_bands_const_f(size_t channel) const {
}
float* const* AudioBuffer::split_bands_f(size_t channel) {
mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->fbuf()->bands(channel)
: data_->fbuf()->bands(channel);
}
const float* const* AudioBuffer::split_channels_const_f(Band band) const {
if (split_data_.get()) {
return split_data_->fbuf_const()->channels(band);
} else {
return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
}
}
float* const* AudioBuffer::split_channels_f(Band band) {
mixed_low_pass_valid_ = false;
if (split_data_.get()) {
return split_data_->fbuf()->channels(band);
} else {
return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
}
}
ChannelBuffer<float>* AudioBuffer::data_f() {
mixed_low_pass_valid_ = false;
return data_->fbuf();
}
const ChannelBuffer<float>* AudioBuffer::data_f() const {
return data_->fbuf_const();
}
ChannelBuffer<float>* AudioBuffer::split_data_f() {
mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
}
const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
}
const int16_t* AudioBuffer::mixed_low_pass_data() {
if (num_proc_channels_ == 1) {
return split_bands_const(0)[kBand0To8kHz];
}
if (!mixed_low_pass_valid_) {
if (!mixed_low_pass_channels_.get()) {
mixed_low_pass_channels_.reset(
new ChannelBuffer<int16_t>(num_split_frames_, 1));
}
DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
num_split_frames_, num_channels_,
mixed_low_pass_channels_->channels()[0]);
mixed_low_pass_valid_ = true;
}
return mixed_low_pass_channels_->channels()[0];
}
const int16_t* AudioBuffer::low_pass_reference(int channel) const {
if (!reference_copied_) {
return NULL;
}
return low_pass_reference_channels_->channels()[channel];
}
const float* AudioBuffer::keyboard_data() const {
return keyboard_data_;
}
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
activity_ = activity;
}
AudioFrame::VADActivity AudioBuffer::activity() const {
return activity_;
}
size_t AudioBuffer::num_channels() const {
return num_channels_;
}
@ -359,17 +233,12 @@ size_t AudioBuffer::num_frames_per_band() const {
return num_split_frames_;
}
size_t AudioBuffer::num_keyboard_frames() const {
// We don't resample the keyboard channel.
return input_num_frames_;
}
size_t AudioBuffer::num_bands() const {
return num_bands_;
}
// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
void AudioBuffer::DeinterleaveFrom(const AudioFrame* frame) {
RTC_DCHECK_EQ(frame->num_channels_, num_input_channels_);
RTC_DCHECK_EQ(frame->samples_per_channel_, input_num_frames_);
InitForNewData();
@ -378,7 +247,6 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
input_buffer_.reset(
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
activity_ = frame->vad_activity_;
int16_t* const* deinterleaved;
if (input_num_frames_ == proc_num_frames_) {
@ -407,12 +275,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
frame->vad_activity_ = activity_;
if (!data_changed) {
return;
}
void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
RTC_DCHECK(frame->num_channels_ == num_channels_ || num_channels_ == 1);
RTC_DCHECK_EQ(frame->samples_per_channel_, output_num_frames_);
@ -437,21 +300,6 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
}
}
void AudioBuffer::CopyLowPassToReference() {
reference_copied_ = true;
if (!low_pass_reference_channels_.get() ||
low_pass_reference_channels_->num_channels() != num_channels_) {
low_pass_reference_channels_.reset(
new ChannelBuffer<int16_t>(num_split_frames_, num_proc_channels_));
}
for (size_t i = 0; i < num_proc_channels_; i++) {
memcpy(low_pass_reference_channels_->channels()[i],
split_bands_const(i)[kBand0To8kHz],
low_pass_reference_channels_->num_frames_per_band() *
sizeof(split_bands_const(i)[kBand0To8kHz][0]));
}
}
void AudioBuffer::SplitIntoFrequencyBands() {
splitting_filter_->Analysis(data_.get(), split_data_.get());
}

View File

@ -40,10 +40,10 @@ class AudioBuffer {
virtual ~AudioBuffer();
size_t num_channels() const;
size_t num_proc_channels() const { return num_proc_channels_; }
void set_num_channels(size_t num_channels);
size_t num_frames() const;
size_t num_frames_per_band() const;
size_t num_keyboard_frames() const;
size_t num_bands() const;
// Returns a pointer array to the full-band channels.
@ -76,44 +76,17 @@ class AudioBuffer {
// 0 <= band < |num_bands_|
// 0 <= channel < |num_proc_channels_|
// 0 <= sample < |num_split_frames_|
int16_t* const* split_channels(Band band);
const int16_t* const* split_channels_const(Band band) const;
float* const* split_channels_f(Band band);
const float* const* split_channels_const_f(Band band) const;
// Returns a pointer to the ChannelBuffer that encapsulates the full-band
// data.
ChannelBuffer<int16_t>* data();
const ChannelBuffer<int16_t>* data() const;
ChannelBuffer<float>* data_f();
const ChannelBuffer<float>* data_f() const;
// Returns a pointer to the ChannelBuffer that encapsulates the split data.
ChannelBuffer<int16_t>* split_data();
const ChannelBuffer<int16_t>* split_data() const;
ChannelBuffer<float>* split_data_f();
const ChannelBuffer<float>* split_data_f() const;
// Returns a pointer to the low-pass data downmixed to mono. If this data
// isn't already available it re-calculates it.
const int16_t* mixed_low_pass_data();
const int16_t* low_pass_reference(int channel) const;
const float* keyboard_data() const;
void set_activity(AudioFrame::VADActivity activity);
AudioFrame::VADActivity activity() const;
// Use for int16 interleaved data.
void DeinterleaveFrom(AudioFrame* audioFrame);
void DeinterleaveFrom(const AudioFrame* audioFrame);
// If |data_changed| is false, only the non-audio data members will be copied
// to |frame|.
void InterleaveTo(AudioFrame* frame, bool data_changed) const;
void InterleaveTo(AudioFrame* frame) const;
// Use for float deinterleaved data.
void CopyFrom(const float* const* data, const StreamConfig& stream_config);
void CopyTo(const StreamConfig& stream_config, float* const* data);
void CopyLowPassToReference();
// Splits the signal into different bands.
void SplitIntoFrequencyBands();
@ -142,16 +115,10 @@ class AudioBuffer {
size_t num_bands_;
size_t num_split_frames_;
bool mixed_low_pass_valid_;
bool reference_copied_;
AudioFrame::VADActivity activity_;
const float* keyboard_data_;
std::unique_ptr<IFChannelBuffer> data_;
std::unique_ptr<IFChannelBuffer> split_data_;
std::unique_ptr<SplittingFilter> splitting_filter_;
std::unique_ptr<ChannelBuffer<int16_t>> mixed_low_pass_channels_;
std::unique_ptr<ChannelBuffer<int16_t>> low_pass_reference_channels_;
std::unique_ptr<IFChannelBuffer> input_buffer_;
std::unique_ptr<IFChannelBuffer> output_buffer_;
std::unique_ptr<ChannelBuffer<float>> process_buffer_;

View File

@ -21,10 +21,6 @@ const size_t kStereo = 2u;
const size_t kMono = 1u;
void ExpectNumChannels(const AudioBuffer& ab, size_t num_channels) {
EXPECT_EQ(ab.data()->num_channels(), num_channels);
EXPECT_EQ(ab.data_f()->num_channels(), num_channels);
EXPECT_EQ(ab.split_data()->num_channels(), num_channels);
EXPECT_EQ(ab.split_data_f()->num_channels(), num_channels);
EXPECT_EQ(ab.num_channels(), num_channels);
}

View File

@ -949,6 +949,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
RecordUnprocessedCaptureStream(src);
}
capture_.keyboard_info.Extract(src, formats_.api_format.input_stream());
capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
RETURN_ON_ERR(ProcessCaptureStreamLocked());
capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
@ -1243,11 +1244,14 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
RecordUnprocessedCaptureStream(*frame);
}
capture_.vad_activity = frame->vad_activity_;
capture_.capture_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessCaptureStreamLocked());
capture_.capture_audio->InterleaveTo(
frame, submodule_states_.CaptureMultiBandProcessingActive() ||
submodule_states_.CaptureFullBandProcessingActive());
if (submodule_states_.CaptureMultiBandProcessingActive() ||
submodule_states_.CaptureFullBandProcessingActive()) {
capture_.capture_audio->InterleaveTo(frame);
}
frame->vad_activity_ = capture_.vad_activity;
if (aec_dump_) {
RecordProcessedCaptureStream(*frame);
@ -1361,7 +1365,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
}
if (public_submodules_->noise_suppression->is_enabled()) {
capture_buffer->CopyLowPassToReference();
private_submodules_->echo_control_mobile->CopyLowPassReference(
capture_buffer);
}
public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
@ -1393,7 +1398,15 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
}
public_submodules_->voice_detection->ProcessCaptureAudio(capture_buffer);
if (public_submodules_->voice_detection->is_enabled() &&
!public_submodules_->voice_detection->using_external_vad()) {
bool voice_active =
public_submodules_->voice_detection->ProcessCaptureAudio(
capture_buffer);
capture_.vad_activity =
voice_active ? AudioFrame::kVadActive : AudioFrame::kVadPassive;
}
if (config_.voice_detection.enabled) {
private_submodules_->voice_detector->ProcessCaptureAudio(capture_buffer);
capture_.stats.voice_detected =
@ -1440,8 +1453,9 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
capture_buffer->channels_f()[0], capture_buffer->num_frames(),
capture_buffer->num_channels(),
capture_buffer->split_bands_const_f(0)[kBand0To8kHz],
capture_buffer->num_frames_per_band(), capture_buffer->keyboard_data(),
capture_buffer->num_keyboard_frames(), voice_probability,
capture_buffer->num_frames_per_band(),
capture_.keyboard_info.keyboard_data,
capture_.keyboard_info.num_keyboard_frames, voice_probability,
capture_.key_pressed);
}
@ -1598,9 +1612,10 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
render_.render_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessRenderStreamLocked());
render_.render_audio->InterleaveTo(
frame, submodule_states_.RenderMultiBandProcessingActive() ||
submodule_states_.RenderFullBandProcessingActive());
if (submodule_states_.RenderMultiBandProcessingActive() ||
submodule_states_.RenderFullBandProcessingActive()) {
render_.render_audio->InterleaveTo(frame);
}
return kNoError;
}
@ -2117,6 +2132,17 @@ AudioProcessingImpl::ApmCaptureState::ApmCaptureState(
AudioProcessingImpl::ApmCaptureState::~ApmCaptureState() = default;
void AudioProcessingImpl::ApmCaptureState::KeyboardInfo::Extract(
const float* const* data,
const StreamConfig& stream_config) {
if (stream_config.has_keyboard()) {
keyboard_data = data[stream_config.num_channels()];
} else {
keyboard_data = NULL;
}
num_keyboard_frames = stream_config.num_frames();
}
AudioProcessingImpl::ApmRenderState::ApmRenderState() = default;
AudioProcessingImpl::ApmRenderState::~ApmRenderState() = default;

View File

@ -394,6 +394,12 @@ class AudioProcessingImpl : public AudioProcessing {
int playout_volume;
int prev_playout_volume;
AudioProcessingStats stats;
struct KeyboardInfo {
void Extract(const float* const* data, const StreamConfig& stream_config);
size_t num_keyboard_frames = 0;
const float* keyboard_data = nullptr;
} keyboard_info;
AudioFrame::VADActivity vad_activity = AudioFrame::kVadUnknown;
} capture_ RTC_GUARDED_BY(crit_capture_);
struct ApmCaptureNonLockedState {

View File

@ -101,7 +101,10 @@ class EchoControlMobileImpl::Canceller {
};
EchoControlMobileImpl::EchoControlMobileImpl()
: routing_mode_(kSpeakerphone), comfort_noise_enabled_(false) {}
: routing_mode_(kSpeakerphone), comfort_noise_enabled_(false) {
low_pass_reference_[0].fill(0);
low_pass_reference_[1].fill(0);
}
EchoControlMobileImpl::~EchoControlMobileImpl() {}
@ -168,7 +171,9 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
// TODO(ajm): improve how this works, possibly inside AECM.
// This is kind of hacked up.
const int16_t* noisy = audio->low_pass_reference(capture);
RTC_DCHECK_LT(capture, low_pass_reference_.size());
const int16_t* noisy =
reference_copied_ ? low_pass_reference_[capture].data() : nullptr;
const int16_t* clean = audio->split_bands_const(capture)[kBand0To8kHz];
if (noisy == NULL) {
noisy = clean;
@ -195,6 +200,16 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
return AudioProcessing::kNoError;
}
void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) {
RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size());
reference_copied_ = true;
for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
memcpy(low_pass_reference_[capture].data(),
audio->split_bands_const(capture)[kBand0To8kHz],
audio->num_frames_per_band() * sizeof(int16_t));
}
}
int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
if (MapSetting(mode) == -1) {
return AudioProcessing::kBadParameterError;
@ -219,6 +234,9 @@ bool EchoControlMobileImpl::is_comfort_noise_enabled() const {
void EchoControlMobileImpl::Initialize(int sample_rate_hz,
size_t num_reverse_channels,
size_t num_output_channels) {
low_pass_reference_[0].fill(0);
low_pass_reference_[1].fill(0);
stream_properties_.reset(new StreamProperties(
sample_rate_hz, num_reverse_channels, num_output_channels));

View File

@ -54,6 +54,7 @@ class EchoControlMobileImpl {
void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio);
int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
void CopyLowPassReference(AudioBuffer* audio);
void Initialize(int sample_rate_hz,
size_t num_reverse_channels,
@ -78,6 +79,8 @@ class EchoControlMobileImpl {
std::vector<std::unique_ptr<Canceller>> cancellers_;
std::unique_ptr<StreamProperties> stream_properties_;
std::array<std::array<int16_t, 160>, 2> low_pass_reference_;
bool reference_copied_ = false;
};
} // namespace webrtc

View File

@ -120,10 +120,28 @@ void GainControlImpl::PackRenderAudioBuffer(
std::vector<int16_t>* packed_buffer) {
RTC_DCHECK_GE(160, audio->num_frames_per_band());
std::array<int16_t, 160> mixed_low_pass_data;
rtc::ArrayView<const int16_t> mixed_low_pass;
if (audio->num_proc_channels() == 1) {
mixed_low_pass =
rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz],
audio->num_frames_per_band());
} else {
const int num_channels = static_cast<int>(audio->num_channels());
for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i];
for (int j = 1; j < num_channels; ++j) {
value += audio->split_channels_const(kBand0To8kHz)[j][i];
}
mixed_low_pass_data[i] = value / num_channels;
}
mixed_low_pass = rtc::ArrayView<const int16_t>(
mixed_low_pass_data.data(), audio->num_frames_per_band());
}
packed_buffer->clear();
packed_buffer->insert(
packed_buffer->end(), audio->mixed_low_pass_data(),
(audio->mixed_low_pass_data() + audio->num_frames_per_band()));
packed_buffer->insert(packed_buffer->end(), mixed_low_pass.data(),
(mixed_low_pass.data() + audio->num_frames_per_band()));
}
int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {

View File

@ -54,30 +54,42 @@ void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
set_likelihood(likelihood_);
}
void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
rtc::CritScope cs(crit_);
if (!enabled_) {
return;
}
if (using_external_vad_) {
using_external_vad_ = false;
return;
}
RTC_DCHECK(enabled_);
RTC_DCHECK_GE(160, audio->num_frames_per_band());
// TODO(ajm): concatenate data in frame buffer here.
int vad_ret =
WebRtcVad_Process(vad_->state(), sample_rate_hz_,
audio->mixed_low_pass_data(), frame_size_samples_);
std::array<int16_t, 160> mixed_low_pass_data;
rtc::ArrayView<const int16_t> mixed_low_pass;
if (audio->num_proc_channels() == 1) {
mixed_low_pass =
rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz],
audio->num_frames_per_band());
} else {
const int num_channels = static_cast<int>(audio->num_channels());
for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i];
for (int j = 1; j < num_channels; ++j) {
value += audio->split_channels_const(kBand0To8kHz)[j][i];
}
mixed_low_pass_data[i] = value / num_channels;
}
mixed_low_pass = rtc::ArrayView<const int16_t>(
mixed_low_pass_data.data(), audio->num_frames_per_band());
}
int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
mixed_low_pass.data(), frame_size_samples_);
if (vad_ret == 0) {
stream_has_voice_ = false;
audio->set_activity(AudioFrame::kVadPassive);
return false;
} else if (vad_ret == 1) {
stream_has_voice_ = true;
audio->set_activity(AudioFrame::kVadActive);
} else {
RTC_NOTREACHED();
}
return stream_has_voice_;
}
int VoiceDetectionImpl::Enable(bool enable) {

View File

@ -31,7 +31,14 @@ class VoiceDetectionImpl : public VoiceDetection {
// TODO(peah): Fold into ctor, once public API is removed.
void Initialize(int sample_rate_hz);
void ProcessCaptureAudio(AudioBuffer* audio);
// Returns the VAD activity.
bool ProcessCaptureAudio(AudioBuffer* audio);
bool using_external_vad() const {
rtc::CritScope cs(crit_);
return using_external_vad_;
}
// VoiceDetection implementation.
int Enable(bool enable) override;