Make VoiceDetection not a ProcessingComponent (bit exact).
BUG=webrtc:5354 Review URL: https://codereview.webrtc.org/1494593004 Cr-Commit-Position: refs/heads/master@{#11047}
This commit is contained in:
parent
672aba3f57
commit
a29386c26d
@ -149,8 +149,7 @@ struct AudioProcessingImpl::ApmPublicSubmodules {
|
||||
ApmPublicSubmodules()
|
||||
: echo_cancellation(nullptr),
|
||||
echo_control_mobile(nullptr),
|
||||
gain_control(nullptr),
|
||||
voice_detection(nullptr) {}
|
||||
gain_control(nullptr) {}
|
||||
// Accessed from outside APM without any lock acquired.
|
||||
EchoCancellationImpl* echo_cancellation;
|
||||
EchoControlMobileImpl* echo_control_mobile;
|
||||
@ -158,7 +157,7 @@ struct AudioProcessingImpl::ApmPublicSubmodules {
|
||||
rtc::scoped_ptr<HighPassFilterImpl> high_pass_filter;
|
||||
rtc::scoped_ptr<LevelEstimatorImpl> level_estimator;
|
||||
rtc::scoped_ptr<NoiseSuppressionImpl> noise_suppression;
|
||||
VoiceDetectionImpl* voice_detection;
|
||||
rtc::scoped_ptr<VoiceDetectionImpl> voice_detection;
|
||||
rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc;
|
||||
|
||||
// Accessed internally from both render and capture.
|
||||
@ -246,8 +245,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
|
||||
new LevelEstimatorImpl(&crit_capture_));
|
||||
public_submodules_->noise_suppression.reset(
|
||||
new NoiseSuppressionImpl(&crit_capture_));
|
||||
public_submodules_->voice_detection =
|
||||
new VoiceDetectionImpl(this, &crit_capture_);
|
||||
public_submodules_->voice_detection.reset(
|
||||
new VoiceDetectionImpl(&crit_capture_));
|
||||
public_submodules_->gain_control_for_new_agc.reset(
|
||||
new GainControlForNewAgc(public_submodules_->gain_control));
|
||||
|
||||
@ -257,8 +256,6 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
|
||||
public_submodules_->echo_control_mobile);
|
||||
private_submodules_->component_list.push_back(
|
||||
public_submodules_->gain_control);
|
||||
private_submodules_->component_list.push_back(
|
||||
public_submodules_->voice_detection);
|
||||
}
|
||||
|
||||
SetExtraOptions(config);
|
||||
@ -396,6 +393,7 @@ int AudioProcessingImpl::InitializeLocked() {
|
||||
InitializeHighPassFilter();
|
||||
InitializeNoiseSuppression();
|
||||
InitializeLevelEstimator();
|
||||
InitializeVoiceDetection();
|
||||
|
||||
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
||||
if (debug_dump_.debug_file->Open()) {
|
||||
@ -776,7 +774,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
||||
public_submodules_->noise_suppression->ProcessCaptureAudio(ca);
|
||||
RETURN_ON_ERR(
|
||||
public_submodules_->echo_control_mobile->ProcessCaptureAudio(ca));
|
||||
RETURN_ON_ERR(public_submodules_->voice_detection->ProcessCaptureAudio(ca));
|
||||
public_submodules_->voice_detection->ProcessCaptureAudio(ca);
|
||||
|
||||
if (constants_.use_new_agc &&
|
||||
public_submodules_->gain_control->is_enabled() &&
|
||||
@ -1162,7 +1160,7 @@ NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
|
||||
VoiceDetection* AudioProcessingImpl::voice_detection() const {
|
||||
// Adding a lock here has no effect as it allows any access to the submodule
|
||||
// from the returned pointer.
|
||||
return public_submodules_->voice_detection;
|
||||
return public_submodules_->voice_detection.get();
|
||||
}
|
||||
|
||||
bool AudioProcessingImpl::is_data_processed() const {
|
||||
@ -1185,6 +1183,9 @@ bool AudioProcessingImpl::is_data_processed() const {
|
||||
if (public_submodules_->level_estimator->is_enabled()) {
|
||||
enabled_count++;
|
||||
}
|
||||
if (public_submodules_->voice_detection->is_enabled()) {
|
||||
enabled_count++;
|
||||
}
|
||||
|
||||
// Data is unchanged if no components are enabled, or if only
|
||||
// public_submodules_->level_estimator
|
||||
@ -1313,6 +1314,10 @@ void AudioProcessingImpl::InitializeLevelEstimator() {
|
||||
public_submodules_->level_estimator->Initialize();
|
||||
}
|
||||
|
||||
void AudioProcessingImpl::InitializeVoiceDetection() {
|
||||
public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
|
||||
}
|
||||
|
||||
void AudioProcessingImpl::MaybeUpdateHistograms() {
|
||||
static const int kMinDiffDelayMs = 60;
|
||||
|
||||
|
||||
@ -188,6 +188,8 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||
void InitializeLevelEstimator()
|
||||
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||
void InitializeVoiceDetection()
|
||||
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||
int InitializeLocked(const ProcessingConfig& config)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
|
||||
|
||||
|
||||
@ -10,66 +10,61 @@
|
||||
|
||||
#include "webrtc/modules/audio_processing/voice_detection_impl.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/base/criticalsection.h"
|
||||
#include "webrtc/base/thread_checker.h"
|
||||
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
typedef VadInst Handle;
|
||||
|
||||
namespace {
|
||||
int MapSetting(VoiceDetection::Likelihood likelihood) {
|
||||
switch (likelihood) {
|
||||
case VoiceDetection::kVeryLowLikelihood:
|
||||
return 3;
|
||||
case VoiceDetection::kLowLikelihood:
|
||||
return 2;
|
||||
case VoiceDetection::kModerateLikelihood:
|
||||
return 1;
|
||||
case VoiceDetection::kHighLikelihood:
|
||||
return 0;
|
||||
class VoiceDetectionImpl::Vad {
|
||||
public:
|
||||
Vad() {
|
||||
state_ = WebRtcVad_Create();
|
||||
RTC_CHECK(state_);
|
||||
int error = WebRtcVad_Init(state_);
|
||||
RTC_DCHECK_EQ(0, error);
|
||||
}
|
||||
assert(false);
|
||||
return -1;
|
||||
}
|
||||
} // namespace
|
||||
~Vad() {
|
||||
WebRtcVad_Free(state_);
|
||||
}
|
||||
VadInst* state() { return state_; }
|
||||
private:
|
||||
VadInst* state_ = nullptr;
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
|
||||
};
|
||||
|
||||
VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
|
||||
rtc::CriticalSection* crit)
|
||||
: ProcessingComponent(),
|
||||
apm_(apm),
|
||||
crit_(crit),
|
||||
stream_has_voice_(false),
|
||||
using_external_vad_(false),
|
||||
likelihood_(kLowLikelihood),
|
||||
frame_size_ms_(10),
|
||||
frame_size_samples_(0) {
|
||||
RTC_DCHECK(apm);
|
||||
VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
|
||||
: crit_(crit) {
|
||||
RTC_DCHECK(crit);
|
||||
}
|
||||
|
||||
VoiceDetectionImpl::~VoiceDetectionImpl() {}
|
||||
|
||||
int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
|
||||
void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
|
||||
rtc::CritScope cs(crit_);
|
||||
if (!is_component_enabled()) {
|
||||
return apm_->kNoError;
|
||||
sample_rate_hz_ = sample_rate_hz;
|
||||
rtc::scoped_ptr<Vad> new_vad;
|
||||
if (enabled_) {
|
||||
new_vad.reset(new Vad());
|
||||
}
|
||||
vad_.swap(new_vad);
|
||||
using_external_vad_ = false;
|
||||
frame_size_samples_ =
|
||||
static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
|
||||
set_likelihood(likelihood_);
|
||||
}
|
||||
|
||||
void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
|
||||
rtc::CritScope cs(crit_);
|
||||
if (!enabled_) {
|
||||
return;
|
||||
}
|
||||
if (using_external_vad_) {
|
||||
using_external_vad_ = false;
|
||||
return apm_->kNoError;
|
||||
return;
|
||||
}
|
||||
assert(audio->num_frames_per_band() <= 160);
|
||||
|
||||
RTC_DCHECK_GE(160u, audio->num_frames_per_band());
|
||||
// TODO(ajm): concatenate data in frame buffer here.
|
||||
|
||||
int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
|
||||
apm_->proc_split_sample_rate_hz(),
|
||||
int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
|
||||
audio->mixed_low_pass_data(),
|
||||
frame_size_samples_);
|
||||
if (vad_ret == 0) {
|
||||
@ -79,27 +74,29 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
|
||||
stream_has_voice_ = true;
|
||||
audio->set_activity(AudioFrame::kVadActive);
|
||||
} else {
|
||||
return apm_->kUnspecifiedError;
|
||||
RTC_NOTREACHED();
|
||||
}
|
||||
|
||||
return apm_->kNoError;
|
||||
}
|
||||
|
||||
int VoiceDetectionImpl::Enable(bool enable) {
|
||||
rtc::CritScope cs(crit_);
|
||||
return EnableComponent(enable);
|
||||
if (enabled_ != enable) {
|
||||
enabled_ = enable;
|
||||
Initialize(sample_rate_hz_);
|
||||
}
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
bool VoiceDetectionImpl::is_enabled() const {
|
||||
rtc::CritScope cs(crit_);
|
||||
return is_component_enabled();
|
||||
return enabled_;
|
||||
}
|
||||
|
||||
int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
|
||||
rtc::CritScope cs(crit_);
|
||||
using_external_vad_ = true;
|
||||
stream_has_voice_ = has_voice;
|
||||
return apm_->kNoError;
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
bool VoiceDetectionImpl::stream_has_voice() const {
|
||||
@ -111,12 +108,30 @@ bool VoiceDetectionImpl::stream_has_voice() const {
|
||||
|
||||
int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
|
||||
rtc::CritScope cs(crit_);
|
||||
if (MapSetting(likelihood) == -1) {
|
||||
return apm_->kBadParameterError;
|
||||
}
|
||||
|
||||
likelihood_ = likelihood;
|
||||
return Configure();
|
||||
if (enabled_) {
|
||||
int mode = 2;
|
||||
switch (likelihood) {
|
||||
case VoiceDetection::kVeryLowLikelihood:
|
||||
mode = 3;
|
||||
break;
|
||||
case VoiceDetection::kLowLikelihood:
|
||||
mode = 2;
|
||||
break;
|
||||
case VoiceDetection::kModerateLikelihood:
|
||||
mode = 1;
|
||||
break;
|
||||
case VoiceDetection::kHighLikelihood:
|
||||
mode = 0;
|
||||
break;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
break;
|
||||
}
|
||||
int error = WebRtcVad_set_mode(vad_->state(), mode);
|
||||
RTC_DCHECK_EQ(0, error);
|
||||
}
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
|
||||
@ -126,64 +141,14 @@ VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
|
||||
|
||||
int VoiceDetectionImpl::set_frame_size_ms(int size) {
|
||||
rtc::CritScope cs(crit_);
|
||||
assert(size == 10); // TODO(ajm): remove when supported.
|
||||
if (size != 10 &&
|
||||
size != 20 &&
|
||||
size != 30) {
|
||||
return apm_->kBadParameterError;
|
||||
}
|
||||
|
||||
RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
|
||||
frame_size_ms_ = size;
|
||||
|
||||
return Initialize();
|
||||
Initialize(sample_rate_hz_);
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
int VoiceDetectionImpl::frame_size_ms() const {
|
||||
rtc::CritScope cs(crit_);
|
||||
return frame_size_ms_;
|
||||
}
|
||||
|
||||
int VoiceDetectionImpl::Initialize() {
|
||||
int err = ProcessingComponent::Initialize();
|
||||
|
||||
rtc::CritScope cs(crit_);
|
||||
if (err != apm_->kNoError || !is_component_enabled()) {
|
||||
return err;
|
||||
}
|
||||
|
||||
using_external_vad_ = false;
|
||||
frame_size_samples_ = static_cast<size_t>(
|
||||
frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000);
|
||||
// TODO(ajm): initialize frame buffer here.
|
||||
|
||||
return apm_->kNoError;
|
||||
}
|
||||
|
||||
void* VoiceDetectionImpl::CreateHandle() const {
|
||||
return WebRtcVad_Create();
|
||||
}
|
||||
|
||||
void VoiceDetectionImpl::DestroyHandle(void* handle) const {
|
||||
WebRtcVad_Free(static_cast<Handle*>(handle));
|
||||
}
|
||||
|
||||
int VoiceDetectionImpl::InitializeHandle(void* handle) const {
|
||||
return WebRtcVad_Init(static_cast<Handle*>(handle));
|
||||
}
|
||||
|
||||
int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
|
||||
rtc::CritScope cs(crit_);
|
||||
return WebRtcVad_set_mode(static_cast<Handle*>(handle),
|
||||
MapSetting(likelihood_));
|
||||
}
|
||||
|
||||
int VoiceDetectionImpl::num_handles_required() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int VoiceDetectionImpl::GetHandleError(void* handle) const {
|
||||
// The VAD has no get_error() function.
|
||||
assert(handle != NULL);
|
||||
return apm_->kUnspecifiedError;
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
||||
@ -11,31 +11,27 @@
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
|
||||
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
#include "webrtc/base/criticalsection.h"
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/modules/audio_processing/processing_component.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioBuffer;
|
||||
|
||||
class VoiceDetectionImpl : public VoiceDetection,
|
||||
public ProcessingComponent {
|
||||
class VoiceDetectionImpl : public VoiceDetection {
|
||||
public:
|
||||
VoiceDetectionImpl(const AudioProcessing* apm, rtc::CriticalSection* crit);
|
||||
virtual ~VoiceDetectionImpl();
|
||||
explicit VoiceDetectionImpl(rtc::CriticalSection* crit);
|
||||
~VoiceDetectionImpl() override;
|
||||
|
||||
int ProcessCaptureAudio(AudioBuffer* audio);
|
||||
// TODO(peah): Fold into ctor, once public API is removed.
|
||||
void Initialize(int sample_rate_hz);
|
||||
void ProcessCaptureAudio(AudioBuffer* audio);
|
||||
|
||||
// VoiceDetection implementation.
|
||||
bool is_enabled() const override;
|
||||
|
||||
// ProcessingComponent implementation.
|
||||
int Initialize() override;
|
||||
|
||||
private:
|
||||
// VoiceDetection implementation.
|
||||
int Enable(bool enable) override;
|
||||
bool is_enabled() const override;
|
||||
int set_stream_has_voice(bool has_voice) override;
|
||||
bool stream_has_voice() const override;
|
||||
int set_likelihood(Likelihood likelihood) override;
|
||||
@ -43,24 +39,18 @@ class VoiceDetectionImpl : public VoiceDetection,
|
||||
int set_frame_size_ms(int size) override;
|
||||
int frame_size_ms() const override;
|
||||
|
||||
// ProcessingComponent implementation.
|
||||
void* CreateHandle() const override;
|
||||
int InitializeHandle(void* handle) const override;
|
||||
int ConfigureHandle(void* handle) const override;
|
||||
void DestroyHandle(void* handle) const override;
|
||||
int num_handles_required() const override;
|
||||
int GetHandleError(void* handle) const override;
|
||||
|
||||
// Not guarded as its public API is thread safe.
|
||||
const AudioProcessing* apm_;
|
||||
|
||||
private:
|
||||
class Vad;
|
||||
rtc::CriticalSection* const crit_;
|
||||
|
||||
bool stream_has_voice_ GUARDED_BY(crit_);
|
||||
bool using_external_vad_ GUARDED_BY(crit_);
|
||||
Likelihood likelihood_ GUARDED_BY(crit_);
|
||||
int frame_size_ms_ GUARDED_BY(crit_);
|
||||
size_t frame_size_samples_ GUARDED_BY(crit_);
|
||||
bool enabled_ GUARDED_BY(crit_) = false;
|
||||
bool stream_has_voice_ GUARDED_BY(crit_) = false;
|
||||
bool using_external_vad_ GUARDED_BY(crit_) = false;
|
||||
Likelihood likelihood_ GUARDED_BY(crit_) = kLowLikelihood;
|
||||
int frame_size_ms_ GUARDED_BY(crit_) = 10;
|
||||
size_t frame_size_samples_ GUARDED_BY(crit_) = 0;
|
||||
int sample_rate_hz_ GUARDED_BY(crit_) = 0;
|
||||
rtc::scoped_ptr<Vad> vad_ GUARDED_BY(crit_);
|
||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(VoiceDetectionImpl);
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user