Bug: webrtc:7494 Change-Id: I880ef3991ade4e429ccde843571f069ede149c0e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/213342 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org> Cr-Commit-Position: refs/heads/master@{#33604}
72 lines
2.5 KiB
C++
72 lines
2.5 KiB
C++
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
|
|
#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
|
|
|
|
#include <memory>
|
|
|
|
#include "modules/audio_processing/agc2/cpu_features.h"
|
|
#include "modules/audio_processing/include/audio_frame_view.h"
|
|
|
|
namespace webrtc {
|
|
|
|
// Class to analyze voice activity and audio levels.
|
|
class VadLevelAnalyzer {
|
|
public:
|
|
struct Result {
|
|
float speech_probability; // Range: [0, 1].
|
|
float rms_dbfs; // Root mean square power (dBFS).
|
|
float peak_dbfs; // Peak power (dBFS).
|
|
};
|
|
|
|
// Voice Activity Detector (VAD) interface.
|
|
class VoiceActivityDetector {
|
|
public:
|
|
virtual ~VoiceActivityDetector() = default;
|
|
// Resets the internal state.
|
|
virtual void Reset() = 0;
|
|
// Analyzes an audio frame and returns the speech probability.
|
|
virtual float ComputeProbability(AudioFrameView<const float> frame) = 0;
|
|
};
|
|
|
|
// Ctor. Uses the default VAD.
|
|
VadLevelAnalyzer();
|
|
// Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
|
|
// `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the
|
|
// duration of two frames. `vad_probability_attack` is a number in (0,1] used
|
|
// to smooth the speech probability (instant decay, slow attack).
|
|
VadLevelAnalyzer(int vad_reset_period_ms,
|
|
float vad_probability_attack,
|
|
const AvailableCpuFeatures& cpu_features);
|
|
// Ctor. Uses a custom `vad`.
|
|
VadLevelAnalyzer(int vad_reset_period_ms,
|
|
float vad_probability_attack,
|
|
std::unique_ptr<VoiceActivityDetector> vad);
|
|
|
|
VadLevelAnalyzer(const VadLevelAnalyzer&) = delete;
|
|
VadLevelAnalyzer& operator=(const VadLevelAnalyzer&) = delete;
|
|
~VadLevelAnalyzer();
|
|
|
|
// Computes the speech probability and the level for `frame`.
|
|
Result AnalyzeFrame(AudioFrameView<const float> frame);
|
|
|
|
private:
|
|
std::unique_ptr<VoiceActivityDetector> vad_;
|
|
const int vad_reset_period_frames_;
|
|
const float vad_probability_attack_;
|
|
int time_to_vad_reset_;
|
|
float vad_probability_;
|
|
};
|
|
|
|
} // namespace webrtc
|
|
|
|
#endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WITH_LEVEL_H_
|