VoipVolumeControl subAPI for VoIP API
- mute/unmute API. - speech level/energy/duration API. Bug: webrtc:12111 Change-Id: I54757b9874d15d59a145f2ca70801ee9ef0f4430 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/191060 Commit-Queue: Tim Na <natim@webrtc.org> Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Reviewed-by: Per Åhgren <peah@webrtc.org> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/master@{#32607}
This commit is contained in:
parent
f42a94a7f5
commit
a58cae3eae
@ -17,6 +17,7 @@ rtc_source_set("voip_api") {
|
||||
"voip_engine.h",
|
||||
"voip_network.h",
|
||||
"voip_statistics.h",
|
||||
"voip_volume_control.h",
|
||||
]
|
||||
deps = [
|
||||
"..:array_view",
|
||||
|
||||
@ -18,6 +18,7 @@ class VoipCodec;
|
||||
class VoipNetwork;
|
||||
class VoipDtmf;
|
||||
class VoipStatistics;
|
||||
class VoipVolumeControl;
|
||||
|
||||
// VoipEngine is the main interface serving as the entry point for all VoIP
|
||||
// APIs. A single instance of VoipEngine should suffice the most of the need for
|
||||
@ -89,6 +90,9 @@ class VoipEngine {
|
||||
// VoipStatistics provides performance metrics around audio decoding module
|
||||
// and jitter buffer (NetEq).
|
||||
virtual VoipStatistics& Statistics() = 0;
|
||||
|
||||
// VoipVolumeControl provides input mute and input/output volume information.
|
||||
virtual VoipVolumeControl& VolumeControl() = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
57
api/voip/voip_volume_control.h
Normal file
57
api/voip/voip_volume_control.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef API_VOIP_VOIP_VOLUME_CONTROL_H_
|
||||
#define API_VOIP_VOIP_VOLUME_CONTROL_H_
|
||||
|
||||
#include "api/voip/voip_base.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Audio level, energy and duration values for one direction (input or output)
// of a channel, as returned by VoipVolumeControl::GetInputVolumeInfo() and
// VoipVolumeControl::GetOutputVolumeInfo(). All fields default to zero. The
// linked W3C webrtc-stats definitions describe the meaning of each value.
struct VolumeInfo {
|
||||
// Audio level, see:
// https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-audiolevel
|
||||
double audio_level = 0;
|
||||
// Accumulated audio energy, see:
// https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalaudioenergy
|
||||
double total_energy = 0.0;
|
||||
// Accumulated duration of the measured samples (presumably in seconds, per
// the linked definition — confirm), see:
// https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalsamplesduration
|
||||
double total_duration = 0.0;
|
||||
};
|
||||
|
||||
// VoipVolumeControl interface.
|
||||
//
|
||||
// This sub-API supports functions related to the input (microphone) and output
|
||||
// (speaker) device.
|
||||
//
|
||||
// Caller must ensure that ChannelId is valid otherwise it will result in no-op
|
||||
// with error logging.
|
||||
// Pure-virtual interface for muting the microphone input of a channel and for
// querying per-channel input/output volume information. The destructor is
// protected, so outside code cannot delete an object through a
// VoipVolumeControl pointer.
// NOTE(review): absl::optional is used below but this header only includes
// "api/voip/voip_base.h"; presumably the definition arrives transitively —
// consider including "absl/types/optional.h" directly.
class VoipVolumeControl {
|
||||
public:
|
||||
// Mutes/unmutes the microphone input samples before the encoding process.
|
||||
// Note that mute doesn't affect the audio input level and energy values, as
|
||||
// the input sample is silenced after the measurement.
// NOTE(review): |enable| == true presumably mutes and false unmutes; confirm
// against the implementation the value is forwarded to.
|
||||
virtual void SetInputMuted(ChannelId channel_id, bool enable) = 0;
|
||||
|
||||
// Gets the microphone (input) volume info for |channel_id|.
|
||||
// Returns absl::nullopt if |channel_id| is invalid.
|
||||
virtual absl::optional<VolumeInfo> GetInputVolumeInfo(
|
||||
ChannelId channel_id) = 0;
|
||||
|
||||
// Gets the speaker (output) volume info for |channel_id|.
|
||||
// Returns absl::nullopt if |channel_id| is invalid.
|
||||
virtual absl::optional<VolumeInfo> GetOutputVolumeInfo(
|
||||
ChannelId channel_id) = 0;
|
||||
|
||||
protected:
|
||||
// Protected so that the interface cannot be destroyed by its users.
virtual ~VoipVolumeControl() = default;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // API_VOIP_VOIP_VOLUME_CONTROL_H_
|
||||
@ -155,7 +155,7 @@ IngressStatistics AudioChannel::GetIngressStatistics() {
|
||||
ingress_stats.neteq_stats.interruption_count = stats.interruptionCount;
|
||||
ingress_stats.neteq_stats.total_interruption_duration_ms =
|
||||
stats.totalInterruptionDurationMs;
|
||||
ingress_stats.total_duration = ingress_->GetTotalDuration();
|
||||
ingress_stats.total_duration = ingress_->GetOutputTotalDuration();
|
||||
return ingress_stats;
|
||||
}
|
||||
|
||||
|
||||
@ -70,6 +70,7 @@ class AudioChannel : public rtc::RefCountInterface {
|
||||
bool SendTelephoneEvent(int dtmf_event, int duration_ms) {
|
||||
return egress_->SendTelephoneEvent(dtmf_event, duration_ms);
|
||||
}
|
||||
void SetMute(bool enable) { egress_->SetMute(enable); }
|
||||
|
||||
// APIs relayed to AudioIngress.
|
||||
bool IsPlaying() const { return ingress_->IsPlaying(); }
|
||||
@ -84,6 +85,27 @@ class AudioChannel : public rtc::RefCountInterface {
|
||||
}
|
||||
IngressStatistics GetIngressStatistics();
|
||||
|
||||
// See comments on the methods used from AudioEgress and AudioIngress.
|
||||
// Conversion to double is following what is done in
|
||||
// DoubleAudioLevelFromIntAudioLevel method in rtc_stats_collector.cc to be
|
||||
// consistent.
|
||||
// Returns the input (egress) audio level as a double: the integer level
// reported by AudioEgress::GetInputAudioLevel() divided by 32767.0 (the
// int16_t maximum). Presumably this maps the level into [0.0, 1.0] — confirm
// against the range of the underlying full-range level.
double GetInputAudioLevel() const {
|
||||
return egress_->GetInputAudioLevel() / 32767.0;
|
||||
}
|
||||
double GetInputTotalEnergy() const { return egress_->GetInputTotalEnergy(); }
|
||||
double GetInputTotalDuration() const {
|
||||
return egress_->GetInputTotalDuration();
|
||||
}
|
||||
// Returns the output (ingress) audio level as a double: the integer level
// reported by AudioIngress::GetOutputAudioLevel() divided by 32767.0 (the
// int16_t maximum). Presumably this maps the level into [0.0, 1.0] — confirm
// against the range of the underlying full-range level.
double GetOutputAudioLevel() const {
|
||||
return ingress_->GetOutputAudioLevel() / 32767.0;
|
||||
}
|
||||
double GetOutputTotalEnergy() const {
|
||||
return ingress_->GetOutputTotalEnergy();
|
||||
}
|
||||
double GetOutputTotalDuration() const {
|
||||
return ingress_->GetOutputTotalDuration();
|
||||
}
|
||||
|
||||
private:
|
||||
// ChannelId that this audio channel belongs to, for logging purposes.
|
||||
ChannelId id_;
|
||||
|
||||
@ -80,6 +80,12 @@ void AudioEgress::SendAudioData(std::unique_ptr<AudioFrame> audio_frame) {
|
||||
return;
|
||||
}
|
||||
|
||||
double duration_seconds =
|
||||
static_cast<double>(audio_frame->samples_per_channel_) /
|
||||
audio_frame->sample_rate_hz_;
|
||||
|
||||
input_audio_level_.ComputeLevel(*audio_frame, duration_seconds);
|
||||
|
||||
AudioFrameOperations::Mute(audio_frame.get(),
|
||||
encoder_context_.previously_muted_,
|
||||
encoder_context_.mute_);
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include "api/audio_codecs/audio_format.h"
|
||||
#include "api/task_queue/task_queue_factory.h"
|
||||
#include "audio/audio_level.h"
|
||||
#include "audio/utility/audio_frame_operations.h"
|
||||
#include "call/audio_sender.h"
|
||||
#include "modules/audio_coding/include/audio_coding_module.h"
|
||||
@ -89,6 +90,16 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback {
|
||||
// otherwise false when the dtmf queue reached maximum of 20 events.
|
||||
bool SendTelephoneEvent(int dtmf_event, int duration_ms);
|
||||
|
||||
// See comments on LevelFullRange, TotalEnergy, TotalDuration from
|
||||
// audio/audio_level.h.
|
||||
int GetInputAudioLevel() const { return input_audio_level_.LevelFullRange(); }
|
||||
double GetInputTotalEnergy() const {
|
||||
return input_audio_level_.TotalEnergy();
|
||||
}
|
||||
double GetInputTotalDuration() const {
|
||||
return input_audio_level_.TotalDuration();
|
||||
}
|
||||
|
||||
// Implementation of AudioSender interface.
|
||||
void SendAudioData(std::unique_ptr<AudioFrame> audio_frame) override;
|
||||
|
||||
@ -137,6 +148,9 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback {
|
||||
// Defined last to ensure that there are no running tasks when the other
|
||||
// members are destroyed.
|
||||
rtc::TaskQueue encoder_queue_;
|
||||
|
||||
// Synchronization is handled internally by voe::AudioLevel.
|
||||
voe::AudioLevel input_audio_level_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -68,16 +68,13 @@ class AudioIngress : public AudioMixer::Source {
|
||||
void ReceivedRTPPacket(rtc::ArrayView<const uint8_t> rtp_packet);
|
||||
void ReceivedRTCPPacket(rtc::ArrayView<const uint8_t> rtcp_packet);
|
||||
|
||||
// Retrieve highest speech output level in last 100 ms. Note that
|
||||
// this isn't RMS but absolute raw audio level on int16_t sample unit.
|
||||
// Therefore, the return value will vary between 0 ~ 0xFFFF. This type of
|
||||
// value may be useful to be used for measuring active speaker gauge.
|
||||
int GetSpeechOutputLevelFullRange() const {
|
||||
// See comments on LevelFullRange, TotalEnergy, TotalDuration from
|
||||
// audio/audio_level.h.
|
||||
int GetOutputAudioLevel() const {
|
||||
return output_audio_level_.LevelFullRange();
|
||||
}
|
||||
// Retrieves the total duration for all samples played so far as explained in
|
||||
// audio/AudioLevel.h.
|
||||
double GetTotalDuration() const {
|
||||
double GetOutputTotalEnergy() { return output_audio_level_.TotalEnergy(); }
|
||||
double GetOutputTotalDuration() {
|
||||
return output_audio_level_.TotalDuration();
|
||||
}
|
||||
|
||||
|
||||
@ -43,12 +43,13 @@ std::unique_ptr<ModuleRtpRtcpImpl2> CreateRtpStack(Clock* clock,
|
||||
return rtp_rtcp;
|
||||
}
|
||||
|
||||
constexpr int16_t kAudioLevel = 3004; // Used for sine wave level.
|
||||
|
||||
// AudioEgressTest configures audio egress by using Rtp Stack, fake clock,
|
||||
// and task queue factory. Encoder factory is needed to create codec and
|
||||
// configure the RTP stack in audio egress.
|
||||
class AudioEgressTest : public ::testing::Test {
|
||||
public:
|
||||
static constexpr int16_t kAudioLevel = 3004; // Used for sine wave level.
|
||||
static constexpr uint16_t kSeqNum = 12345;
|
||||
static constexpr uint64_t kStartTime = 123456789;
|
||||
static constexpr uint32_t kRemoteSsrc = 0xDEADBEEF;
|
||||
@ -286,5 +287,37 @@ TEST_F(AudioEgressTest, SendDTMF) {
|
||||
EXPECT_EQ(dtmf_count, kExpected);
|
||||
}
|
||||
|
||||
TEST_F(AudioEgressTest, TestAudioInputLevelAndEnergyDuration) {
|
||||
// Per audio_level's kUpdateFrequency, we need more than 10 audio frames to
|
||||
// get audio level from input source.
|
||||
constexpr int kExpected = 6;
|
||||
rtc::Event event;
|
||||
int rtp_count = 0;
|
||||
auto rtp_sent = [&](const uint8_t* packet, size_t length, Unused) {
|
||||
if (++rtp_count == kExpected) {
|
||||
event.Set();
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent));
|
||||
|
||||
// Two 10 ms audio frames will result in rtp packet with ptime 20.
|
||||
for (size_t i = 0; i < kExpected * 2; i++) {
|
||||
egress_->SendAudioData(GetAudioFrame(i));
|
||||
fake_clock_.AdvanceTimeMilliseconds(10);
|
||||
}
|
||||
|
||||
event.Wait(/*give_up_after_ms=*/1000);
|
||||
EXPECT_EQ(rtp_count, kExpected);
|
||||
|
||||
constexpr double kExpectedEnergy = 0.00016809565587789564;
|
||||
constexpr double kExpectedDuration = 0.11999999999999998;
|
||||
|
||||
EXPECT_EQ(egress_->GetInputAudioLevel(), kAudioLevel);
|
||||
EXPECT_DOUBLE_EQ(egress_->GetInputTotalEnergy(), kExpectedEnergy);
|
||||
EXPECT_DOUBLE_EQ(egress_->GetInputTotalDuration(), kExpectedDuration);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace webrtc
|
||||
|
||||
@ -134,9 +134,10 @@ TEST_F(AudioIngressTest, GetAudioFrameAfterRtpReceived) {
|
||||
EXPECT_EQ(audio_frame.elapsed_time_ms_, 0);
|
||||
}
|
||||
|
||||
TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) {
|
||||
// Per audio_level's kUpdateFrequency, we need 11 RTP to get audio level.
|
||||
constexpr int kNumRtp = 11;
|
||||
TEST_F(AudioIngressTest, TestSpeechOutputLevelAndEnergyDuration) {
|
||||
// Per audio_level's kUpdateFrequency, we need more than 10 audio frames to
|
||||
// get audio level from output source.
|
||||
constexpr int kNumRtp = 6;
|
||||
int rtp_count = 0;
|
||||
rtc::Event event;
|
||||
auto handle_rtp = [&](const uint8_t* packet, size_t length, Unused) {
|
||||
@ -151,15 +152,21 @@ TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) {
|
||||
egress_->SendAudioData(GetAudioFrame(i));
|
||||
fake_clock_.AdvanceTimeMilliseconds(10);
|
||||
}
|
||||
event.Wait(/*ms=*/1000);
|
||||
event.Wait(/*give_up_after_ms=*/1000);
|
||||
|
||||
for (int i = 0; i < kNumRtp; ++i) {
|
||||
for (int i = 0; i < kNumRtp * 2; ++i) {
|
||||
AudioFrame audio_frame;
|
||||
EXPECT_EQ(
|
||||
ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame),
|
||||
AudioMixer::Source::AudioFrameInfo::kNormal);
|
||||
}
|
||||
EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel);
|
||||
EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel);
|
||||
|
||||
constexpr double kExpectedEnergy = 0.00016809565587789564;
|
||||
constexpr double kExpectedDuration = 0.11999999999999998;
|
||||
|
||||
EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalEnergy(), kExpectedEnergy);
|
||||
EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalDuration(), kExpectedDuration);
|
||||
}
|
||||
|
||||
TEST_F(AudioIngressTest, PreferredSampleRate) {
|
||||
@ -221,7 +228,7 @@ TEST_F(AudioIngressTest, GetMutedAudioFrameAfterRtpReceivedAndStopPlay) {
|
||||
|
||||
// Now we should still see valid speech output level as StopPlay won't affect
|
||||
// the measurement.
|
||||
EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel);
|
||||
EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -420,4 +420,35 @@ absl::optional<IngressStatistics> VoipCore::GetIngressStatistics(
|
||||
return absl::nullopt;
|
||||
}
|
||||
|
||||
// Implements VoipVolumeControl::SetInputMuted. Looks up the channel for
// |channel_id| and forwards |enable| to AudioChannel::SetMute(). When
// GetChannel() yields a null channel the call does nothing here.
// NOTE(review): any error logging for an unknown id presumably happens inside
// GetChannel() — confirm.
void VoipCore::SetInputMuted(ChannelId channel_id, bool enable) {
|
||||
rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id);
|
||||
if (channel) {
|
||||
channel->SetMute(enable);
|
||||
}
|
||||
}
|
||||
|
||||
// Implements VoipVolumeControl::GetInputVolumeInfo. Fills a VolumeInfo from
// the channel's input audio level, total energy and total duration getters.
// Returns absl::nullopt when GetChannel() yields a null channel for
// |channel_id|.
// NOTE(review): the three fields are fetched with separate calls, so they may
// not describe the same instant if the level is updated in between — confirm
// whether callers need a consistent snapshot.
absl::optional<VolumeInfo> VoipCore::GetInputVolumeInfo(ChannelId channel_id) {
|
||||
rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id);
|
||||
if (channel) {
|
||||
VolumeInfo input_volume;
|
||||
input_volume.audio_level = channel->GetInputAudioLevel();
|
||||
input_volume.total_energy = channel->GetInputTotalEnergy();
|
||||
input_volume.total_duration = channel->GetInputTotalDuration();
|
||||
return input_volume;
|
||||
}
|
||||
// No channel for |channel_id|.
return absl::nullopt;
|
||||
}
|
||||
|
||||
// Implements VoipVolumeControl::GetOutputVolumeInfo. Fills a VolumeInfo from
// the channel's output audio level, total energy and total duration getters.
// Returns absl::nullopt when GetChannel() yields a null channel for
// |channel_id|.
// NOTE(review): the three fields are fetched with separate calls, so they may
// not describe the same instant if the level is updated in between — confirm
// whether callers need a consistent snapshot.
absl::optional<VolumeInfo> VoipCore::GetOutputVolumeInfo(ChannelId channel_id) {
|
||||
rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id);
|
||||
if (channel) {
|
||||
VolumeInfo output_volume;
|
||||
output_volume.audio_level = channel->GetOutputAudioLevel();
|
||||
output_volume.total_energy = channel->GetOutputTotalEnergy();
|
||||
output_volume.total_duration = channel->GetOutputTotalDuration();
|
||||
return output_volume;
|
||||
}
|
||||
// No channel for |channel_id|.
return absl::nullopt;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include "api/voip/voip_engine.h"
|
||||
#include "api/voip/voip_network.h"
|
||||
#include "api/voip/voip_statistics.h"
|
||||
#include "api/voip/voip_volume_control.h"
|
||||
#include "audio/audio_transport_impl.h"
|
||||
#include "audio/voip/audio_channel.h"
|
||||
#include "modules/audio_device/include/audio_device.h"
|
||||
@ -49,7 +50,8 @@ class VoipCore : public VoipEngine,
|
||||
public VoipNetwork,
|
||||
public VoipCodec,
|
||||
public VoipDtmf,
|
||||
public VoipStatistics {
|
||||
public VoipStatistics,
|
||||
public VoipVolumeControl {
|
||||
public:
|
||||
// Construct VoipCore with provided arguments.
|
||||
// ProcessThread implementation can be injected by |process_thread|
|
||||
@ -69,6 +71,7 @@ class VoipCore : public VoipEngine,
|
||||
VoipCodec& Codec() override { return *this; }
|
||||
VoipDtmf& Dtmf() override { return *this; }
|
||||
VoipStatistics& Statistics() override { return *this; }
|
||||
VoipVolumeControl& VolumeControl() override { return *this; }
|
||||
|
||||
// Implements VoipBase interfaces.
|
||||
absl::optional<ChannelId> CreateChannel(
|
||||
@ -106,6 +109,11 @@ class VoipCore : public VoipEngine,
|
||||
absl::optional<IngressStatistics> GetIngressStatistics(
|
||||
ChannelId channel_id) override;
|
||||
|
||||
// Implements VoipVolumeControl interfaces.
|
||||
void SetInputMuted(ChannelId channel_id, bool enable) override;
|
||||
absl::optional<VolumeInfo> GetInputVolumeInfo(ChannelId channel_id) override;
|
||||
absl::optional<VolumeInfo> GetOutputVolumeInfo(ChannelId channel_id) override;
|
||||
|
||||
private:
|
||||
// Initialize ADM and default audio device if needed.
|
||||
// Returns true if ADM is successfully initialized or already in such state
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user