VoipVolumeControl subAPI for VoIP API

- mute/unmute API.
- speech level/energy/duration API.

Bug: webrtc:12111
Change-Id: I54757b9874d15d59a145f2ca70801ee9ef0f4430
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/191060
Commit-Queue: Tim Na <natim@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32607}
This commit is contained in:
Tim Na 2020-11-13 11:07:43 -08:00 committed by Commit Bot
parent f42a94a7f5
commit a58cae3eae
12 changed files with 198 additions and 18 deletions

View File

@ -17,6 +17,7 @@ rtc_source_set("voip_api") {
"voip_engine.h",
"voip_network.h",
"voip_statistics.h",
"voip_volume_control.h",
]
deps = [
"..:array_view",

View File

@ -18,6 +18,7 @@ class VoipCodec;
class VoipNetwork;
class VoipDtmf;
class VoipStatistics;
class VoipVolumeControl;
// VoipEngine is the main interface serving as the entry point for all VoIP
// APIs. A single instance of VoipEngine should suffice the most of the need for
@ -89,6 +90,9 @@ class VoipEngine {
// VoipStatistics provides performance metrics around audio decoding module
// and jitter buffer (NetEq).
virtual VoipStatistics& Statistics() = 0;
// VoipVolumeControl provides various input/output volume control.
virtual VoipVolumeControl& VolumeControl() = 0;
};
} // namespace webrtc

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_VOIP_VOIP_VOLUME_CONTROL_H_
#define API_VOIP_VOIP_VOLUME_CONTROL_H_
#include "api/voip/voip_base.h"
namespace webrtc {
// Volume-related measurements reported for one direction (input or output) of
// a channel. Every field starts at zero.
struct VolumeInfo {
  // Audio level, as defined by
  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-audiolevel
  double audio_level = 0.0;
  // Accumulated audio energy, as defined by
  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalaudioenergy
  double total_energy = 0.0;
  // Accumulated duration of the measured samples, as defined by
  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalsamplesduration
  double total_duration = 0.0;
};
// VoipVolumeControl interface.
//
// This sub-API supports functions related to the input (microphone) and output
// (speaker) device.
//
// Caller must ensure that ChannelId is valid otherwise it will result in no-op
// with error logging.
class VoipVolumeControl {
public:
// Mutes or unmutes, according to |enable|, the microphone input samples of
// |channel_id| before the encoding process. Note that muting doesn't affect
// the audio input level and energy values, because the input samples are
// silenced after the measurement has been taken.
virtual void SetInputMuted(ChannelId channel_id, bool enable) = 0;
// Gets the microphone (input) volume info of |channel_id|.
// Returns absl::nullopt if |channel_id| is invalid.
virtual absl::optional<VolumeInfo> GetInputVolumeInfo(
ChannelId channel_id) = 0;
// Gets the speaker (output) volume info of |channel_id|.
// Returns absl::nullopt if |channel_id| is invalid.
virtual absl::optional<VolumeInfo> GetOutputVolumeInfo(
ChannelId channel_id) = 0;
protected:
// Protected so that instances cannot be deleted through a VoipVolumeControl
// pointer by code outside the class hierarchy.
virtual ~VoipVolumeControl() = default;
};
} // namespace webrtc
#endif // API_VOIP_VOIP_VOLUME_CONTROL_H_

View File

@ -155,7 +155,7 @@ IngressStatistics AudioChannel::GetIngressStatistics() {
ingress_stats.neteq_stats.interruption_count = stats.interruptionCount;
ingress_stats.neteq_stats.total_interruption_duration_ms =
stats.totalInterruptionDurationMs;
ingress_stats.total_duration = ingress_->GetTotalDuration();
ingress_stats.total_duration = ingress_->GetOutputTotalDuration();
return ingress_stats;
}

View File

@ -70,6 +70,7 @@ class AudioChannel : public rtc::RefCountInterface {
// Relays a DTMF telephone event request to AudioEgress and passes its result
// back to the caller unchanged.
bool SendTelephoneEvent(int dtmf_event, int duration_ms) {
  const bool accepted = egress_->SendTelephoneEvent(dtmf_event, duration_ms);
  return accepted;
}
// Relays the requested mute state to AudioEgress.
void SetMute(bool enable) {
  egress_->SetMute(enable);
}
// APIs relayed to AudioIngress.
// Reports the playing state as seen by AudioIngress.
bool IsPlaying() const {
  const bool playing = ingress_->IsPlaying();
  return playing;
}
@ -84,6 +85,27 @@ class AudioChannel : public rtc::RefCountInterface {
}
IngressStatistics GetIngressStatistics();
// See comments on the methods used from AudioEgress and AudioIngress.
// Conversion to double is following what is done in
// DoubleAudioLevelFromIntAudioLevel method in rtc_stats_collector.cc to be
// consistent.
// Returns the input audio level as a double: the integer full-range level
// reported by AudioEgress divided by 32767.
double GetInputAudioLevel() const {
  constexpr double kFullRangeScale = 32767.0;
  const int full_range_level = egress_->GetInputAudioLevel();
  return full_range_level / kFullRangeScale;
}
// Returns the total input energy reported by AudioEgress.
double GetInputTotalEnergy() const {
  const double input_energy = egress_->GetInputTotalEnergy();
  return input_energy;
}
// Returns the total input duration reported by AudioEgress.
double GetInputTotalDuration() const {
  const double input_duration = egress_->GetInputTotalDuration();
  return input_duration;
}
// Returns the output audio level as a double: the integer full-range level
// reported by AudioIngress divided by 32767.
double GetOutputAudioLevel() const {
  constexpr double kFullRangeScale = 32767.0;
  const int full_range_level = ingress_->GetOutputAudioLevel();
  return full_range_level / kFullRangeScale;
}
// Returns the total output energy reported by AudioIngress.
double GetOutputTotalEnergy() const {
  const double output_energy = ingress_->GetOutputTotalEnergy();
  return output_energy;
}
// Returns the total output duration reported by AudioIngress.
double GetOutputTotalDuration() const {
  const double output_duration = ingress_->GetOutputTotalDuration();
  return output_duration;
}
private:
// ChannelId that this audio channel belongs for logging purpose.
ChannelId id_;

View File

@ -80,6 +80,12 @@ void AudioEgress::SendAudioData(std::unique_ptr<AudioFrame> audio_frame) {
return;
}
double duration_seconds =
static_cast<double>(audio_frame->samples_per_channel_) /
audio_frame->sample_rate_hz_;
input_audio_level_.ComputeLevel(*audio_frame, duration_seconds);
AudioFrameOperations::Mute(audio_frame.get(),
encoder_context_.previously_muted_,
encoder_context_.mute_);

View File

@ -16,6 +16,7 @@
#include "api/audio_codecs/audio_format.h"
#include "api/task_queue/task_queue_factory.h"
#include "audio/audio_level.h"
#include "audio/utility/audio_frame_operations.h"
#include "call/audio_sender.h"
#include "modules/audio_coding/include/audio_coding_module.h"
@ -89,6 +90,16 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback {
// otherwise false when the dtmf queue reached maximum of 20 events.
bool SendTelephoneEvent(int dtmf_event, int duration_ms);
// See comments on LevelFullRange, TotalEnergy, TotalDuration from
// audio/audio_level.h.
// Returns LevelFullRange() of the input audio level tracker.
int GetInputAudioLevel() const {
  const int full_range_level = input_audio_level_.LevelFullRange();
  return full_range_level;
}
// Returns TotalEnergy() of the input audio level tracker.
double GetInputTotalEnergy() const {
  const double total_energy = input_audio_level_.TotalEnergy();
  return total_energy;
}
// Returns TotalDuration() of the input audio level tracker.
double GetInputTotalDuration() const {
  const double total_duration = input_audio_level_.TotalDuration();
  return total_duration;
}
// Implementation of AudioSender interface.
void SendAudioData(std::unique_ptr<AudioFrame> audio_frame) override;
@ -137,6 +148,9 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback {
// Defined last to ensure that there are no running tasks when the other
// members are destroyed.
rtc::TaskQueue encoder_queue_;
// Synchronization is handled internally by voe::AudioLevel.
voe::AudioLevel input_audio_level_;
};
} // namespace webrtc

View File

@ -68,16 +68,13 @@ class AudioIngress : public AudioMixer::Source {
void ReceivedRTPPacket(rtc::ArrayView<const uint8_t> rtp_packet);
void ReceivedRTCPPacket(rtc::ArrayView<const uint8_t> rtcp_packet);
// Retrieve highest speech output level in last 100 ms. Note that
// this isn't RMS but absolute raw audio level on int16_t sample unit.
// Therefore, the return value will vary between 0 ~ 0xFFFF. This type of
// value may be useful to be used for measuring active speaker gauge.
int GetSpeechOutputLevelFullRange() const {
// See comments on LevelFullRange, TotalEnergy, TotalDuration from
// audio/audio_level.h.
// Returns LevelFullRange() of the output audio level tracker.
int GetOutputAudioLevel() const {
  const int full_range_level = output_audio_level_.LevelFullRange();
  return full_range_level;
}
// Retrieves the total duration for all samples played so far as explained in
// audio/AudioLevel.h.
double GetTotalDuration() const {
double GetOutputTotalEnergy() { return output_audio_level_.TotalEnergy(); }
double GetOutputTotalDuration() {
return output_audio_level_.TotalDuration();
}

View File

@ -43,12 +43,13 @@ std::unique_ptr<ModuleRtpRtcpImpl2> CreateRtpStack(Clock* clock,
return rtp_rtcp;
}
constexpr int16_t kAudioLevel = 3004; // Used for sine wave level.
// AudioEgressTest configures audio egress by using Rtp Stack, fake clock,
// and task queue factory. Encoder factory is needed to create codec and
// configure the RTP stack in audio egress.
class AudioEgressTest : public ::testing::Test {
public:
static constexpr int16_t kAudioLevel = 3004; // Used for sine wave level.
static constexpr uint16_t kSeqNum = 12345;
static constexpr uint64_t kStartTime = 123456789;
static constexpr uint32_t kRemoteSsrc = 0xDEADBEEF;
@ -286,5 +287,37 @@ TEST_F(AudioEgressTest, SendDTMF) {
EXPECT_EQ(dtmf_count, kExpected);
}
// Verifies that AudioEgress reports the input audio level, total energy and
// total duration after audio frames have been pushed through SendAudioData.
TEST_F(AudioEgressTest, TestAudioInputLevelAndEnergyDuration) {
// Per audio_level's kUpdateFrequency, we need more than 10 audio frames to
// get audio level from input source (12 frames are pushed below).
constexpr int kExpected = 6;
rtc::Event event;
int rtp_count = 0;
// Counts outgoing RTP packets and signals |event| once the expected number
// has been sent.
auto rtp_sent = [&](const uint8_t* packet, size_t length, Unused) {
if (++rtp_count == kExpected) {
event.Set();
}
return true;
};
EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent));
// Two 10 ms audio frames will result in rtp packet with ptime 20.
for (size_t i = 0; i < kExpected * 2; i++) {
egress_->SendAudioData(GetAudioFrame(i));
fake_clock_.AdvanceTimeMilliseconds(10);
}
event.Wait(/*give_up_after_ms=*/1000);
EXPECT_EQ(rtp_count, kExpected);
// Expected totals after the 12 frames of 10 ms each pushed above.
constexpr double kExpectedEnergy = 0.00016809565587789564;
constexpr double kExpectedDuration = 0.11999999999999998;
EXPECT_EQ(egress_->GetInputAudioLevel(), kAudioLevel);
EXPECT_DOUBLE_EQ(egress_->GetInputTotalEnergy(), kExpectedEnergy);
EXPECT_DOUBLE_EQ(egress_->GetInputTotalDuration(), kExpectedDuration);
}
} // namespace
} // namespace webrtc

View File

@ -134,9 +134,10 @@ TEST_F(AudioIngressTest, GetAudioFrameAfterRtpReceived) {
EXPECT_EQ(audio_frame.elapsed_time_ms_, 0);
}
TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) {
// Per audio_level's kUpdateFrequency, we need 11 RTP to get audio level.
constexpr int kNumRtp = 11;
TEST_F(AudioIngressTest, TestSpeechOutputLevelAndEnergyDuration) {
// Per audio_level's kUpdateFrequency, we need more than 10 audio frames to
// get audio level from output source.
constexpr int kNumRtp = 6;
int rtp_count = 0;
rtc::Event event;
auto handle_rtp = [&](const uint8_t* packet, size_t length, Unused) {
@ -151,15 +152,21 @@ TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) {
egress_->SendAudioData(GetAudioFrame(i));
fake_clock_.AdvanceTimeMilliseconds(10);
}
event.Wait(/*ms=*/1000);
event.Wait(/*give_up_after_ms=*/1000);
for (int i = 0; i < kNumRtp; ++i) {
for (int i = 0; i < kNumRtp * 2; ++i) {
AudioFrame audio_frame;
EXPECT_EQ(
ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame),
AudioMixer::Source::AudioFrameInfo::kNormal);
}
EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel);
EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel);
constexpr double kExpectedEnergy = 0.00016809565587789564;
constexpr double kExpectedDuration = 0.11999999999999998;
EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalEnergy(), kExpectedEnergy);
EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalDuration(), kExpectedDuration);
}
TEST_F(AudioIngressTest, PreferredSampleRate) {
@ -221,7 +228,7 @@ TEST_F(AudioIngressTest, GetMutedAudioFrameAfterRtpReceivedAndStopPlay) {
// Now we should still see valid speech output level as StopPlay won't affect
// the measurement.
EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel);
EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel);
}
} // namespace

View File

@ -420,4 +420,35 @@ absl::optional<IngressStatistics> VoipCore::GetIngressStatistics(
return absl::nullopt;
}
// Applies the requested mute state to the channel identified by |channel_id|.
// Does nothing when GetChannel() yields no channel for that id.
void VoipCore::SetInputMuted(ChannelId channel_id, bool enable) {
  rtc::scoped_refptr<AudioChannel> audio_channel = GetChannel(channel_id);
  if (!audio_channel) {
    return;
  }
  audio_channel->SetMute(enable);
}
// Collects the input audio level, total energy and total duration of the
// channel identified by |channel_id| into a VolumeInfo.
// Returns absl::nullopt when GetChannel() yields no channel for that id.
absl::optional<VolumeInfo> VoipCore::GetInputVolumeInfo(ChannelId channel_id) {
  rtc::scoped_refptr<AudioChannel> audio_channel = GetChannel(channel_id);
  if (!audio_channel) {
    return absl::nullopt;
  }
  VolumeInfo info;
  info.audio_level = audio_channel->GetInputAudioLevel();
  info.total_energy = audio_channel->GetInputTotalEnergy();
  info.total_duration = audio_channel->GetInputTotalDuration();
  return info;
}
// Collects the output audio level, total energy and total duration of the
// channel identified by |channel_id| into a VolumeInfo.
// Returns absl::nullopt when GetChannel() yields no channel for that id.
absl::optional<VolumeInfo> VoipCore::GetOutputVolumeInfo(ChannelId channel_id) {
  rtc::scoped_refptr<AudioChannel> audio_channel = GetChannel(channel_id);
  if (!audio_channel) {
    return absl::nullopt;
  }
  VolumeInfo info;
  info.audio_level = audio_channel->GetOutputAudioLevel();
  info.total_energy = audio_channel->GetOutputTotalEnergy();
  info.total_duration = audio_channel->GetOutputTotalDuration();
  return info;
}
} // namespace webrtc

View File

@ -27,6 +27,7 @@
#include "api/voip/voip_engine.h"
#include "api/voip/voip_network.h"
#include "api/voip/voip_statistics.h"
#include "api/voip/voip_volume_control.h"
#include "audio/audio_transport_impl.h"
#include "audio/voip/audio_channel.h"
#include "modules/audio_device/include/audio_device.h"
@ -49,7 +50,8 @@ class VoipCore : public VoipEngine,
public VoipNetwork,
public VoipCodec,
public VoipDtmf,
public VoipStatistics {
public VoipStatistics,
public VoipVolumeControl {
public:
// Construct VoipCore with provided arguments.
// ProcessThread implementation can be injected by |process_thread|
@ -69,6 +71,7 @@ class VoipCore : public VoipEngine,
// Sub-API accessors: each returns this same object viewed through the
// corresponding sub-API interface.
VoipCodec& Codec() override { return *this; }
VoipDtmf& Dtmf() override { return *this; }
VoipStatistics& Statistics() override { return *this; }
// Exposes the volume control sub-API (mute and input/output volume info).
VoipVolumeControl& VolumeControl() override { return *this; }
// Implements VoipBase interfaces.
absl::optional<ChannelId> CreateChannel(
@ -106,6 +109,11 @@ class VoipCore : public VoipEngine,
absl::optional<IngressStatistics> GetIngressStatistics(
ChannelId channel_id) override;
// Implements VoipVolumeControl interfaces.
void SetInputMuted(ChannelId channel_id, bool enable) override;
absl::optional<VolumeInfo> GetInputVolumeInfo(ChannelId channel_id) override;
absl::optional<VolumeInfo> GetOutputVolumeInfo(ChannelId channel_id) override;
private:
// Initialize ADM and default audio device if needed.
// Returns true if ADM is successfully initialized or already in such state