From a58cae3eaebf5e239507addd5b3c6123131d3264 Mon Sep 17 00:00:00 2001 From: Tim Na Date: Fri, 13 Nov 2020 11:07:43 -0800 Subject: [PATCH] VoipVolumeControl subAPI for VoIP API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - mute/unmute API. - speech level/energy/duration API. Bug: webrtc:12111 Change-Id: I54757b9874d15d59a145f2ca70801ee9ef0f4430 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/191060 Commit-Queue: Tim Na Reviewed-by: Karl Wiberg Reviewed-by: Per Åhgren Reviewed-by: Mirko Bonadei Cr-Commit-Position: refs/heads/master@{#32607} --- api/voip/BUILD.gn | 1 + api/voip/voip_engine.h | 4 ++ api/voip/voip_volume_control.h | 57 +++++++++++++++++++++++ audio/voip/audio_channel.cc | 2 +- audio/voip/audio_channel.h | 22 +++++++++ audio/voip/audio_egress.cc | 6 +++ audio/voip/audio_egress.h | 14 ++++++ audio/voip/audio_ingress.h | 13 ++---- audio/voip/test/audio_egress_unittest.cc | 35 +++++++++++++- audio/voip/test/audio_ingress_unittest.cc | 21 ++++++--- audio/voip/voip_core.cc | 31 ++++++++++++ audio/voip/voip_core.h | 10 +++- 12 files changed, 198 insertions(+), 18 deletions(-) create mode 100644 api/voip/voip_volume_control.h diff --git a/api/voip/BUILD.gn b/api/voip/BUILD.gn index a62dd14207..c099bfbfaf 100644 --- a/api/voip/BUILD.gn +++ b/api/voip/BUILD.gn @@ -17,6 +17,7 @@ rtc_source_set("voip_api") { "voip_engine.h", "voip_network.h", "voip_statistics.h", + "voip_volume_control.h", ] deps = [ "..:array_view", diff --git a/api/voip/voip_engine.h b/api/voip/voip_engine.h index 5724b6b5d9..69c0a8504f 100644 --- a/api/voip/voip_engine.h +++ b/api/voip/voip_engine.h @@ -18,6 +18,7 @@ class VoipCodec; class VoipNetwork; class VoipDtmf; class VoipStatistics; +class VoipVolumeControl; // VoipEngine is the main interface serving as the entry point for all VoIP // APIs. 
A single instance of VoipEngine should suffice the most of the need for @@ -89,6 +90,9 @@ class VoipEngine { // VoipStatistics provides performance metrics around audio decoding module // and jitter buffer (NetEq). virtual VoipStatistics& Statistics() = 0; + + // VoipVolumeControl provides various input/output volume control. + virtual VoipVolumeControl& VolumeControl() = 0; }; } // namespace webrtc diff --git a/api/voip/voip_volume_control.h b/api/voip/voip_volume_control.h new file mode 100644 index 0000000000..54e446715e --- /dev/null +++ b/api/voip/voip_volume_control.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef API_VOIP_VOIP_VOLUME_CONTROL_H_ +#define API_VOIP_VOIP_VOLUME_CONTROL_H_ + +#include "api/voip/voip_base.h" + +namespace webrtc { + +struct VolumeInfo { + // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-audiolevel + double audio_level = 0; + // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalaudioenergy + double total_energy = 0.0; + // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalsamplesduration + double total_duration = 0.0; +}; + +// VoipVolumeControl interface. +// +// This sub-API supports functions related to the input (microphone) and output +// (speaker) device. +// +// Caller must ensure that ChannelId is valid otherwise it will result in no-op +// with error logging. +class VoipVolumeControl { + public: + // Mute/unmutes the microphone input sample before encoding process. Note that + // mute doesn't affect audio input level and energy values as input sample is + // silenced after the measurement. 
+ virtual void SetInputMuted(ChannelId channel_id, bool enable) = 0; + + // Gets the microphone volume info. + // Returns absl::nullopt if |channel_id| is invalid. + virtual absl::optional<VolumeInfo> GetInputVolumeInfo( + ChannelId channel_id) = 0; + + // Gets the speaker volume info. + // Returns absl::nullopt if |channel_id| is invalid. + virtual absl::optional<VolumeInfo> GetOutputVolumeInfo( + ChannelId channel_id) = 0; + + protected: + virtual ~VoipVolumeControl() = default; +}; + +} // namespace webrtc + +#endif // API_VOIP_VOIP_VOLUME_CONTROL_H_ diff --git a/audio/voip/audio_channel.cc b/audio/voip/audio_channel.cc index 926130dc7e..dc53acf3ad 100644 --- a/audio/voip/audio_channel.cc +++ b/audio/voip/audio_channel.cc @@ -155,7 +155,7 @@ IngressStatistics AudioChannel::GetIngressStatistics() { ingress_stats.neteq_stats.interruption_count = stats.interruptionCount; ingress_stats.neteq_stats.total_interruption_duration_ms = stats.totalInterruptionDurationMs; - ingress_stats.total_duration = ingress_->GetTotalDuration(); + ingress_stats.total_duration = ingress_->GetOutputTotalDuration(); return ingress_stats; } diff --git a/audio/voip/audio_channel.h b/audio/voip/audio_channel.h index a8946a7aa6..5bc7483591 100644 --- a/audio/voip/audio_channel.h +++ b/audio/voip/audio_channel.h @@ -70,6 +70,7 @@ class AudioChannel : public rtc::RefCountInterface { bool SendTelephoneEvent(int dtmf_event, int duration_ms) { return egress_->SendTelephoneEvent(dtmf_event, duration_ms); } + void SetMute(bool enable) { egress_->SetMute(enable); } // APIs relayed to AudioIngress. bool IsPlaying() const { return ingress_->IsPlaying(); } @@ -84,6 +85,27 @@ class AudioChannel : public rtc::RefCountInterface { } IngressStatistics GetIngressStatistics(); + // See comments on the methods used from AudioEgress and AudioIngress. + // Conversion to double is following what is done in + // DoubleAudioLevelFromIntAudioLevel method in rtc_stats_collector.cc to be + // consistent. 
+ double GetInputAudioLevel() const { + return egress_->GetInputAudioLevel() / 32767.0; + } + double GetInputTotalEnergy() const { return egress_->GetInputTotalEnergy(); } + double GetInputTotalDuration() const { + return egress_->GetInputTotalDuration(); + } + double GetOutputAudioLevel() const { + return ingress_->GetOutputAudioLevel() / 32767.0; + } + double GetOutputTotalEnergy() const { + return ingress_->GetOutputTotalEnergy(); + } + double GetOutputTotalDuration() const { + return ingress_->GetOutputTotalDuration(); + } + private: // ChannelId that this audio channel belongs for logging purpose. ChannelId id_; diff --git a/audio/voip/audio_egress.cc b/audio/voip/audio_egress.cc index 90e069e1cc..1162824c9e 100644 --- a/audio/voip/audio_egress.cc +++ b/audio/voip/audio_egress.cc @@ -80,6 +80,12 @@ void AudioEgress::SendAudioData(std::unique_ptr<AudioFrame> audio_frame) { return; } + double duration_seconds = + static_cast<double>(audio_frame->samples_per_channel_) / + audio_frame->sample_rate_hz_; + + input_audio_level_.ComputeLevel(*audio_frame, duration_seconds); + AudioFrameOperations::Mute(audio_frame.get(), encoder_context_.previously_muted_, encoder_context_.mute_); diff --git a/audio/voip/audio_egress.h b/audio/voip/audio_egress.h index 6b2d374717..d9ae4f3e04 100644 --- a/audio/voip/audio_egress.h +++ b/audio/voip/audio_egress.h @@ -16,6 +16,7 @@ #include "api/audio_codecs/audio_format.h" #include "api/task_queue/task_queue_factory.h" +#include "audio/audio_level.h" #include "audio/utility/audio_frame_operations.h" #include "call/audio_sender.h" #include "modules/audio_coding/include/audio_coding_module.h" @@ -89,6 +90,16 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback { // otherwise false when the dtmf queue reached maximum of 20 events. bool SendTelephoneEvent(int dtmf_event, int duration_ms); + // See comments on LevelFullRange, TotalEnergy, TotalDuration from + // audio/audio_level.h. 
+ int GetInputAudioLevel() const { return input_audio_level_.LevelFullRange(); } + double GetInputTotalEnergy() const { + return input_audio_level_.TotalEnergy(); + } + double GetInputTotalDuration() const { + return input_audio_level_.TotalDuration(); + } + // Implementation of AudioSender interface. void SendAudioData(std::unique_ptr<AudioFrame> audio_frame) override; @@ -137,6 +148,9 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback { // Defined last to ensure that there are no running tasks when the other // members are destroyed. rtc::TaskQueue encoder_queue_; + + // Synchronization is handled internally by voe::AudioLevel. + voe::AudioLevel input_audio_level_; }; } // namespace webrtc diff --git a/audio/voip/audio_ingress.h b/audio/voip/audio_ingress.h index acb84c0b94..d3680e0f00 100644 --- a/audio/voip/audio_ingress.h +++ b/audio/voip/audio_ingress.h @@ -68,16 +68,13 @@ class AudioIngress : public AudioMixer::Source { void ReceivedRTPPacket(rtc::ArrayView<const uint8_t> rtp_packet); void ReceivedRTCPPacket(rtc::ArrayView<const uint8_t> rtcp_packet); - // Retrieve highest speech output level in last 100 ms. Note that - // this isn't RMS but absolute raw audio level on int16_t sample unit. - // Therefore, the return value will vary between 0 ~ 0xFFFF. This type of - // value may be useful to be used for measuring active speaker gauge. - int GetSpeechOutputLevelFullRange() const { + // See comments on LevelFullRange, TotalEnergy, TotalDuration from + // audio/audio_level.h. + int GetOutputAudioLevel() const { return output_audio_level_.LevelFullRange(); } - // Retrieves the total duration for all samples played so far as explained in - // audio/AudioLevel.h. 
- double GetTotalDuration() const { + double GetOutputTotalEnergy() { return output_audio_level_.TotalEnergy(); } + double GetOutputTotalDuration() { return output_audio_level_.TotalDuration(); } diff --git a/audio/voip/test/audio_egress_unittest.cc b/audio/voip/test/audio_egress_unittest.cc index 70fb6dcf36..0692ef2df4 100644 --- a/audio/voip/test/audio_egress_unittest.cc +++ b/audio/voip/test/audio_egress_unittest.cc @@ -43,12 +43,13 @@ std::unique_ptr CreateRtpStack(Clock* clock, return rtp_rtcp; } +constexpr int16_t kAudioLevel = 3004; // Used for sine wave level. + // AudioEgressTest configures audio egress by using Rtp Stack, fake clock, // and task queue factory. Encoder factory is needed to create codec and // configure the RTP stack in audio egress. class AudioEgressTest : public ::testing::Test { public: - static constexpr int16_t kAudioLevel = 3004; // Used for sine wave level. static constexpr uint16_t kSeqNum = 12345; static constexpr uint64_t kStartTime = 123456789; static constexpr uint32_t kRemoteSsrc = 0xDEADBEEF; @@ -286,5 +287,37 @@ TEST_F(AudioEgressTest, SendDTMF) { EXPECT_EQ(dtmf_count, kExpected); } +TEST_F(AudioEgressTest, TestAudioInputLevelAndEnergyDuration) { + // Per audio_level's kUpdateFrequency, we need more than 10 audio samples to + // get audio level from input source. + constexpr int kExpected = 6; + rtc::Event event; + int rtp_count = 0; + auto rtp_sent = [&](const uint8_t* packet, size_t length, Unused) { + if (++rtp_count == kExpected) { + event.Set(); + } + return true; + }; + + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); + + // Two 10 ms audio frames will result in rtp packet with ptime 20. 
+ for (size_t i = 0; i < kExpected * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + fake_clock_.AdvanceTimeMilliseconds(10); + } + + event.Wait(/*give_up_after_ms=*/1000); + EXPECT_EQ(rtp_count, kExpected); + + constexpr double kExpectedEnergy = 0.00016809565587789564; + constexpr double kExpectedDuration = 0.11999999999999998; + + EXPECT_EQ(egress_->GetInputAudioLevel(), kAudioLevel); + EXPECT_DOUBLE_EQ(egress_->GetInputTotalEnergy(), kExpectedEnergy); + EXPECT_DOUBLE_EQ(egress_->GetInputTotalDuration(), kExpectedDuration); +} + } // namespace } // namespace webrtc diff --git a/audio/voip/test/audio_ingress_unittest.cc b/audio/voip/test/audio_ingress_unittest.cc index 01b4d67dad..55ecfec695 100644 --- a/audio/voip/test/audio_ingress_unittest.cc +++ b/audio/voip/test/audio_ingress_unittest.cc @@ -134,9 +134,10 @@ TEST_F(AudioIngressTest, GetAudioFrameAfterRtpReceived) { EXPECT_EQ(audio_frame.elapsed_time_ms_, 0); } -TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) { - // Per audio_level's kUpdateFrequency, we need 11 RTP to get audio level. - constexpr int kNumRtp = 11; +TEST_F(AudioIngressTest, TestSpeechOutputLevelAndEnergyDuration) { + // Per audio_level's kUpdateFrequency, we need more than 10 audio samples to + // get audio level from output source. 
+ constexpr int kNumRtp = 6; int rtp_count = 0; rtc::Event event; auto handle_rtp = [&](const uint8_t* packet, size_t length, Unused) { @@ -151,15 +152,21 @@ TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) { egress_->SendAudioData(GetAudioFrame(i)); fake_clock_.AdvanceTimeMilliseconds(10); } - event.Wait(/*ms=*/1000); + event.Wait(/*give_up_after_ms=*/1000); - for (int i = 0; i < kNumRtp; ++i) { + for (int i = 0; i < kNumRtp * 2; ++i) { AudioFrame audio_frame; EXPECT_EQ( ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame), AudioMixer::Source::AudioFrameInfo::kNormal); } - EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel); + EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel); + + constexpr double kExpectedEnergy = 0.00016809565587789564; + constexpr double kExpectedDuration = 0.11999999999999998; + + EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalEnergy(), kExpectedEnergy); + EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalDuration(), kExpectedDuration); } TEST_F(AudioIngressTest, PreferredSampleRate) { @@ -221,7 +228,7 @@ TEST_F(AudioIngressTest, GetMutedAudioFrameAfterRtpReceivedAndStopPlay) { // Now we should still see valid speech output level as StopPlay won't affect // the measurement. 
- EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel); + EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel); } } // namespace diff --git a/audio/voip/voip_core.cc b/audio/voip/voip_core.cc index 92b80b5b71..ac29fbf6d8 100644 --- a/audio/voip/voip_core.cc +++ b/audio/voip/voip_core.cc @@ -420,4 +420,35 @@ absl::optional<IngressStatistics> VoipCore::GetIngressStatistics( return absl::nullopt; } +void VoipCore::SetInputMuted(ChannelId channel_id, bool enable) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + if (channel) { + channel->SetMute(enable); + } +} + +absl::optional<VolumeInfo> VoipCore::GetInputVolumeInfo(ChannelId channel_id) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + if (channel) { + VolumeInfo input_volume; + input_volume.audio_level = channel->GetInputAudioLevel(); + input_volume.total_energy = channel->GetInputTotalEnergy(); + input_volume.total_duration = channel->GetInputTotalDuration(); + return input_volume; + } + return absl::nullopt; +} + +absl::optional<VolumeInfo> VoipCore::GetOutputVolumeInfo(ChannelId channel_id) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + if (channel) { + VolumeInfo output_volume; + output_volume.audio_level = channel->GetOutputAudioLevel(); + output_volume.total_energy = channel->GetOutputTotalEnergy(); + output_volume.total_duration = channel->GetOutputTotalDuration(); + return output_volume; + } + return absl::nullopt; +} + } // namespace webrtc diff --git a/audio/voip/voip_core.h b/audio/voip/voip_core.h index 4279f770d9..5ebf4381cc 100644 --- a/audio/voip/voip_core.h +++ b/audio/voip/voip_core.h @@ -27,6 +27,7 @@ #include "api/voip/voip_engine.h" #include "api/voip/voip_network.h" #include "api/voip/voip_statistics.h" +#include "api/voip/voip_volume_control.h" #include "audio/audio_transport_impl.h" #include "audio/voip/audio_channel.h" #include "modules/audio_device/include/audio_device.h" @@ -49,7 +50,8 @@ class VoipCore : public VoipEngine, public VoipNetwork, public VoipCodec, public VoipDtmf, - 
public VoipStatistics { + public VoipStatistics, + public VoipVolumeControl { public: // Construct VoipCore with provided arguments. // ProcessThread implementation can be injected by |process_thread| @@ -69,6 +71,7 @@ class VoipCore : public VoipEngine, VoipCodec& Codec() override { return *this; } VoipDtmf& Dtmf() override { return *this; } VoipStatistics& Statistics() override { return *this; } + VoipVolumeControl& VolumeControl() override { return *this; } // Implements VoipBase interfaces. absl::optional<ChannelId> CreateChannel( @@ -106,6 +109,11 @@ class VoipCore : public VoipEngine, absl::optional<IngressStatistics> GetIngressStatistics( ChannelId channel_id) override; + // Implements VoipVolumeControl interfaces. + void SetInputMuted(ChannelId channel_id, bool enable) override; + absl::optional<VolumeInfo> GetInputVolumeInfo(ChannelId channel_id) override; + absl::optional<VolumeInfo> GetOutputVolumeInfo(ChannelId channel_id) override; + private: // Initialize ADM and default audio device if needed. // Returns true if ADM is successfully initialized or already in such state