From a58cae3eaebf5e239507addd5b3c6123131d3264 Mon Sep 17 00:00:00 2001
From: Tim Na <natim@webrtc.org>
Date: Fri, 13 Nov 2020 11:07:43 -0800
Subject: [PATCH] VoipVolumeControl subAPI for VoIP API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- mute/unmute API.
- speech level/energy/duration API.

Bug: webrtc:12111
Change-Id: I54757b9874d15d59a145f2ca70801ee9ef0f4430
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/191060
Commit-Queue: Tim Na <natim@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32607}
---
 api/voip/BUILD.gn                         |  1 +
 api/voip/voip_engine.h                    |  4 ++
 api/voip/voip_volume_control.h            | 57 +++++++++++++++++++++++
 audio/voip/audio_channel.cc               |  2 +-
 audio/voip/audio_channel.h                | 22 +++++++++
 audio/voip/audio_egress.cc                |  6 +++
 audio/voip/audio_egress.h                 | 14 ++++++
 audio/voip/audio_ingress.h                | 13 ++----
 audio/voip/test/audio_egress_unittest.cc  | 35 +++++++++++++-
 audio/voip/test/audio_ingress_unittest.cc | 21 ++++++---
 audio/voip/voip_core.cc                   | 31 ++++++++++++
 audio/voip/voip_core.h                    | 10 +++-
 12 files changed, 198 insertions(+), 18 deletions(-)
 create mode 100644 api/voip/voip_volume_control.h

diff --git a/api/voip/BUILD.gn b/api/voip/BUILD.gn
index a62dd14207..c099bfbfaf 100644
--- a/api/voip/BUILD.gn
+++ b/api/voip/BUILD.gn
@@ -17,6 +17,7 @@ rtc_source_set("voip_api") {
     "voip_engine.h",
     "voip_network.h",
     "voip_statistics.h",
+    "voip_volume_control.h",
   ]
   deps = [
     "..:array_view",
diff --git a/api/voip/voip_engine.h b/api/voip/voip_engine.h
index 5724b6b5d9..69c0a8504f 100644
--- a/api/voip/voip_engine.h
+++ b/api/voip/voip_engine.h
@@ -18,6 +18,7 @@ class VoipCodec;
 class VoipNetwork;
 class VoipDtmf;
 class VoipStatistics;
+class VoipVolumeControl;
 
 // VoipEngine is the main interface serving as the entry point for all VoIP
 // APIs. A single instance of VoipEngine should suffice the most of the need for
@@ -89,6 +90,9 @@ class VoipEngine {
   // VoipStatistics provides performance metrics around audio decoding module
   // and jitter buffer (NetEq).
   virtual VoipStatistics& Statistics() = 0;
+
+  // VoipVolumeControl provides various input/output volume control.
+  virtual VoipVolumeControl& VolumeControl() = 0;
 };
 
 }  // namespace webrtc
diff --git a/api/voip/voip_volume_control.h b/api/voip/voip_volume_control.h
new file mode 100644
index 0000000000..54e446715e
--- /dev/null
+++ b/api/voip/voip_volume_control.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef API_VOIP_VOIP_VOLUME_CONTROL_H_
+#define API_VOIP_VOIP_VOLUME_CONTROL_H_
+
+#include "api/voip/voip_base.h"
+
+namespace webrtc {
+
+struct VolumeInfo {
+  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-audiolevel
+  double audio_level = 0;
+  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalaudioenergy
+  double total_energy = 0.0;
+  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalsamplesduration
+  double total_duration = 0.0;
+};
+
+// VoipVolumeControl interface.
+//
+// This sub-API supports functions related to the input (microphone) and output
+// (speaker) device.
+//
+// Caller must ensure that ChannelId is valid; otherwise the call results in a
+// no-op with error logging.
+class VoipVolumeControl {
+ public:
+  // Mutes/unmutes the microphone input samples before the encoding process.
+  // Note that muting doesn't affect the audio input level and energy values,
+  // as the input samples are silenced after the measurement.
+  virtual void SetInputMuted(ChannelId channel_id, bool enable) = 0;
+
+  // Gets the microphone volume info.
+  // Returns absl::nullopt if |channel_id| is invalid.
+  virtual absl::optional<VolumeInfo> GetInputVolumeInfo(
+      ChannelId channel_id) = 0;
+
+  // Gets the speaker volume info.
+  // Returns absl::nullopt if |channel_id| is invalid.
+  virtual absl::optional<VolumeInfo> GetOutputVolumeInfo(
+      ChannelId channel_id) = 0;
+
+ protected:
+  virtual ~VoipVolumeControl() = default;
+};
+
+}  // namespace webrtc
+
+#endif  // API_VOIP_VOIP_VOLUME_CONTROL_H_
diff --git a/audio/voip/audio_channel.cc b/audio/voip/audio_channel.cc
index 926130dc7e..dc53acf3ad 100644
--- a/audio/voip/audio_channel.cc
+++ b/audio/voip/audio_channel.cc
@@ -155,7 +155,7 @@ IngressStatistics AudioChannel::GetIngressStatistics() {
   ingress_stats.neteq_stats.interruption_count = stats.interruptionCount;
   ingress_stats.neteq_stats.total_interruption_duration_ms =
       stats.totalInterruptionDurationMs;
-  ingress_stats.total_duration = ingress_->GetTotalDuration();
+  ingress_stats.total_duration = ingress_->GetOutputTotalDuration();
   return ingress_stats;
 }
 
diff --git a/audio/voip/audio_channel.h b/audio/voip/audio_channel.h
index a8946a7aa6..5bc7483591 100644
--- a/audio/voip/audio_channel.h
+++ b/audio/voip/audio_channel.h
@@ -70,6 +70,7 @@ class AudioChannel : public rtc::RefCountInterface {
   bool SendTelephoneEvent(int dtmf_event, int duration_ms) {
     return egress_->SendTelephoneEvent(dtmf_event, duration_ms);
   }
+  void SetMute(bool enable) { egress_->SetMute(enable); }
 
   // APIs relayed to AudioIngress.
   bool IsPlaying() const { return ingress_->IsPlaying(); }
@@ -84,6 +85,27 @@ class AudioChannel : public rtc::RefCountInterface {
   }
   IngressStatistics GetIngressStatistics();
 
+  // See comments on the methods used from AudioEgress and AudioIngress.
+  // Conversion to double is following what is done in
+  // DoubleAudioLevelFromIntAudioLevel method in rtc_stats_collector.cc to be
+  // consistent.
+  double GetInputAudioLevel() const {
+    return egress_->GetInputAudioLevel() / 32767.0;
+  }
+  double GetInputTotalEnergy() const { return egress_->GetInputTotalEnergy(); }
+  double GetInputTotalDuration() const {
+    return egress_->GetInputTotalDuration();
+  }
+  double GetOutputAudioLevel() const {
+    return ingress_->GetOutputAudioLevel() / 32767.0;
+  }
+  double GetOutputTotalEnergy() const {
+    return ingress_->GetOutputTotalEnergy();
+  }
+  double GetOutputTotalDuration() const {
+    return ingress_->GetOutputTotalDuration();
+  }
+
  private:
   // ChannelId that this audio channel belongs for logging purpose.
   ChannelId id_;
diff --git a/audio/voip/audio_egress.cc b/audio/voip/audio_egress.cc
index 90e069e1cc..1162824c9e 100644
--- a/audio/voip/audio_egress.cc
+++ b/audio/voip/audio_egress.cc
@@ -80,6 +80,12 @@ void AudioEgress::SendAudioData(std::unique_ptr<AudioFrame> audio_frame) {
           return;
         }
 
+        double duration_seconds =
+            static_cast<double>(audio_frame->samples_per_channel_) /
+            audio_frame->sample_rate_hz_;
+
+        input_audio_level_.ComputeLevel(*audio_frame, duration_seconds);
+
         AudioFrameOperations::Mute(audio_frame.get(),
                                    encoder_context_.previously_muted_,
                                    encoder_context_.mute_);
diff --git a/audio/voip/audio_egress.h b/audio/voip/audio_egress.h
index 6b2d374717..d9ae4f3e04 100644
--- a/audio/voip/audio_egress.h
+++ b/audio/voip/audio_egress.h
@@ -16,6 +16,7 @@
 
 #include "api/audio_codecs/audio_format.h"
 #include "api/task_queue/task_queue_factory.h"
+#include "audio/audio_level.h"
 #include "audio/utility/audio_frame_operations.h"
 #include "call/audio_sender.h"
 #include "modules/audio_coding/include/audio_coding_module.h"
@@ -89,6 +90,16 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback {
   // otherwise false when the dtmf queue reached maximum of 20 events.
   bool SendTelephoneEvent(int dtmf_event, int duration_ms);
 
+  // See comments on LevelFullRange, TotalEnergy, TotalDuration from
+  // audio/audio_level.h.
+  int GetInputAudioLevel() const { return input_audio_level_.LevelFullRange(); }
+  double GetInputTotalEnergy() const {
+    return input_audio_level_.TotalEnergy();
+  }
+  double GetInputTotalDuration() const {
+    return input_audio_level_.TotalDuration();
+  }
+
   // Implementation of AudioSender interface.
   void SendAudioData(std::unique_ptr<AudioFrame> audio_frame) override;
 
@@ -137,6 +148,9 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback {
   // Defined last to ensure that there are no running tasks when the other
   // members are destroyed.
   rtc::TaskQueue encoder_queue_;
+
+  // Synchronization is handled internally by voe::AudioLevel.
+  voe::AudioLevel input_audio_level_;
 };
 
 }  // namespace webrtc
diff --git a/audio/voip/audio_ingress.h b/audio/voip/audio_ingress.h
index acb84c0b94..d3680e0f00 100644
--- a/audio/voip/audio_ingress.h
+++ b/audio/voip/audio_ingress.h
@@ -68,16 +68,15 @@ class AudioIngress : public AudioMixer::Source {
   void ReceivedRTPPacket(rtc::ArrayView<const uint8_t> rtp_packet);
   void ReceivedRTCPPacket(rtc::ArrayView<const uint8_t> rtcp_packet);
 
-  // Retrieve highest speech output level in last 100 ms.  Note that
-  // this isn't RMS but absolute raw audio level on int16_t sample unit.
-  // Therefore, the return value will vary between 0 ~ 0xFFFF. This type of
-  // value may be useful to be used for measuring active speaker gauge.
-  int GetSpeechOutputLevelFullRange() const {
+  // See comments on LevelFullRange, TotalEnergy, TotalDuration from
+  // audio/audio_level.h.
+  int GetOutputAudioLevel() const {
     return output_audio_level_.LevelFullRange();
   }
-  // Retrieves the total duration for all samples played so far as explained in
-  // audio/AudioLevel.h.
-  double GetTotalDuration() const {
+  double GetOutputTotalEnergy() const {
+    return output_audio_level_.TotalEnergy();
+  }
+  double GetOutputTotalDuration() const {
     return output_audio_level_.TotalDuration();
   }
 
diff --git a/audio/voip/test/audio_egress_unittest.cc b/audio/voip/test/audio_egress_unittest.cc
index 70fb6dcf36..0692ef2df4 100644
--- a/audio/voip/test/audio_egress_unittest.cc
+++ b/audio/voip/test/audio_egress_unittest.cc
@@ -43,12 +43,13 @@ std::unique_ptr<ModuleRtpRtcpImpl2> CreateRtpStack(Clock* clock,
   return rtp_rtcp;
 }
 
+constexpr int16_t kAudioLevel = 3004;  // Used for sine wave level.
+
 // AudioEgressTest configures audio egress by using Rtp Stack, fake clock,
 // and task queue factory.  Encoder factory is needed to create codec and
 // configure the RTP stack in audio egress.
 class AudioEgressTest : public ::testing::Test {
  public:
-  static constexpr int16_t kAudioLevel = 3004;  // Used for sine wave level.
   static constexpr uint16_t kSeqNum = 12345;
   static constexpr uint64_t kStartTime = 123456789;
   static constexpr uint32_t kRemoteSsrc = 0xDEADBEEF;
@@ -286,5 +287,37 @@ TEST_F(AudioEgressTest, SendDTMF) {
   EXPECT_EQ(dtmf_count, kExpected);
 }
 
+TEST_F(AudioEgressTest, TestAudioInputLevelAndEnergyDuration) {
+  // Per audio_level's kUpdateFrequency, we need more than 10 audio frames to
+  // get audio level from input source.
+  constexpr int kExpected = 6;
+  rtc::Event event;
+  int rtp_count = 0;
+  auto rtp_sent = [&](const uint8_t* packet, size_t length, Unused) {
+    if (++rtp_count == kExpected) {
+      event.Set();
+    }
+    return true;
+  };
+
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent));
+
+  // Two 10 ms audio frames will result in rtp packet with ptime 20.
+  for (size_t i = 0; i < kExpected * 2; i++) {
+    egress_->SendAudioData(GetAudioFrame(i));
+    fake_clock_.AdvanceTimeMilliseconds(10);
+  }
+
+  event.Wait(/*give_up_after_ms=*/1000);
+  EXPECT_EQ(rtp_count, kExpected);
+
+  constexpr double kExpectedEnergy = 0.00016809565587789564;
+  constexpr double kExpectedDuration = 0.11999999999999998;
+
+  EXPECT_EQ(egress_->GetInputAudioLevel(), kAudioLevel);
+  EXPECT_DOUBLE_EQ(egress_->GetInputTotalEnergy(), kExpectedEnergy);
+  EXPECT_DOUBLE_EQ(egress_->GetInputTotalDuration(), kExpectedDuration);
+}
+
 }  // namespace
 }  // namespace webrtc
diff --git a/audio/voip/test/audio_ingress_unittest.cc b/audio/voip/test/audio_ingress_unittest.cc
index 01b4d67dad..55ecfec695 100644
--- a/audio/voip/test/audio_ingress_unittest.cc
+++ b/audio/voip/test/audio_ingress_unittest.cc
@@ -134,9 +134,10 @@ TEST_F(AudioIngressTest, GetAudioFrameAfterRtpReceived) {
   EXPECT_EQ(audio_frame.elapsed_time_ms_, 0);
 }
 
-TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) {
-  // Per audio_level's kUpdateFrequency, we need 11 RTP to get audio level.
-  constexpr int kNumRtp = 11;
+TEST_F(AudioIngressTest, TestSpeechOutputLevelAndEnergyDuration) {
+  // Per audio_level's kUpdateFrequency, we need more than 10 audio frames to
+  // get audio level from output source.
+  constexpr int kNumRtp = 6;
   int rtp_count = 0;
   rtc::Event event;
   auto handle_rtp = [&](const uint8_t* packet, size_t length, Unused) {
@@ -151,15 +152,21 @@ TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) {
     egress_->SendAudioData(GetAudioFrame(i));
     fake_clock_.AdvanceTimeMilliseconds(10);
   }
-  event.Wait(/*ms=*/1000);
+  event.Wait(/*give_up_after_ms=*/1000);
 
-  for (int i = 0; i < kNumRtp; ++i) {
+  for (int i = 0; i < kNumRtp * 2; ++i) {
     AudioFrame audio_frame;
     EXPECT_EQ(
         ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame),
         AudioMixer::Source::AudioFrameInfo::kNormal);
   }
-  EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel);
+  EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel);
+
+  constexpr double kExpectedEnergy = 0.00016809565587789564;
+  constexpr double kExpectedDuration = 0.11999999999999998;
+
+  EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalEnergy(), kExpectedEnergy);
+  EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalDuration(), kExpectedDuration);
 }
 
 TEST_F(AudioIngressTest, PreferredSampleRate) {
@@ -221,7 +228,7 @@ TEST_F(AudioIngressTest, GetMutedAudioFrameAfterRtpReceivedAndStopPlay) {
 
   // Now we should still see valid speech output level as StopPlay won't affect
   // the measurement.
-  EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel);
+  EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel);
 }
 
 }  // namespace
diff --git a/audio/voip/voip_core.cc b/audio/voip/voip_core.cc
index 92b80b5b71..ac29fbf6d8 100644
--- a/audio/voip/voip_core.cc
+++ b/audio/voip/voip_core.cc
@@ -420,4 +420,35 @@ absl::optional<IngressStatistics> VoipCore::GetIngressStatistics(
   return absl::nullopt;
 }
 
+void VoipCore::SetInputMuted(ChannelId channel_id, bool enable) {
+  rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id);
+  if (channel) {
+    channel->SetMute(enable);
+  }
+}
+
+absl::optional<VolumeInfo> VoipCore::GetInputVolumeInfo(ChannelId channel_id) {
+  rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id);
+  if (channel) {
+    VolumeInfo input_volume;
+    input_volume.audio_level = channel->GetInputAudioLevel();
+    input_volume.total_energy = channel->GetInputTotalEnergy();
+    input_volume.total_duration = channel->GetInputTotalDuration();
+    return input_volume;
+  }
+  return absl::nullopt;
+}
+
+absl::optional<VolumeInfo> VoipCore::GetOutputVolumeInfo(ChannelId channel_id) {
+  rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id);
+  if (channel) {
+    VolumeInfo output_volume;
+    output_volume.audio_level = channel->GetOutputAudioLevel();
+    output_volume.total_energy = channel->GetOutputTotalEnergy();
+    output_volume.total_duration = channel->GetOutputTotalDuration();
+    return output_volume;
+  }
+  return absl::nullopt;
+}
+
 }  // namespace webrtc
diff --git a/audio/voip/voip_core.h b/audio/voip/voip_core.h
index 4279f770d9..5ebf4381cc 100644
--- a/audio/voip/voip_core.h
+++ b/audio/voip/voip_core.h
@@ -27,6 +27,7 @@
 #include "api/voip/voip_engine.h"
 #include "api/voip/voip_network.h"
 #include "api/voip/voip_statistics.h"
+#include "api/voip/voip_volume_control.h"
 #include "audio/audio_transport_impl.h"
 #include "audio/voip/audio_channel.h"
 #include "modules/audio_device/include/audio_device.h"
@@ -49,7 +50,8 @@ class VoipCore : public VoipEngine,
                  public VoipNetwork,
                  public VoipCodec,
                  public VoipDtmf,
-                 public VoipStatistics {
+                 public VoipStatistics,
+                 public VoipVolumeControl {
  public:
   // Construct VoipCore with provided arguments.
   // ProcessThread implementation can be injected by |process_thread|
@@ -69,6 +71,7 @@ class VoipCore : public VoipEngine,
   VoipCodec& Codec() override { return *this; }
   VoipDtmf& Dtmf() override { return *this; }
   VoipStatistics& Statistics() override { return *this; }
+  VoipVolumeControl& VolumeControl() override { return *this; }
 
   // Implements VoipBase interfaces.
   absl::optional<ChannelId> CreateChannel(
@@ -106,6 +109,11 @@ class VoipCore : public VoipEngine,
   absl::optional<IngressStatistics> GetIngressStatistics(
       ChannelId channel_id) override;
 
+  // Implements VoipVolumeControl interfaces.
+  void SetInputMuted(ChannelId channel_id, bool enable) override;
+  absl::optional<VolumeInfo> GetInputVolumeInfo(ChannelId channel_id) override;
+  absl::optional<VolumeInfo> GetOutputVolumeInfo(ChannelId channel_id) override;
+
  private:
   // Initialize ADM and default audio device if needed.
   // Returns true if ADM is successfully initialized or already in such state