VoipVolumeControl subAPI for VoIP API

- mute/unmute API. - speech level/energy/duration API. Bug: webrtc:12111 Change-Id: I54757b9874d15d59a145f2ca70801ee9ef0f4430 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/191060 Commit-Queue: Tim Na <natim@webrtc.org> Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Reviewed-by: Per Åhgren <peah@webrtc.org> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/master@{#32607}
2020-11-13 11:07:43 -08:00 · 2020-11-13 11:07:43 -08:00 · a58cae3eae
commit a58cae3eae
parent f42a94a7f5
12 changed files with 198 additions and 18 deletions
--- a/api/voip/BUILD.gn
+++ b/api/voip/BUILD.gn
@ -17,6 +17,7 @@ rtc_source_set("voip_api") {
    "voip_engine.h",
    "voip_network.h",
    "voip_statistics.h",
+    "voip_volume_control.h",
  ]
  deps = [
    "..:array_view",
--- a/api/voip/voip_engine.h
+++ b/api/voip/voip_engine.h
@ -18,6 +18,7 @@ class VoipCodec;
 class VoipNetwork;
 class VoipDtmf;
 class VoipStatistics;
+class VoipVolumeControl;

 // VoipEngine is the main interface serving as the entry point for all VoIP
 // APIs. A single instance of VoipEngine should suffice the most of the need for
@ -89,6 +90,9 @@ class VoipEngine {
  // VoipStatistics provides performance metrics around audio decoding module
  // and jitter buffer (NetEq).
  virtual VoipStatistics& Statistics() = 0;
+
+  // VoipVolumeControl provides various input/output volume control.
+  virtual VoipVolumeControl& VolumeControl() = 0;
 };

 }  // namespace webrtc
--- a/api/voip/voip_volume_control.h
+++ b/api/voip/voip_volume_control.h
@ -0,0 +1,61 @
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef API_VOIP_VOIP_VOLUME_CONTROL_H_
+#define API_VOIP_VOIP_VOLUME_CONTROL_H_
+
+#include "absl/types/optional.h"
+#include "api/voip/voip_base.h"
+
+namespace webrtc {
+
+// Volume measurements for one direction (microphone input or speaker output)
+// of a channel. Fields follow the linked webrtc-stats definitions.
+struct VolumeInfo {
+  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-audiolevel
+  double audio_level = 0.0;
+  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalaudioenergy
+  double total_energy = 0.0;
+  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiosourcestats-totalsamplesduration
+  double total_duration = 0.0;
+};
+
+// VoipVolumeControl interface.
+//
+// This sub-API supports functions related to the input (microphone) and output
+// (speaker) device.
+//
+// Caller must ensure that ChannelId is valid otherwise it will result in no-op
+// with error logging.
+class VoipVolumeControl {
+ public:
+  // Mutes/unmutes the microphone input sample before encoding process. Note
+  // that mute doesn't affect audio input level and energy values as input
+  // sample is silenced after the measurement.
+  virtual void SetInputMuted(ChannelId channel_id, bool enable) = 0;
+
+  // Gets the microphone volume info.
+  // Returns absl::nullopt if |channel_id| is invalid.
+  virtual absl::optional<VolumeInfo> GetInputVolumeInfo(
+      ChannelId channel_id) = 0;
+
+  // Gets the speaker volume info.
+  // Returns absl::nullopt if |channel_id| is invalid.
+  virtual absl::optional<VolumeInfo> GetOutputVolumeInfo(
+      ChannelId channel_id) = 0;
+
+ protected:
+  // Protected so the object cannot be deleted through this interface.
+  virtual ~VoipVolumeControl() = default;
+};
+
+}  // namespace webrtc
+
+#endif  // API_VOIP_VOIP_VOLUME_CONTROL_H_
--- a/audio/voip/audio_channel.cc
+++ b/audio/voip/audio_channel.cc
@ -155,7 +155,7 @@ IngressStatistics AudioChannel::GetIngressStatistics() {
  ingress_stats.neteq_stats.interruption_count = stats.interruptionCount;
  ingress_stats.neteq_stats.total_interruption_duration_ms =
      stats.totalInterruptionDurationMs;
-  ingress_stats.total_duration = ingress_->GetTotalDuration();
+  ingress_stats.total_duration = ingress_->GetOutputTotalDuration();
  return ingress_stats;
 }

--- a/audio/voip/audio_channel.h
+++ b/audio/voip/audio_channel.h
@ -70,6 +70,7 @@ class AudioChannel : public rtc::RefCountInterface {
  bool SendTelephoneEvent(int dtmf_event, int duration_ms) {
    return egress_->SendTelephoneEvent(dtmf_event, duration_ms);
  }
+  void SetMute(bool enable) { egress_->SetMute(enable); }

  // APIs relayed to AudioIngress.
  bool IsPlaying() const { return ingress_->IsPlaying(); }
@ -84,6 +85,27 @@ class AudioChannel : public rtc::RefCountInterface {
  }
  IngressStatistics GetIngressStatistics();

+  // Volume getters relayed to AudioEgress (input) and AudioIngress (output).
+  // Audio levels are converted from the int full-range value to double by
+  // dividing by 32767.0, consistent with DoubleAudioLevelFromIntAudioLevel
+  // in rtc_stats_collector.cc.
+  double GetInputAudioLevel() const {
+    return egress_->GetInputAudioLevel() / 32767.0;
+  }
+  double GetInputTotalEnergy() const { return egress_->GetInputTotalEnergy(); }
+  double GetInputTotalDuration() const {
+    return egress_->GetInputTotalDuration();
+  }
+  double GetOutputAudioLevel() const {
+    return ingress_->GetOutputAudioLevel() / 32767.0;
+  }
+  double GetOutputTotalEnergy() const {
+    return ingress_->GetOutputTotalEnergy();
+  }
+  double GetOutputTotalDuration() const {
+    return ingress_->GetOutputTotalDuration();
+  }
+
 private:
  // ChannelId that this audio channel belongs for logging purpose.
  ChannelId id_;
--- a/audio/voip/audio_egress.cc
+++ b/audio/voip/audio_egress.cc
@ -80,6 +80,12 @@ void AudioEgress::SendAudioData(std::unique_ptr<AudioFrame> audio_frame) {
          return;
        }

+        double duration_seconds =
+            static_cast<double>(audio_frame->samples_per_channel_) /
+            audio_frame->sample_rate_hz_;
+
+        input_audio_level_.ComputeLevel(*audio_frame, duration_seconds);
+
        AudioFrameOperations::Mute(audio_frame.get(),
                                   encoder_context_.previously_muted_,
                                   encoder_context_.mute_);
--- a/audio/voip/audio_egress.h
+++ b/audio/voip/audio_egress.h
@ -16,6 +16,7 @@

 #include "api/audio_codecs/audio_format.h"
 #include "api/task_queue/task_queue_factory.h"
+#include "audio/audio_level.h"
 #include "audio/utility/audio_frame_operations.h"
 #include "call/audio_sender.h"
 #include "modules/audio_coding/include/audio_coding_module.h"
@ -89,6 +90,16 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback {
  // otherwise false when the dtmf queue reached maximum of 20 events.
  bool SendTelephoneEvent(int dtmf_event, int duration_ms);

+  // Input level, energy and duration, measured before mute is applied. See
+  // LevelFullRange, TotalEnergy, TotalDuration in audio/audio_level.h.
+  int GetInputAudioLevel() const { return input_audio_level_.LevelFullRange(); }
+  double GetInputTotalEnergy() const {
+    return input_audio_level_.TotalEnergy();
+  }
+  double GetInputTotalDuration() const {
+    return input_audio_level_.TotalDuration();
+  }
+
  // Implementation of AudioSender interface.
  void SendAudioData(std::unique_ptr<AudioFrame> audio_frame) override;

@ -137,6 +148,10 @@ class AudioEgress : public AudioSender, public AudioPacketizationCallback {
+  // Synchronization is handled internally by voe::AudioLevel. Must be declared
+  // before |encoder_queue_| as tasks running on that queue use it.
+  voe::AudioLevel input_audio_level_;
+
  // Defined last to ensure that there are no running tasks when the other
  // members are destroyed.
  rtc::TaskQueue encoder_queue_;
 };

 }  // namespace webrtc
--- a/audio/voip/audio_ingress.h
+++ b/audio/voip/audio_ingress.h
@ -68,16 +68,15 @@ class AudioIngress : public AudioMixer::Source {
  void ReceivedRTPPacket(rtc::ArrayView<const uint8_t> rtp_packet);
  void ReceivedRTCPPacket(rtc::ArrayView<const uint8_t> rtcp_packet);

-  // Retrieve highest speech output level in last 100 ms.  Note that
-  // this isn't RMS but absolute raw audio level on int16_t sample unit.
-  // Therefore, the return value will vary between 0 ~ 0xFFFF. This type of
-  // value may be useful to be used for measuring active speaker gauge.
-  int GetSpeechOutputLevelFullRange() const {
+  // See comments on LevelFullRange, TotalEnergy, TotalDuration from
+  // audio/audio_level.h.
+  int GetOutputAudioLevel() const {
    return output_audio_level_.LevelFullRange();
  }
-  // Retrieves the total duration for all samples played so far as explained in
-  // audio/AudioLevel.h.
-  double GetTotalDuration() const {
+  double GetOutputTotalEnergy() const {
+    return output_audio_level_.TotalEnergy();
+  }
+  double GetOutputTotalDuration() const {
    return output_audio_level_.TotalDuration();
  }

--- a/audio/voip/test/audio_egress_unittest.cc
+++ b/audio/voip/test/audio_egress_unittest.cc
@ -43,12 +43,13 @@ std::unique_ptr<ModuleRtpRtcpImpl2> CreateRtpStack(Clock* clock,
  return rtp_rtcp;
 }

+constexpr int16_t kAudioLevel = 3004;  // Used for sine wave level.
+
 // AudioEgressTest configures audio egress by using Rtp Stack, fake clock,
 // and task queue factory.  Encoder factory is needed to create codec and
 // configure the RTP stack in audio egress.
 class AudioEgressTest : public ::testing::Test {
 public:
-  static constexpr int16_t kAudioLevel = 3004;  // Used for sine wave level.
  static constexpr uint16_t kSeqNum = 12345;
  static constexpr uint64_t kStartTime = 123456789;
  static constexpr uint32_t kRemoteSsrc = 0xDEADBEEF;
@ -286,5 +287,37 @@ TEST_F(AudioEgressTest, SendDTMF) {
  EXPECT_EQ(dtmf_count, kExpected);
 }

+TEST_F(AudioEgressTest, TestAudioInputLevelAndEnergyDuration) {
+  // Per audio_level's kUpdateFrequency, we need more than 10 audio frames to
+  // get audio level from input source.
+  constexpr int kExpected = 6;
+  rtc::Event event;
+  int rtp_count = 0;
+  auto rtp_sent = [&](const uint8_t* packet, size_t length, Unused) {
+    if (++rtp_count == kExpected) {
+      event.Set();
+    }
+    return true;
+  };
+
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent));
+
+  // Two 10 ms audio frames make one RTP packet (ptime 20), hence the * 2.
+  for (size_t i = 0; i < kExpected * 2; i++) {
+    egress_->SendAudioData(GetAudioFrame(i));
+    fake_clock_.AdvanceTimeMilliseconds(10);
+  }
+
+  event.Wait(/*give_up_after_ms=*/1000);
+  EXPECT_EQ(rtp_count, kExpected);
+
+  constexpr double kExpectedEnergy = 0.00016809565587789564;
+  constexpr double kExpectedDuration = 0.11999999999999998;
+
+  EXPECT_EQ(egress_->GetInputAudioLevel(), kAudioLevel);
+  EXPECT_DOUBLE_EQ(egress_->GetInputTotalEnergy(), kExpectedEnergy);
+  EXPECT_DOUBLE_EQ(egress_->GetInputTotalDuration(), kExpectedDuration);
+}
+
 }  // namespace
 }  // namespace webrtc
--- a/audio/voip/test/audio_ingress_unittest.cc
+++ b/audio/voip/test/audio_ingress_unittest.cc
@ -134,9 +134,10 @@ TEST_F(AudioIngressTest, GetAudioFrameAfterRtpReceived) {
  EXPECT_EQ(audio_frame.elapsed_time_ms_, 0);
 }

-TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) {
-  // Per audio_level's kUpdateFrequency, we need 11 RTP to get audio level.
-  constexpr int kNumRtp = 11;
+TEST_F(AudioIngressTest, TestSpeechOutputLevelAndEnergyDuration) {
+  // Per audio_level's kUpdateFrequency, we need more than 10 audio frames to
+  // get audio level from output source.
+  constexpr int kNumRtp = 6;
  int rtp_count = 0;
  rtc::Event event;
  auto handle_rtp = [&](const uint8_t* packet, size_t length, Unused) {
@ -151,15 +152,21 @@ TEST_F(AudioIngressTest, GetSpeechOutputLevelFullRange) {
    egress_->SendAudioData(GetAudioFrame(i));
    fake_clock_.AdvanceTimeMilliseconds(10);
  }
-  event.Wait(/*ms=*/1000);
+  event.Wait(/*give_up_after_ms=*/1000);

-  for (int i = 0; i < kNumRtp; ++i) {
+  for (int i = 0; i < kNumRtp * 2; ++i) {
    AudioFrame audio_frame;
    EXPECT_EQ(
        ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame),
        AudioMixer::Source::AudioFrameInfo::kNormal);
  }
-  EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel);
+  EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel);
+
+  constexpr double kExpectedEnergy = 0.00016809565587789564;
+  constexpr double kExpectedDuration = 0.11999999999999998;
+
+  EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalEnergy(), kExpectedEnergy);
+  EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalDuration(), kExpectedDuration);
 }

 TEST_F(AudioIngressTest, PreferredSampleRate) {
@ -221,7 +228,7 @@ TEST_F(AudioIngressTest, GetMutedAudioFrameAfterRtpReceivedAndStopPlay) {

  // Now we should still see valid speech output level as StopPlay won't affect
  // the measurement.
-  EXPECT_EQ(ingress_->GetSpeechOutputLevelFullRange(), kAudioLevel);
+  EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel);
 }

 }  // namespace
--- a/audio/voip/voip_core.cc
+++ b/audio/voip/voip_core.cc
@ -420,4 +420,42 @@ absl::optional<IngressStatistics> VoipCore::GetIngressStatistics(
  return absl::nullopt;
 }

+// Relays the mute setting to the channel. Does nothing when GetChannel()
+// yields no channel for |channel_id|.
+void VoipCore::SetInputMuted(ChannelId channel_id, bool enable) {
+  rtc::scoped_refptr<AudioChannel> audio_channel = GetChannel(channel_id);
+  if (!audio_channel) {
+    return;
+  }
+  audio_channel->SetMute(enable);
+}
+
+// Collects the input (microphone) level, energy and duration of the channel.
+// Returns absl::nullopt when GetChannel() yields no channel for |channel_id|.
+absl::optional<VolumeInfo> VoipCore::GetInputVolumeInfo(ChannelId channel_id) {
+  rtc::scoped_refptr<AudioChannel> audio_channel = GetChannel(channel_id);
+  if (!audio_channel) {
+    return absl::nullopt;
+  }
+  VolumeInfo info;
+  info.audio_level = audio_channel->GetInputAudioLevel();
+  info.total_energy = audio_channel->GetInputTotalEnergy();
+  info.total_duration = audio_channel->GetInputTotalDuration();
+  return info;
+}
+
+// Collects the output (speaker) level, energy and duration of the channel.
+// Returns absl::nullopt when GetChannel() yields no channel for |channel_id|.
+absl::optional<VolumeInfo> VoipCore::GetOutputVolumeInfo(ChannelId channel_id) {
+  rtc::scoped_refptr<AudioChannel> audio_channel = GetChannel(channel_id);
+  if (!audio_channel) {
+    return absl::nullopt;
+  }
+  VolumeInfo info;
+  info.audio_level = audio_channel->GetOutputAudioLevel();
+  info.total_energy = audio_channel->GetOutputTotalEnergy();
+  info.total_duration = audio_channel->GetOutputTotalDuration();
+  return info;
+}
+
 }  // namespace webrtc
--- a/audio/voip/voip_core.h
+++ b/audio/voip/voip_core.h
@ -27,6 +27,7 @@
 #include "api/voip/voip_engine.h"
 #include "api/voip/voip_network.h"
 #include "api/voip/voip_statistics.h"
+#include "api/voip/voip_volume_control.h"
 #include "audio/audio_transport_impl.h"
 #include "audio/voip/audio_channel.h"
 #include "modules/audio_device/include/audio_device.h"
@ -49,7 +50,8 @@ class VoipCore : public VoipEngine,
                 public VoipNetwork,
                 public VoipCodec,
                 public VoipDtmf,
-                 public VoipStatistics {
+                 public VoipStatistics,
+                 public VoipVolumeControl {
 public:
  // Construct VoipCore with provided arguments.
  // ProcessThread implementation can be injected by |process_thread|
@ -69,6 +71,7 @@ class VoipCore : public VoipEngine,
  VoipCodec& Codec() override { return *this; }
  VoipDtmf& Dtmf() override { return *this; }
  VoipStatistics& Statistics() override { return *this; }
+  VoipVolumeControl& VolumeControl() override { return *this; }

  // Implements VoipBase interfaces.
  absl::optional<ChannelId> CreateChannel(
@ -106,6 +109,11 @@ class VoipCore : public VoipEngine,
  absl::optional<IngressStatistics> GetIngressStatistics(
      ChannelId channel_id) override;

+  // Implements VoipVolumeControl interfaces.
+  void SetInputMuted(ChannelId channel_id, bool enable) override;
+  absl::optional<VolumeInfo> GetInputVolumeInfo(ChannelId channel_id) override;
+  absl::optional<VolumeInfo> GetOutputVolumeInfo(ChannelId channel_id) override;
+
 private:
  // Initialize ADM and default audio device if needed.
  // Returns true if ADM is successfully initialized or already in such state