Adding stats that can be used to compute output audio levels, as described in the WebRTC stats spec:
https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy

BUG=webrtc:7982
Review-Url: https://codereview.webrtc.org/2964593002
Cr-Commit-Position: refs/heads/master@{#19027}

Commit: e76bd3aa43
Parent: 06f3aae345
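As background for reviewers: per the spec, a stats consumer derives the average (RMS) audio level over an interval from the deltas of these two counters between two getStats() snapshots. A minimal sketch of that computation, not part of this CL; the struct and function names below are illustrative only.

#include <cmath>

// Illustrative snapshot of the two new counters as seen by a stats consumer.
struct AudioStatsSnapshot {
  double total_audio_energy;      // "totalAudioEnergy"
  double total_samples_duration;  // "totalSamplesDuration", in seconds
};

// RMS audio level in [0, 1] over the interval between two snapshots.
double IntervalRmsAudioLevel(const AudioStatsSnapshot& older,
                             const AudioStatsSnapshot& newer) {
  double energy = newer.total_audio_energy - older.total_audio_energy;
  double duration =
      newer.total_samples_duration - older.total_samples_duration;
  return duration > 0.0 ? std::sqrt(energy / duration) : 0.0;
}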
@@ -272,6 +272,8 @@ class RTCMediaStreamTrackStats final : public RTCStats {
   RTCStatsMember<uint32_t> full_frames_lost;
   // Audio-only members
   RTCStatsMember<double> audio_level;
+  RTCStatsMember<double> total_audio_energy;
+  RTCStatsMember<double> total_samples_duration;
   RTCStatsMember<double> echo_return_loss;
   RTCStatsMember<double> echo_return_loss_enhancement;
 };
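For context, a C++ consumer holding an RTCMediaStreamTrackStats would read the two new members like any other RTCStatsMember, checking that they are defined first (they stay undefined for video tracks). A sketch assuming the usual RTCStatsMember accessors (is_defined() and operator*); the helper name is made up for illustration.

// Assumes the header that declares webrtc::RTCMediaStreamTrackStats is
// included (path varies by checkout).
bool GetAudioEnergyAndDuration(
    const webrtc::RTCMediaStreamTrackStats& track_stats,
    double* energy_out,
    double* duration_out) {
  if (!track_stats.total_audio_energy.is_defined() ||
      !track_stats.total_samples_duration.is_defined()) {
    return false;  // e.g. a video track, where the audio members are undefined.
  }
  *energy_out = *track_stats.total_audio_energy;
  *duration_out = *track_stats.total_samples_duration;
  return true;
}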
@@ -594,6 +594,10 @@ const char* StatsReport::Value::display_name() const {
       return "srtpCipher";
     case kStatsValueNameTargetEncBitrate:
       return "googTargetEncBitrate";
+    case kStatsValueNameTotalAudioEnergy:
+      return "totalAudioEnergy";
+    case kStatsValueNameTotalSamplesDuration:
+      return "totalSamplesDuration";
     case kStatsValueNameTransmitBitrate:
       return "googTransmitBitrate";
     case kStatsValueNameTransportType:
@@ -118,6 +118,8 @@ class StatsReport {
     kStatsValueNameSelectedCandidatePairId,
     kStatsValueNameSsrc,
     kStatsValueNameState,
+    kStatsValueNameTotalAudioEnergy,
+    kStatsValueNameTotalSamplesDuration,
     kStatsValueNameTransportId,
     kStatsValueNameSentPingRequestsTotal,
     kStatsValueNameSentPingRequestsBeforeFirstResponse,
@@ -187,6 +187,8 @@ webrtc::AudioReceiveStream::Stats AudioReceiveStream::GetStats() const {
   }
   stats.delay_estimate_ms = channel_proxy_->GetDelayEstimate();
   stats.audio_level = channel_proxy_->GetSpeechOutputLevelFullRange();
+  stats.total_output_energy = channel_proxy_->GetTotalOutputEnergy();
+  stats.total_output_duration = channel_proxy_->GetTotalOutputDuration();

   // Get jitter buffer and total delay (alg + jitter + playout) stats.
   auto ns = channel_proxy_->GetNetworkStatistics();
@@ -57,6 +57,9 @@ const int kTransportSequenceNumberId = 4;
 const int kJitterBufferDelay = -7;
 const int kPlayoutBufferDelay = 302;
 const unsigned int kSpeechOutputLevel = 99;
+const double kTotalOutputEnergy = 0.25;
+const double kTotalOutputDuration = 0.5;
+
 const CallStatistics kCallStats = {
     345, 678, 901, 234, -12, 3456, 7890, 567, 890, 123};
 const CodecInst kCodecInst = {
@@ -154,6 +157,10 @@ struct ConfigHelper {
        .WillOnce(Return(kJitterBufferDelay + kPlayoutBufferDelay));
    EXPECT_CALL(*channel_proxy_, GetSpeechOutputLevelFullRange())
        .WillOnce(Return(kSpeechOutputLevel));
+   EXPECT_CALL(*channel_proxy_, GetTotalOutputEnergy())
+       .WillOnce(Return(kTotalOutputEnergy));
+   EXPECT_CALL(*channel_proxy_, GetTotalOutputDuration())
+       .WillOnce(Return(kTotalOutputDuration));
    EXPECT_CALL(*channel_proxy_, GetNetworkStatistics())
        .WillOnce(Return(kNetworkStats));
    EXPECT_CALL(*channel_proxy_, GetDecodingCallStatistics())
@@ -310,6 +317,8 @@ TEST(AudioReceiveStreamTest, GetStats) {
   EXPECT_EQ(static_cast<uint32_t>(kJitterBufferDelay + kPlayoutBufferDelay),
             stats.delay_estimate_ms);
   EXPECT_EQ(static_cast<int32_t>(kSpeechOutputLevel), stats.audio_level);
+  EXPECT_EQ(kTotalOutputEnergy, stats.total_output_energy);
+  EXPECT_EQ(kTotalOutputDuration, stats.total_output_duration);
   EXPECT_EQ(Q14ToFloat(kNetworkStats.currentExpandRate), stats.expand_rate);
   EXPECT_EQ(Q14ToFloat(kNetworkStats.currentSpeechExpandRate),
             stats.speech_expand_rate);
@@ -279,6 +279,9 @@ webrtc::AudioSendStream::Stats AudioSendStream::GetStats() const {
   stats.audio_level = base->transmit_mixer()->AudioLevelFullRange();
   RTC_DCHECK_LE(0, stats.audio_level);

+  stats.total_input_energy = base->transmit_mixer()->GetTotalInputEnergy();
+  stats.total_input_duration = base->transmit_mixer()->GetTotalInputDuration();
+
   RTC_DCHECK(audio_state_->audio_processing());
   auto audio_processing_stats =
       audio_state_->audio_processing()->GetStatistics();
@@ -55,6 +55,8 @@ const int kEchoReturnLoss = -65;
 const int kEchoReturnLossEnhancement = 101;
 const float kResidualEchoLikelihood = -1.0f;
 const int32_t kSpeechInputLevel = 96;
+const double kTotalInputEnergy = 0.25;
+const double kTotalInputDuration = 0.5;
 const CallStatistics kCallStats = {
     1345, 1678, 1901, 1234, 112, 13456, 17890, 1567, -1890, -1123};
 const ReportBlock kReportBlock = {456, 780, 123, 567, 890, 132, 143, 13354};
@@ -82,6 +84,8 @@ class MockLimitObserver : public BitrateAllocator::LimitObserver {
 class MockTransmitMixer : public voe::TransmitMixer {
  public:
   MOCK_CONST_METHOD0(AudioLevelFullRange, int16_t());
+  MOCK_CONST_METHOD0(GetTotalInputEnergy, double());
+  MOCK_CONST_METHOD0(GetTotalInputDuration, double());
 };

 std::unique_ptr<MockAudioEncoder> SetupAudioEncoderMock(
@@ -286,6 +290,10 @@ struct ConfigHelper {

     EXPECT_CALL(transmit_mixer_, AudioLevelFullRange())
         .WillRepeatedly(Return(kSpeechInputLevel));
+    EXPECT_CALL(transmit_mixer_, GetTotalInputEnergy())
+        .WillRepeatedly(Return(kTotalInputEnergy));
+    EXPECT_CALL(transmit_mixer_, GetTotalInputDuration())
+        .WillRepeatedly(Return(kTotalInputDuration));

     // We have to set the instantaneous value, the average, min and max. We only
     // care about the instantaneous value, so we set all to the same value.
@@ -420,6 +428,8 @@ TEST(AudioSendStreamTest, GetStats) {
             stats.jitter_ms);
   EXPECT_EQ(kCallStats.rttMs, stats.rtt_ms);
   EXPECT_EQ(static_cast<int32_t>(kSpeechInputLevel), stats.audio_level);
+  EXPECT_EQ(kTotalInputEnergy, stats.total_input_energy);
+  EXPECT_EQ(kTotalInputDuration, stats.total_input_duration);
   EXPECT_EQ(-1, stats.aec_quality_min);
   EXPECT_EQ(kEchoDelayMedian, stats.echo_delay_median_ms);
   EXPECT_EQ(kEchoDelayStdDev, stats.echo_delay_std_ms);
@@ -49,6 +49,10 @@ class AudioReceiveStream {
     uint32_t jitter_buffer_preferred_ms = 0;
     uint32_t delay_estimate_ms = 0;
     int32_t audio_level = -1;
+    // See description of "totalAudioEnergy" in the WebRTC stats spec:
+    // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+    double total_output_energy = 0.0;
+    double total_output_duration = 0.0;
     float expand_rate = 0.0f;
     float speech_expand_rate = 0.0f;
     float secondary_decoded_rate = 0.0f;
@@ -47,6 +47,10 @@ class AudioSendStream {
     int32_t jitter_ms = -1;
     int64_t rtt_ms = -1;
     int32_t audio_level = -1;
+    // See description of "totalAudioEnergy" in the WebRTC stats spec:
+    // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+    double total_input_energy = 0.0;
+    double total_input_duration = 0.0;
     float aec_quality_min = -1.0f;
     int32_t echo_delay_median_ms = -1;
     int32_t echo_delay_std_ms = -1;
@@ -615,6 +615,8 @@ struct VoiceSenderInfo : public MediaSenderInfo {
       : ext_seqnum(0),
         jitter_ms(0),
         audio_level(0),
+        total_input_energy(0.0),
+        total_input_duration(0.0),
         aec_quality_min(0.0),
         echo_delay_median_ms(0),
         echo_delay_std_ms(0),
@@ -627,6 +629,10 @@ struct VoiceSenderInfo : public MediaSenderInfo {
   int ext_seqnum;
   int jitter_ms;
   int audio_level;
+  // See description of "totalAudioEnergy" in the WebRTC stats spec:
+  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+  double total_input_energy;
+  double total_input_duration;
   float aec_quality_min;
   int echo_delay_median_ms;
   int echo_delay_std_ms;
@@ -645,6 +651,8 @@ struct VoiceReceiverInfo : public MediaReceiverInfo {
         jitter_buffer_preferred_ms(0),
         delay_estimate_ms(0),
         audio_level(0),
+        total_output_energy(0.0),
+        total_output_duration(0.0),
         expand_rate(0),
         speech_expand_rate(0),
         secondary_decoded_rate(0),
@@ -665,6 +673,10 @@ struct VoiceReceiverInfo : public MediaReceiverInfo {
   int jitter_buffer_preferred_ms;
   int delay_estimate_ms;
   int audio_level;
+  // See description of "totalAudioEnergy" in the WebRTC stats spec:
+  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+  double total_output_energy;
+  double total_output_duration;
   // fraction of synthesized audio inserted through expansion.
   float expand_rate;
   // fraction of synthesized speech inserted through expansion.
@@ -2248,6 +2248,8 @@ bool WebRtcVoiceMediaChannel::GetStats(VoiceMediaInfo* info) {
     sinfo.jitter_ms = stats.jitter_ms;
     sinfo.rtt_ms = stats.rtt_ms;
     sinfo.audio_level = stats.audio_level;
+    sinfo.total_input_energy = stats.total_input_energy;
+    sinfo.total_input_duration = stats.total_input_duration;
     sinfo.aec_quality_min = stats.aec_quality_min;
     sinfo.echo_delay_median_ms = stats.echo_delay_median_ms;
     sinfo.echo_delay_std_ms = stats.echo_delay_std_ms;
@@ -2278,6 +2280,8 @@ bool WebRtcVoiceMediaChannel::GetStats(VoiceMediaInfo* info) {
     rinfo.jitter_buffer_preferred_ms = stats.jitter_buffer_preferred_ms;
     rinfo.delay_estimate_ms = stats.delay_estimate_ms;
     rinfo.audio_level = stats.audio_level;
+    rinfo.total_output_energy = stats.total_output_energy;
+    rinfo.total_output_duration = stats.total_output_duration;
     rinfo.expand_rate = stats.expand_rate;
     rinfo.speech_expand_rate = stats.speech_expand_rate;
     rinfo.secondary_decoded_rate = stats.secondary_decoded_rate;
@@ -475,6 +475,8 @@ class RTCStatsReportVerifier {
       verifier.TestMemberIsUndefined(media_stream_track.echo_return_loss);
       verifier.TestMemberIsUndefined(
           media_stream_track.echo_return_loss_enhancement);
+      verifier.TestMemberIsUndefined(media_stream_track.total_audio_energy);
+      verifier.TestMemberIsUndefined(media_stream_track.total_samples_duration);
     } else {
       RTC_DCHECK_EQ(*media_stream_track.kind,
                     RTCMediaStreamTrackKind::kAudio);
@@ -491,6 +493,10 @@ class RTCStatsReportVerifier {
       verifier.TestMemberIsUndefined(media_stream_track.full_frames_lost);
       // Audio-only members
       verifier.TestMemberIsNonNegative<double>(media_stream_track.audio_level);
+      verifier.TestMemberIsNonNegative<double>(
+          media_stream_track.total_audio_energy);
+      verifier.TestMemberIsNonNegative<double>(
+          media_stream_track.total_samples_duration);
       // TODO(hbos): |echo_return_loss| and |echo_return_loss_enhancement| are
       // flaky on msan bot (sometimes defined, sometimes undefined). Should the
       // test run until available or is there a way to have it always be
@@ -374,6 +374,9 @@ ProduceMediaStreamTrackStatsFromVoiceSenderInfo(
     audio_track_stats->audio_level = DoubleAudioLevelFromIntAudioLevel(
         voice_sender_info.audio_level);
   }
+  audio_track_stats->total_audio_energy = voice_sender_info.total_input_energy;
+  audio_track_stats->total_samples_duration =
+      voice_sender_info.total_input_duration;
   if (voice_sender_info.echo_return_loss != -100) {
     audio_track_stats->echo_return_loss = static_cast<double>(
         voice_sender_info.echo_return_loss);
@@ -405,6 +408,10 @@ ProduceMediaStreamTrackStatsFromVoiceReceiverInfo(
     audio_track_stats->audio_level = DoubleAudioLevelFromIntAudioLevel(
         voice_receiver_info.audio_level);
   }
+  audio_track_stats->total_audio_energy =
+      voice_receiver_info.total_output_energy;
+  audio_track_stats->total_samples_duration =
+      voice_receiver_info.total_output_duration;
   return audio_track_stats;
 }

@@ -1521,6 +1521,8 @@ TEST_F(RTCStatsCollectorTest,
   voice_sender_info_ssrc1.local_stats.push_back(cricket::SsrcSenderInfo());
   voice_sender_info_ssrc1.local_stats[0].ssrc = 1;
   voice_sender_info_ssrc1.audio_level = 32767;
+  voice_sender_info_ssrc1.total_input_energy = 0.25;
+  voice_sender_info_ssrc1.total_input_duration = 0.5;
   voice_sender_info_ssrc1.echo_return_loss = 42;
   voice_sender_info_ssrc1.echo_return_loss_enhancement = 52;

@@ -1530,6 +1532,8 @@ TEST_F(RTCStatsCollectorTest,
   voice_sender_info_ssrc2.local_stats.push_back(cricket::SsrcSenderInfo());
   voice_sender_info_ssrc2.local_stats[0].ssrc = 2;
   voice_sender_info_ssrc2.audio_level = 0;
+  voice_sender_info_ssrc2.total_input_energy = 0.0;
+  voice_sender_info_ssrc2.total_input_duration = 0.0;
   voice_sender_info_ssrc2.echo_return_loss = -100;
   voice_sender_info_ssrc2.echo_return_loss_enhancement = -100;

@@ -1544,6 +1548,8 @@ TEST_F(RTCStatsCollectorTest,
   voice_receiver_info.local_stats.push_back(cricket::SsrcReceiverInfo());
   voice_receiver_info.local_stats[0].ssrc = 3;
   voice_receiver_info.audio_level = 16383;
+  voice_receiver_info.total_output_energy = 0.125;
+  voice_receiver_info.total_output_duration = 0.25;

   test_->CreateMockRtpSendersReceiversAndChannels(
       { std::make_pair(local_audio_track.get(), voice_sender_info_ssrc1),
@@ -1582,6 +1588,8 @@ TEST_F(RTCStatsCollectorTest,
   expected_local_audio_track_ssrc1.ended = true;
   expected_local_audio_track_ssrc1.detached = false;
   expected_local_audio_track_ssrc1.audio_level = 1.0;
+  expected_local_audio_track_ssrc1.total_audio_energy = 0.25;
+  expected_local_audio_track_ssrc1.total_samples_duration = 0.5;
   expected_local_audio_track_ssrc1.echo_return_loss = 42.0;
   expected_local_audio_track_ssrc1.echo_return_loss_enhancement = 52.0;
   ASSERT_TRUE(report->Get(expected_local_audio_track_ssrc1.id()));
@@ -1597,6 +1605,8 @@ TEST_F(RTCStatsCollectorTest,
   expected_local_audio_track_ssrc2.ended = true;
   expected_local_audio_track_ssrc2.detached = false;
   expected_local_audio_track_ssrc2.audio_level = 0.0;
+  expected_local_audio_track_ssrc2.total_audio_energy = 0.0;
+  expected_local_audio_track_ssrc2.total_samples_duration = 0.0;
   // Should be undefined: |expected_local_audio_track_ssrc2.echo_return_loss|
   // and |expected_local_audio_track_ssrc2.echo_return_loss_enhancement|.
   ASSERT_TRUE(report->Get(expected_local_audio_track_ssrc2.id()));
@@ -1612,6 +1622,8 @@ TEST_F(RTCStatsCollectorTest,
   expected_remote_audio_track.ended = false;
   expected_remote_audio_track.detached = false;
   expected_remote_audio_track.audio_level = 16383.0 / 32767.0;
+  expected_remote_audio_track.total_audio_energy = 0.125;
+  expected_remote_audio_track.total_samples_duration = 0.25;
   ASSERT_TRUE(report->Get(expected_remote_audio_track.id()));
   EXPECT_EQ(expected_remote_audio_track,
             report->Get(expected_remote_audio_track.id())->cast_to<
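A note on the expected values above: the test asserts audio_level == 16383.0 / 32767.0 for an integer level of 16383, while the energy and duration counters are copied through unscaled. That suggests DoubleAudioLevelFromIntAudioLevel simply normalizes the legacy 0..32767 level by the int16 maximum. A sketch of that mapping, stated as an assumption rather than the CL's actual implementation:

#include <cstdint>

// Assumed behavior: map the legacy 0..32767 integer audio level onto the
// spec's [0, 1] double range.
double DoubleAudioLevelFromIntAudioLevel(int audio_level) {
  return static_cast<double>(audio_level) / INT16_MAX;  // 16383 -> ~0.5
}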
@@ -146,6 +146,9 @@ void ExtractStats(const cricket::VoiceReceiverInfo& info, StatsReport* report) {
     { StatsReport::kStatsValueNameAccelerateRate, info.accelerate_rate },
     { StatsReport::kStatsValueNamePreemptiveExpandRate,
       info.preemptive_expand_rate },
+    { StatsReport::kStatsValueNameTotalAudioEnergy, info.total_output_energy },
+    { StatsReport::kStatsValueNameTotalSamplesDuration,
+      info.total_output_duration }
   };

   const IntForAdd ints[] = {
@@ -195,6 +198,12 @@ void ExtractStats(const cricket::VoiceSenderInfo& info, StatsReport* report) {
       info.aec_quality_min, info.echo_delay_std_ms,
       info.residual_echo_likelihood, info.residual_echo_likelihood_recent_max);

+  const FloatForAdd floats[] = {
+    { StatsReport::kStatsValueNameTotalAudioEnergy, info.total_input_energy },
+    { StatsReport::kStatsValueNameTotalSamplesDuration,
+      info.total_input_duration }
+  };
+
   RTC_DCHECK_GE(info.audio_level, 0);
   const IntForAdd ints[] = {
     { StatsReport::kStatsValueNameAudioInputLevel, info.audio_level},
@@ -203,6 +212,10 @@ void ExtractStats(const cricket::VoiceSenderInfo& info, StatsReport* report) {
     { StatsReport::kStatsValueNamePacketsSent, info.packets_sent },
   };

+  for (const auto& f : floats) {
+    report->AddFloat(f.name, f.value);
+  }
+
   for (const auto& i : ints) {
     if (i.value >= 0) {
       report->AddInt(i.name, i.value);
@@ -349,32 +349,35 @@ RTCMediaStreamStats::~RTCMediaStreamStats() {
 }

 WEBRTC_RTCSTATS_IMPL(RTCMediaStreamTrackStats, RTCStats, "track",
-    &track_identifier,
-    &remote_source,
-    &ended,
-    &detached,
-    &kind,
-    &frame_width,
-    &frame_height,
-    &frames_per_second,
-    &frames_sent,
-    &frames_received,
-    &frames_decoded,
-    &frames_dropped,
-    &frames_corrupted,
-    &partial_frames_lost,
-    &full_frames_lost,
-    &audio_level,
-    &echo_return_loss,
-    &echo_return_loss_enhancement);
+                     &track_identifier,
+                     &remote_source,
+                     &ended,
+                     &detached,
+                     &kind,
+                     &frame_width,
+                     &frame_height,
+                     &frames_per_second,
+                     &frames_sent,
+                     &frames_received,
+                     &frames_decoded,
+                     &frames_dropped,
+                     &frames_corrupted,
+                     &partial_frames_lost,
+                     &full_frames_lost,
+                     &audio_level,
+                     &total_audio_energy,
+                     &total_samples_duration,
+                     &echo_return_loss,
+                     &echo_return_loss_enhancement);

 RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
     const std::string& id, int64_t timestamp_us, const char* kind)
     : RTCMediaStreamTrackStats(std::string(id), timestamp_us, kind) {
 }

-RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
-    std::string&& id, int64_t timestamp_us, const char* kind)
+RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(std::string&& id,
+                                                   int64_t timestamp_us,
+                                                   const char* kind)
     : RTCStats(std::move(id), timestamp_us),
       track_identifier("trackIdentifier"),
       remote_source("remoteSource"),
@@ -392,6 +395,8 @@ RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
       partial_frames_lost("partialFramesLost"),
       full_frames_lost("fullFramesLost"),
       audio_level("audioLevel"),
+      total_audio_energy("totalAudioEnergy"),
+      total_samples_duration("totalSamplesDuration"),
       echo_return_loss("echoReturnLoss"),
       echo_return_loss_enhancement("echoReturnLossEnhancement") {
   RTC_DCHECK(kind == RTCMediaStreamTrackKind::kAudio ||
@@ -417,9 +422,10 @@ RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
       partial_frames_lost(other.partial_frames_lost),
       full_frames_lost(other.full_frames_lost),
       audio_level(other.audio_level),
+      total_audio_energy(other.total_audio_energy),
+      total_samples_duration(other.total_samples_duration),
       echo_return_loss(other.echo_return_loss),
-      echo_return_loss_enhancement(other.echo_return_loss_enhancement) {
-}
+      echo_return_loss_enhancement(other.echo_return_loss_enhancement) {}

 RTCMediaStreamTrackStats::~RTCMediaStreamTrackStats() {
 }
@@ -54,6 +54,8 @@ class MockVoEChannelProxy : public voe::ChannelProxy {
   MOCK_CONST_METHOD0(GetDecodingCallStatistics, AudioDecodingCallStats());
   MOCK_CONST_METHOD0(GetSpeechOutputLevel, int());
   MOCK_CONST_METHOD0(GetSpeechOutputLevelFullRange, int());
+  MOCK_CONST_METHOD0(GetTotalOutputEnergy, double());
+  MOCK_CONST_METHOD0(GetTotalOutputDuration, double());
   MOCK_CONST_METHOD0(GetDelayEstimate, uint32_t());
   MOCK_METHOD2(SetSendTelephoneEventPayloadType, bool(int payload_type,
                                                       int payload_frequency));
@@ -50,6 +50,7 @@ namespace voe {

 namespace {

+constexpr double kAudioSampleDurationSeconds = 0.01;
 constexpr int64_t kMaxRetransmissionWindowMs = 1000;
 constexpr int64_t kMinRetransmissionWindowMs = 30;

@@ -696,7 +697,20 @@ MixerParticipant::AudioFrameInfo Channel::GetAudioFrameWithMuted(

   // Measure audio level (0-9)
   // TODO(henrik.lundin) Use the |muted| information here too.
+  // TODO(deadbeef): Use RmsLevel for |_outputAudioLevel| as well (see
+  // https://crbug.com/webrtc/7517).
   _outputAudioLevel.ComputeLevel(*audioFrame);
+  // See the description for "totalAudioEnergy" in the WebRTC stats spec
+  // (https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy)
+  // for an explanation of these formulas. In short, we need a value that can
+  // be used to compute RMS audio levels over different time intervals, by
+  // taking the difference between the results from two getStats calls. To do
+  // this, the value needs to be of units "squared sample value * time".
+  double additional_energy =
+      static_cast<double>(_outputAudioLevel.LevelFullRange()) / INT16_MAX;
+  additional_energy *= additional_energy;
+  totalOutputEnergy_ += additional_energy * kAudioSampleDurationSeconds;
+  totalOutputDuration_ += kAudioSampleDurationSeconds;

   if (capture_start_rtp_time_stamp_ < 0 && audioFrame->timestamp_ != 0) {
     // The first frame with a valid rtp timestamp.
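For intuition about the accumulation above: every 10 ms output frame contributes its squared, normalized full-range level times the frame duration, so the energy counter has units of "squared sample value * seconds" and an RMS level over any window can later be recovered from the energy and duration deltas. A self-contained sketch of the same pattern (the frame levels are illustrative values, not this class's code):

#include <cmath>
#include <cstdint>
#include <vector>

int main() {
  constexpr double kFrameDurationSeconds = 0.01;  // 10 ms frames, as above.
  // Illustrative per-frame levels, i.e. what LevelFullRange() would report.
  std::vector<int16_t> frame_levels(100, 16384);  // 1 second of audio.

  double total_energy = 0.0;    // "squared sample value * time"
  double total_duration = 0.0;  // seconds
  for (int16_t level : frame_levels) {
    double normalized = static_cast<double>(level) / INT16_MAX;
    total_energy += normalized * normalized * kFrameDurationSeconds;
    total_duration += kFrameDurationSeconds;
  }

  // RMS over the accumulated window; ~0.5 for a constant level of 16384.
  double rms = std::sqrt(total_energy / total_duration);
  (void)rms;
  return 0;
}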
@@ -2370,6 +2384,14 @@ int Channel::GetSpeechOutputLevelFullRange() const {
   return _outputAudioLevel.LevelFullRange();
 }

+double Channel::GetTotalOutputEnergy() const {
+  return totalOutputEnergy_;
+}
+
+double Channel::GetTotalOutputDuration() const {
+  return totalOutputDuration_;
+}
+
 void Channel::SetInputMute(bool enable) {
   rtc::CritScope cs(&volume_settings_critsect_);
   input_mute_ = enable;
@@ -260,6 +260,10 @@ class Channel
   void SetChannelOutputVolumeScaling(float scaling);
   int GetSpeechOutputLevel() const;
   int GetSpeechOutputLevelFullRange() const;
+  // See description of "totalAudioEnergy" in the WebRTC stats spec:
+  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+  double GetTotalOutputEnergy() const;
+  double GetTotalOutputDuration() const;

   // Stats.
   int GetNetworkStatistics(NetworkStatistics& stats);
@@ -470,6 +474,8 @@ class Channel
   acm2::RentACodec rent_a_codec_;
   std::unique_ptr<AudioSinkInterface> audio_sink_;
   AudioLevel _outputAudioLevel;
+  double totalOutputEnergy_ = 0.0;
+  double totalOutputDuration_ = 0.0;
   bool _externalTransport;
   // Downsamples to the codec rate if necessary.
   PushResampler<int16_t> input_resampler_;
@@ -154,6 +154,16 @@ int ChannelProxy::GetSpeechOutputLevelFullRange() const {
   return channel()->GetSpeechOutputLevelFullRange();
 }

+double ChannelProxy::GetTotalOutputEnergy() const {
+  RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
+  return channel()->GetTotalOutputEnergy();
+}
+
+double ChannelProxy::GetTotalOutputDuration() const {
+  RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
+  return channel()->GetTotalOutputDuration();
+}
+
 uint32_t ChannelProxy::GetDelayEstimate() const {
   RTC_DCHECK(worker_thread_checker_.CalledOnValidThread() ||
              module_process_thread_checker_.CalledOnValidThread());
@@ -83,6 +83,10 @@ class ChannelProxy : public RtpPacketSinkInterface {
   virtual AudioDecodingCallStats GetDecodingCallStatistics() const;
   virtual int GetSpeechOutputLevel() const;
   virtual int GetSpeechOutputLevelFullRange() const;
+  // See description of "totalAudioEnergy" in the WebRTC stats spec:
+  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+  virtual double GetTotalOutputEnergy() const;
+  virtual double GetTotalOutputDuration() const;
   virtual uint32_t GetDelayEstimate() const;
   virtual bool SetSendTelephoneEventPayloadType(int payload_type,
                                                 int payload_frequency);
@@ -314,6 +314,20 @@ TransmitMixer::PrepareDemux(const void* audioSamples,

    // --- Measure audio level of speech after all processing.
    _audioLevel.ComputeLevel(_audioFrame);
+
+   // See the description for "totalAudioEnergy" in the WebRTC stats spec
+   // (https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy)
+   // for an explanation of these formulas. In short, we need a value that can
+   // be used to compute RMS audio levels over different time intervals, by
+   // taking the difference between the results from two getStats calls. To do
+   // this, the value needs to be of units "squared sample value * time".
+   double additional_energy =
+       static_cast<double>(_audioLevel.LevelFullRange()) / INT16_MAX;
+   additional_energy *= additional_energy;
+   double sample_duration = static_cast<double>(nSamples) / samplesPerSec;
+   totalInputEnergy_ += additional_energy * sample_duration;
+   totalInputDuration_ += sample_duration;
+
    return 0;
 }

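The send side differs from the receive side only in that a captured frame's duration is computed from its sample count and sample rate rather than being fixed at 10 ms, so frames of different sizes and rates still contribute time-weighted energy consistently. A small arithmetic sketch of that duration term (values are illustrative):

#include <cstddef>

// Duration contributed by one captured frame, mirroring
// nSamples / samplesPerSec above.
double FrameDurationSeconds(size_t samples, int sample_rate_hz) {
  return static_cast<double>(samples) / sample_rate_hz;
}

// Example: a 480-sample frame at 48 kHz and a 160-sample frame at 16 kHz
// each add 0.01 s to the duration counter.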
@@ -857,6 +871,14 @@ int16_t TransmitMixer::AudioLevelFullRange() const
     return _audioLevel.LevelFullRange();
 }

+double TransmitMixer::GetTotalInputEnergy() const {
+  return totalInputEnergy_;
+}
+
+double TransmitMixer::GetTotalInputDuration() const {
+  return totalInputDuration_;
+}
+
 bool TransmitMixer::IsRecordingCall()
 {
     return _fileCallRecording;
@@ -76,6 +76,14 @@ public:
     // 'virtual' to allow mocking.
     virtual int16_t AudioLevelFullRange() const;

+    // See description of "totalAudioEnergy" in the WebRTC stats spec:
+    // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+    // 'virtual' to allow mocking.
+    virtual double GetTotalInputEnergy() const;
+
+    // 'virtual' to allow mocking.
+    virtual double GetTotalInputDuration() const;
+
     bool IsRecordingCall();

     bool IsRecordingMic();
@@ -189,6 +197,8 @@ private:
     bool _fileRecording = false;
     bool _fileCallRecording = false;
    voe::AudioLevel _audioLevel;
+    double totalInputEnergy_ = 0.0;
+    double totalInputDuration_ = 0.0;
     // protect file instances and their variables in MixedParticipants()
     rtc::CriticalSection _critSect;
     rtc::CriticalSection _callbackCritSect;