diff --git a/webrtc/api/stats/rtcstats_objects.h b/webrtc/api/stats/rtcstats_objects.h
index 49135f89a1..d73c3b8327 100644
--- a/webrtc/api/stats/rtcstats_objects.h
+++ b/webrtc/api/stats/rtcstats_objects.h
@@ -272,6 +272,8 @@ class RTCMediaStreamTrackStats final : public RTCStats {
   RTCStatsMember<uint32_t> full_frames_lost;
   // Audio-only members
   RTCStatsMember<double> audio_level;
+  RTCStatsMember<double> total_audio_energy;
+  RTCStatsMember<double> total_samples_duration;
   RTCStatsMember<double> echo_return_loss;
   RTCStatsMember<double> echo_return_loss_enhancement;
 };
diff --git a/webrtc/api/statstypes.cc b/webrtc/api/statstypes.cc
index b5481ecbef..d27036959b 100644
--- a/webrtc/api/statstypes.cc
+++ b/webrtc/api/statstypes.cc
@@ -594,6 +594,10 @@ const char* StatsReport::Value::display_name() const {
       return "srtpCipher";
     case kStatsValueNameTargetEncBitrate:
       return "googTargetEncBitrate";
+    case kStatsValueNameTotalAudioEnergy:
+      return "totalAudioEnergy";
+    case kStatsValueNameTotalSamplesDuration:
+      return "totalSamplesDuration";
     case kStatsValueNameTransmitBitrate:
      return "googTransmitBitrate";
     case kStatsValueNameTransportType:
diff --git a/webrtc/api/statstypes.h b/webrtc/api/statstypes.h
index b4a805b250..dcd514c28a 100644
--- a/webrtc/api/statstypes.h
+++ b/webrtc/api/statstypes.h
@@ -118,6 +118,8 @@ class StatsReport {
     kStatsValueNameSelectedCandidatePairId,
     kStatsValueNameSsrc,
     kStatsValueNameState,
+    kStatsValueNameTotalAudioEnergy,
+    kStatsValueNameTotalSamplesDuration,
     kStatsValueNameTransportId,
     kStatsValueNameSentPingRequestsTotal,
     kStatsValueNameSentPingRequestsBeforeFirstResponse,
diff --git a/webrtc/audio/audio_receive_stream.cc b/webrtc/audio/audio_receive_stream.cc
index 6dedca5f63..d9829c75f0 100644
--- a/webrtc/audio/audio_receive_stream.cc
+++ b/webrtc/audio/audio_receive_stream.cc
@@ -187,6 +187,8 @@ webrtc::AudioReceiveStream::Stats AudioReceiveStream::GetStats() const {
   }
   stats.delay_estimate_ms = channel_proxy_->GetDelayEstimate();
   stats.audio_level = channel_proxy_->GetSpeechOutputLevelFullRange();
+  stats.total_output_energy = channel_proxy_->GetTotalOutputEnergy();
+  stats.total_output_duration = channel_proxy_->GetTotalOutputDuration();
 
   // Get jitter buffer and total delay (alg + jitter + playout) stats.
   auto ns = channel_proxy_->GetNetworkStatistics();
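
Note (illustration only, not part of the patch): the two counters added to AudioReceiveStream::Stats are cumulative, so the average output level since the stream was created can be recovered from a single GetStats() call as sqrt(total_output_energy / total_output_duration). A minimal sketch, assuming only the fields added above; the helper name is hypothetical.

#include <cmath>

#include "webrtc/call/audio_receive_stream.h"

// Hypothetical helper: lifetime average output level in [0.0, 1.0] (an RMS of
// the normalized samples), derived from the two new cumulative Stats fields.
double AverageOutputLevel(const webrtc::AudioReceiveStream::Stats& stats) {
  if (stats.total_output_duration <= 0.0)
    return 0.0;  // Nothing has been played out yet.
  return std::sqrt(stats.total_output_energy / stats.total_output_duration);
}
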
diff --git a/webrtc/audio/audio_receive_stream_unittest.cc b/webrtc/audio/audio_receive_stream_unittest.cc
index 127ea077b0..0fbfe51d0c 100644
--- a/webrtc/audio/audio_receive_stream_unittest.cc
+++ b/webrtc/audio/audio_receive_stream_unittest.cc
@@ -57,6 +57,9 @@ const int kTransportSequenceNumberId = 4;
 const int kJitterBufferDelay = -7;
 const int kPlayoutBufferDelay = 302;
 const unsigned int kSpeechOutputLevel = 99;
+const double kTotalOutputEnergy = 0.25;
+const double kTotalOutputDuration = 0.5;
+
 const CallStatistics kCallStats = {
     345, 678, 901, 234, -12, 3456, 7890, 567, 890, 123};
 const CodecInst kCodecInst = {
@@ -154,6 +157,10 @@ struct ConfigHelper {
         .WillOnce(Return(kJitterBufferDelay + kPlayoutBufferDelay));
     EXPECT_CALL(*channel_proxy_, GetSpeechOutputLevelFullRange())
         .WillOnce(Return(kSpeechOutputLevel));
+    EXPECT_CALL(*channel_proxy_, GetTotalOutputEnergy())
+        .WillOnce(Return(kTotalOutputEnergy));
+    EXPECT_CALL(*channel_proxy_, GetTotalOutputDuration())
+        .WillOnce(Return(kTotalOutputDuration));
     EXPECT_CALL(*channel_proxy_, GetNetworkStatistics())
         .WillOnce(Return(kNetworkStats));
     EXPECT_CALL(*channel_proxy_, GetDecodingCallStatistics())
@@ -310,6 +317,8 @@ TEST(AudioReceiveStreamTest, GetStats) {
   EXPECT_EQ(static_cast<uint32_t>(kJitterBufferDelay + kPlayoutBufferDelay),
             stats.delay_estimate_ms);
   EXPECT_EQ(static_cast<int32_t>(kSpeechOutputLevel), stats.audio_level);
+  EXPECT_EQ(kTotalOutputEnergy, stats.total_output_energy);
+  EXPECT_EQ(kTotalOutputDuration, stats.total_output_duration);
   EXPECT_EQ(Q14ToFloat(kNetworkStats.currentExpandRate), stats.expand_rate);
   EXPECT_EQ(Q14ToFloat(kNetworkStats.currentSpeechExpandRate),
             stats.speech_expand_rate);
diff --git a/webrtc/audio/audio_send_stream.cc b/webrtc/audio/audio_send_stream.cc
index 1861cc004f..0659cbfd2d 100644
--- a/webrtc/audio/audio_send_stream.cc
+++ b/webrtc/audio/audio_send_stream.cc
@@ -279,6 +279,9 @@ webrtc::AudioSendStream::Stats AudioSendStream::GetStats() const {
   stats.audio_level = base->transmit_mixer()->AudioLevelFullRange();
   RTC_DCHECK_LE(0, stats.audio_level);
 
+  stats.total_input_energy = base->transmit_mixer()->GetTotalInputEnergy();
+  stats.total_input_duration = base->transmit_mixer()->GetTotalInputDuration();
+
   RTC_DCHECK(audio_state_->audio_processing());
   auto audio_processing_stats =
       audio_state_->audio_processing()->GetStatistics();
diff --git a/webrtc/audio/audio_send_stream_unittest.cc b/webrtc/audio/audio_send_stream_unittest.cc
index b906443106..12fad6f83e 100644
--- a/webrtc/audio/audio_send_stream_unittest.cc
+++ b/webrtc/audio/audio_send_stream_unittest.cc
@@ -55,6 +55,8 @@ const int kEchoReturnLoss = -65;
 const int kEchoReturnLossEnhancement = 101;
 const float kResidualEchoLikelihood = -1.0f;
 const int32_t kSpeechInputLevel = 96;
+const double kTotalInputEnergy = 0.25;
+const double kTotalInputDuration = 0.5;
 const CallStatistics kCallStats = {
     1345, 1678, 1901, 1234, 112, 13456, 17890, 1567, -1890, -1123};
 const ReportBlock kReportBlock = {456, 780, 123, 567, 890, 132, 143, 13354};
@@ -82,6 +84,8 @@ class MockLimitObserver : public BitrateAllocator::LimitObserver {
 class MockTransmitMixer : public voe::TransmitMixer {
  public:
   MOCK_CONST_METHOD0(AudioLevelFullRange, int16_t());
+  MOCK_CONST_METHOD0(GetTotalInputEnergy, double());
+  MOCK_CONST_METHOD0(GetTotalInputDuration, double());
 };
 
 std::unique_ptr<MockAudioEncoder> SetupAudioEncoderMock(
@@ -286,6 +290,8 @@ struct ConfigHelper {
     EXPECT_CALL(transmit_mixer_, AudioLevelFullRange())
         .WillRepeatedly(Return(kSpeechInputLevel));
+    EXPECT_CALL(transmit_mixer_, GetTotalInputEnergy())
+        .WillRepeatedly(Return(kTotalInputEnergy));
+    EXPECT_CALL(transmit_mixer_, GetTotalInputDuration())
+        .WillRepeatedly(Return(kTotalInputDuration));
     // We have to set the instantaneous value, the average, min and max. We only
     // care about the instantaneous value, so we set all to the same value.
@@ -420,6 +428,8 @@ TEST(AudioSendStreamTest, GetStats) {
             stats.jitter_ms);
   EXPECT_EQ(kCallStats.rttMs, stats.rtt_ms);
   EXPECT_EQ(static_cast<int32_t>(kSpeechInputLevel), stats.audio_level);
+  EXPECT_EQ(kTotalInputEnergy, stats.total_input_energy);
+  EXPECT_EQ(kTotalInputDuration, stats.total_input_duration);
   EXPECT_EQ(-1, stats.aec_quality_min);
   EXPECT_EQ(kEchoDelayMedian, stats.echo_delay_median_ms);
   EXPECT_EQ(kEchoDelayStdDev, stats.echo_delay_std_ms);
diff --git a/webrtc/call/audio_receive_stream.h b/webrtc/call/audio_receive_stream.h
index 5837b2ef62..d6924256c5 100644
--- a/webrtc/call/audio_receive_stream.h
+++ b/webrtc/call/audio_receive_stream.h
@@ -49,6 +49,10 @@ class AudioReceiveStream {
     uint32_t jitter_buffer_preferred_ms = 0;
     uint32_t delay_estimate_ms = 0;
     int32_t audio_level = -1;
+    // See description of "totalAudioEnergy" in the WebRTC stats spec:
+    // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+    double total_output_energy = 0.0;
+    double total_output_duration = 0.0;
     float expand_rate = 0.0f;
     float speech_expand_rate = 0.0f;
     float secondary_decoded_rate = 0.0f;
diff --git a/webrtc/call/audio_send_stream.h b/webrtc/call/audio_send_stream.h
index e0fe47db6d..26729e426c 100644
--- a/webrtc/call/audio_send_stream.h
+++ b/webrtc/call/audio_send_stream.h
@@ -47,6 +47,10 @@ class AudioSendStream {
     int32_t jitter_ms = -1;
     int64_t rtt_ms = -1;
     int32_t audio_level = -1;
+    // See description of "totalAudioEnergy" in the WebRTC stats spec:
+    // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+    double total_input_energy = 0.0;
+    double total_input_duration = 0.0;
     float aec_quality_min = -1.0f;
     int32_t echo_delay_median_ms = -1;
     int32_t echo_delay_std_ms = -1;
diff --git a/webrtc/media/base/mediachannel.h b/webrtc/media/base/mediachannel.h
index 1109f56205..25d566791e 100644
--- a/webrtc/media/base/mediachannel.h
+++ b/webrtc/media/base/mediachannel.h
@@ -615,6 +615,8 @@ struct VoiceSenderInfo : public MediaSenderInfo {
       : ext_seqnum(0),
         jitter_ms(0),
         audio_level(0),
+        total_input_energy(0.0),
+        total_input_duration(0.0),
         aec_quality_min(0.0),
         echo_delay_median_ms(0),
         echo_delay_std_ms(0),
@@ -627,6 +629,10 @@ struct VoiceSenderInfo : public MediaSenderInfo {
   int ext_seqnum;
   int jitter_ms;
   int audio_level;
+  // See description of "totalAudioEnergy" in the WebRTC stats spec:
+  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+  double total_input_energy;
+  double total_input_duration;
   float aec_quality_min;
   int echo_delay_median_ms;
   int echo_delay_std_ms;
@@ -645,6 +651,8 @@ struct VoiceReceiverInfo : public MediaReceiverInfo {
         jitter_buffer_preferred_ms(0),
         delay_estimate_ms(0),
         audio_level(0),
+        total_output_energy(0.0),
+        total_output_duration(0.0),
         expand_rate(0),
         speech_expand_rate(0),
         secondary_decoded_rate(0),
@@ -665,6 +673,10 @@ struct VoiceReceiverInfo : public MediaReceiverInfo {
   int jitter_buffer_preferred_ms;
   int delay_estimate_ms;
   int audio_level;
+  // See description of "totalAudioEnergy" in the WebRTC stats spec:
+  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+  double total_output_energy;
+  double total_output_duration;
   // fraction of synthesized audio inserted through expansion.
   float expand_rate;
   // fraction of synthesized speech inserted through expansion.
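
Note (illustration only, not part of the patch): because these counters only ever grow, an RMS level over an arbitrary window falls out of the difference between two polls, which is the usage the spec has in mind. A rough sketch against the cricket::VoiceReceiverInfo fields added above; the function and argument names are hypothetical. With the sender-side constants used in the unit tests earlier in this change (energy 0.25 over 0.5 seconds), the corresponding lifetime figure would be sqrt(0.25 / 0.5), roughly 0.71 of full scale.

#include <cmath>

#include "webrtc/media/base/mediachannel.h"

// Hypothetical helper: RMS output level in [0.0, 1.0] over the interval
// between two stats polls, computed from deltas of the cumulative counters.
double IntervalOutputLevel(const cricket::VoiceReceiverInfo& older,
                           const cricket::VoiceReceiverInfo& newer) {
  const double energy_delta =
      newer.total_output_energy - older.total_output_energy;
  const double duration_delta =
      newer.total_output_duration - older.total_output_duration;
  if (duration_delta <= 0.0)
    return 0.0;  // Nothing was played out between the two polls.
  return std::sqrt(energy_delta / duration_delta);
}
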
diff --git a/webrtc/media/engine/webrtcvoiceengine.cc b/webrtc/media/engine/webrtcvoiceengine.cc
index 8b269c4aea..53bd8f2d15 100644
--- a/webrtc/media/engine/webrtcvoiceengine.cc
+++ b/webrtc/media/engine/webrtcvoiceengine.cc
@@ -2248,6 +2248,8 @@ bool WebRtcVoiceMediaChannel::GetStats(VoiceMediaInfo* info) {
     sinfo.jitter_ms = stats.jitter_ms;
     sinfo.rtt_ms = stats.rtt_ms;
     sinfo.audio_level = stats.audio_level;
+    sinfo.total_input_energy = stats.total_input_energy;
+    sinfo.total_input_duration = stats.total_input_duration;
     sinfo.aec_quality_min = stats.aec_quality_min;
     sinfo.echo_delay_median_ms = stats.echo_delay_median_ms;
     sinfo.echo_delay_std_ms = stats.echo_delay_std_ms;
@@ -2278,6 +2280,8 @@ bool WebRtcVoiceMediaChannel::GetStats(VoiceMediaInfo* info) {
     rinfo.jitter_buffer_preferred_ms = stats.jitter_buffer_preferred_ms;
     rinfo.delay_estimate_ms = stats.delay_estimate_ms;
     rinfo.audio_level = stats.audio_level;
+    rinfo.total_output_energy = stats.total_output_energy;
+    rinfo.total_output_duration = stats.total_output_duration;
     rinfo.expand_rate = stats.expand_rate;
     rinfo.speech_expand_rate = stats.speech_expand_rate;
     rinfo.secondary_decoded_rate = stats.secondary_decoded_rate;
diff --git a/webrtc/pc/rtcstats_integrationtest.cc b/webrtc/pc/rtcstats_integrationtest.cc
index 3d1e7d66ea..ea1f8df977 100644
--- a/webrtc/pc/rtcstats_integrationtest.cc
+++ b/webrtc/pc/rtcstats_integrationtest.cc
@@ -475,6 +475,8 @@ class RTCStatsReportVerifier {
      verifier.TestMemberIsUndefined(media_stream_track.echo_return_loss);
      verifier.TestMemberIsUndefined(
          media_stream_track.echo_return_loss_enhancement);
+      verifier.TestMemberIsUndefined(media_stream_track.total_audio_energy);
+      verifier.TestMemberIsUndefined(media_stream_track.total_samples_duration);
     } else {
       RTC_DCHECK_EQ(*media_stream_track.kind,
                     RTCMediaStreamTrackKind::kAudio);
@@ -491,6 +493,10 @@ class RTCStatsReportVerifier {
       verifier.TestMemberIsUndefined(media_stream_track.full_frames_lost);
       // Audio-only members
       verifier.TestMemberIsNonNegative<double>(media_stream_track.audio_level);
+      verifier.TestMemberIsNonNegative<double>(
+          media_stream_track.total_audio_energy);
+      verifier.TestMemberIsNonNegative<double>(
+          media_stream_track.total_samples_duration);
       // TODO(hbos): |echo_return_loss| and |echo_return_loss_enhancement| are
       // flaky on msan bot (sometimes defined, sometimes undefined). Should the
       // test run until available or is there a way to have it always be
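
Note (sketch only, not part of the patch): once RTCStatsCollector fills the new members (next file), a native consumer holding an RTCMediaStreamTrackStats object can read them through the usual RTCStatsMember accessors. The helper below is hypothetical and assumes is_defined() and operator* behave as for the existing audio_level member.

#include <cmath>

#include "webrtc/api/stats/rtcstats_objects.h"

// Hypothetical helper: average audio level of one track in [0.0, 1.0], or
// -1.0 if the members are undefined (e.g. a video track).
double TrackAverageAudioLevel(const webrtc::RTCMediaStreamTrackStats& track) {
  if (!track.total_audio_energy.is_defined() ||
      !track.total_samples_duration.is_defined() ||
      *track.total_samples_duration <= 0.0) {
    return -1.0;
  }
  return std::sqrt(*track.total_audio_energy / *track.total_samples_duration);
}
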
diff --git a/webrtc/pc/rtcstatscollector.cc b/webrtc/pc/rtcstatscollector.cc
index 81b4d3e4d8..01764e909a 100644
--- a/webrtc/pc/rtcstatscollector.cc
+++ b/webrtc/pc/rtcstatscollector.cc
@@ -374,6 +374,9 @@ ProduceMediaStreamTrackStatsFromVoiceSenderInfo(
     audio_track_stats->audio_level = DoubleAudioLevelFromIntAudioLevel(
         voice_sender_info.audio_level);
   }
+  audio_track_stats->total_audio_energy = voice_sender_info.total_input_energy;
+  audio_track_stats->total_samples_duration =
+      voice_sender_info.total_input_duration;
   if (voice_sender_info.echo_return_loss != -100) {
     audio_track_stats->echo_return_loss = static_cast<double>(
         voice_sender_info.echo_return_loss);
@@ -405,6 +408,10 @@ ProduceMediaStreamTrackStatsFromVoiceReceiverInfo(
     audio_track_stats->audio_level = DoubleAudioLevelFromIntAudioLevel(
         voice_receiver_info.audio_level);
   }
+  audio_track_stats->total_audio_energy =
+      voice_receiver_info.total_output_energy;
+  audio_track_stats->total_samples_duration =
+      voice_receiver_info.total_output_duration;
   return audio_track_stats;
 }
diff --git a/webrtc/pc/rtcstatscollector_unittest.cc b/webrtc/pc/rtcstatscollector_unittest.cc
index 43c94efe62..92024ad18d 100644
--- a/webrtc/pc/rtcstatscollector_unittest.cc
+++ b/webrtc/pc/rtcstatscollector_unittest.cc
@@ -1521,6 +1521,8 @@ TEST_F(RTCStatsCollectorTest,
   voice_sender_info_ssrc1.local_stats.push_back(cricket::SsrcSenderInfo());
   voice_sender_info_ssrc1.local_stats[0].ssrc = 1;
   voice_sender_info_ssrc1.audio_level = 32767;
+  voice_sender_info_ssrc1.total_input_energy = 0.25;
+  voice_sender_info_ssrc1.total_input_duration = 0.5;
   voice_sender_info_ssrc1.echo_return_loss = 42;
   voice_sender_info_ssrc1.echo_return_loss_enhancement = 52;
 
@@ -1530,6 +1532,8 @@ TEST_F(RTCStatsCollectorTest,
   voice_sender_info_ssrc2.local_stats.push_back(cricket::SsrcSenderInfo());
   voice_sender_info_ssrc2.local_stats[0].ssrc = 2;
   voice_sender_info_ssrc2.audio_level = 0;
+  voice_sender_info_ssrc2.total_input_energy = 0.0;
+  voice_sender_info_ssrc2.total_input_duration = 0.0;
   voice_sender_info_ssrc2.echo_return_loss = -100;
   voice_sender_info_ssrc2.echo_return_loss_enhancement = -100;
 
@@ -1544,6 +1548,8 @@ TEST_F(RTCStatsCollectorTest,
   voice_receiver_info.local_stats.push_back(cricket::SsrcReceiverInfo());
   voice_receiver_info.local_stats[0].ssrc = 3;
   voice_receiver_info.audio_level = 16383;
+  voice_receiver_info.total_output_energy = 0.125;
+  voice_receiver_info.total_output_duration = 0.25;
 
   test_->CreateMockRtpSendersReceiversAndChannels(
       { std::make_pair(local_audio_track.get(), voice_sender_info_ssrc1),
@@ -1582,6 +1588,8 @@ TEST_F(RTCStatsCollectorTest,
   expected_local_audio_track_ssrc1.ended = true;
   expected_local_audio_track_ssrc1.detached = false;
   expected_local_audio_track_ssrc1.audio_level = 1.0;
+  expected_local_audio_track_ssrc1.total_audio_energy = 0.25;
+  expected_local_audio_track_ssrc1.total_samples_duration = 0.5;
   expected_local_audio_track_ssrc1.echo_return_loss = 42.0;
   expected_local_audio_track_ssrc1.echo_return_loss_enhancement = 52.0;
   ASSERT_TRUE(report->Get(expected_local_audio_track_ssrc1.id()));
@@ -1597,6 +1605,8 @@ TEST_F(RTCStatsCollectorTest,
   expected_local_audio_track_ssrc2.ended = true;
   expected_local_audio_track_ssrc2.detached = false;
   expected_local_audio_track_ssrc2.audio_level = 0.0;
+  expected_local_audio_track_ssrc2.total_audio_energy = 0.0;
+  expected_local_audio_track_ssrc2.total_samples_duration = 0.0;
   // Should be undefined: |expected_local_audio_track_ssrc2.echo_return_loss|
   // and |expected_local_audio_track_ssrc2.echo_return_loss_enhancement|.
   ASSERT_TRUE(report->Get(expected_local_audio_track_ssrc2.id()));
@@ -1612,6 +1622,8 @@ TEST_F(RTCStatsCollectorTest,
   expected_remote_audio_track.ended = false;
   expected_remote_audio_track.detached = false;
   expected_remote_audio_track.audio_level = 16383.0 / 32767.0;
+  expected_remote_audio_track.total_audio_energy = 0.125;
+  expected_remote_audio_track.total_samples_duration = 0.25;
   ASSERT_TRUE(report->Get(expected_remote_audio_track.id()));
   EXPECT_EQ(expected_remote_audio_track,
             report->Get(expected_remote_audio_track.id())->cast_to<
diff --git a/webrtc/pc/statscollector.cc b/webrtc/pc/statscollector.cc
index f74a683a85..d1160b2747 100644
--- a/webrtc/pc/statscollector.cc
+++ b/webrtc/pc/statscollector.cc
@@ -146,6 +146,9 @@ void ExtractStats(const cricket::VoiceReceiverInfo& info, StatsReport* report) {
     { StatsReport::kStatsValueNameAccelerateRate, info.accelerate_rate },
     { StatsReport::kStatsValueNamePreemptiveExpandRate,
       info.preemptive_expand_rate },
+    { StatsReport::kStatsValueNameTotalAudioEnergy, info.total_output_energy },
+    { StatsReport::kStatsValueNameTotalSamplesDuration,
+      info.total_output_duration }
   };
 
   const IntForAdd ints[] = {
@@ -195,6 +198,12 @@ void ExtractStats(const cricket::VoiceSenderInfo& info, StatsReport* report) {
                        info.aec_quality_min, info.echo_delay_std_ms,
                        info.residual_echo_likelihood,
                        info.residual_echo_likelihood_recent_max);
+  const FloatForAdd floats[] = {
+    { StatsReport::kStatsValueNameTotalAudioEnergy, info.total_input_energy },
+    { StatsReport::kStatsValueNameTotalSamplesDuration,
+      info.total_input_duration }
+  };
+
   RTC_DCHECK_GE(info.audio_level, 0);
   const IntForAdd ints[] = {
     { StatsReport::kStatsValueNameAudioInputLevel, info.audio_level},
@@ -203,6 +212,10 @@
     { StatsReport::kStatsValueNamePacketsSent, info.packets_sent },
   };
 
+  for (const auto& f : floats) {
+    report->AddFloat(f.name, f.value);
+  }
+
   for (const auto& i : ints) {
     if (i.value >= 0) {
       report->AddInt(i.name, i.value);
diff --git a/webrtc/stats/rtcstats_objects.cc b/webrtc/stats/rtcstats_objects.cc
index 5397ef782c..bd8e9cd30c 100644
--- a/webrtc/stats/rtcstats_objects.cc
+++ b/webrtc/stats/rtcstats_objects.cc
@@ -349,32 +349,35 @@ RTCMediaStreamStats::~RTCMediaStreamStats() {
 }
 
 WEBRTC_RTCSTATS_IMPL(RTCMediaStreamTrackStats, RTCStats, "track",
-    &track_identifier,
-    &remote_source,
-    &ended,
-    &detached,
-    &kind,
-    &frame_width,
-    &frame_height,
-    &frames_per_second,
-    &frames_sent,
-    &frames_received,
-    &frames_decoded,
-    &frames_dropped,
-    &frames_corrupted,
-    &partial_frames_lost,
-    &full_frames_lost,
-    &audio_level,
-    &echo_return_loss,
-    &echo_return_loss_enhancement);
+                     &track_identifier,
+                     &remote_source,
+                     &ended,
+                     &detached,
+                     &kind,
+                     &frame_width,
+                     &frame_height,
+                     &frames_per_second,
+                     &frames_sent,
+                     &frames_received,
+                     &frames_decoded,
+                     &frames_dropped,
+                     &frames_corrupted,
+                     &partial_frames_lost,
+                     &full_frames_lost,
+                     &audio_level,
+                     &total_audio_energy,
+                     &total_samples_duration,
+                     &echo_return_loss,
+                     &echo_return_loss_enhancement);
 
 RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
     const std::string& id, int64_t timestamp_us, const char* kind)
     : RTCMediaStreamTrackStats(std::string(id), timestamp_us, kind) {
 }
 
-RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
-    std::string&& id, int64_t timestamp_us, const char* kind)
+RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(std::string&& id,
+                                                   int64_t timestamp_us,
+                                                   const char* kind)
     : RTCStats(std::move(id), timestamp_us),
       track_identifier("trackIdentifier"),
       remote_source("remoteSource"),
@@ -392,6 +395,8 @@ RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
       partial_frames_lost("partialFramesLost"),
       full_frames_lost("fullFramesLost"),
       audio_level("audioLevel"),
+      total_audio_energy("totalAudioEnergy"),
+      total_samples_duration("totalSamplesDuration"),
       echo_return_loss("echoReturnLoss"),
       echo_return_loss_enhancement("echoReturnLossEnhancement") {
   RTC_DCHECK(kind == RTCMediaStreamTrackKind::kAudio ||
@@ -417,9 +422,10 @@ RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
       partial_frames_lost(other.partial_frames_lost),
       full_frames_lost(other.full_frames_lost),
       audio_level(other.audio_level),
+      total_audio_energy(other.total_audio_energy),
+      total_samples_duration(other.total_samples_duration),
       echo_return_loss(other.echo_return_loss),
-      echo_return_loss_enhancement(other.echo_return_loss_enhancement) {
-}
+      echo_return_loss_enhancement(other.echo_return_loss_enhancement) {}
 
 RTCMediaStreamTrackStats::~RTCMediaStreamTrackStats() {
 }
diff --git a/webrtc/test/mock_voe_channel_proxy.h b/webrtc/test/mock_voe_channel_proxy.h
index bd6bb4c0d8..6aa60857d1 100644
--- a/webrtc/test/mock_voe_channel_proxy.h
+++ b/webrtc/test/mock_voe_channel_proxy.h
@@ -54,6 +54,8 @@ class MockVoEChannelProxy : public voe::ChannelProxy {
   MOCK_CONST_METHOD0(GetDecodingCallStatistics, AudioDecodingCallStats());
   MOCK_CONST_METHOD0(GetSpeechOutputLevel, int());
   MOCK_CONST_METHOD0(GetSpeechOutputLevelFullRange, int());
+  MOCK_CONST_METHOD0(GetTotalOutputEnergy, double());
+  MOCK_CONST_METHOD0(GetTotalOutputDuration, double());
   MOCK_CONST_METHOD0(GetDelayEstimate, uint32_t());
   MOCK_METHOD2(SetSendTelephoneEventPayloadType,
                bool(int payload_type, int payload_frequency));
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index efdfee3bfd..02001bac6b 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -50,6 +50,7 @@ namespace voe {
 
 namespace {
 
+constexpr double kAudioSampleDurationSeconds = 0.01;
 constexpr int64_t kMaxRetransmissionWindowMs = 1000;
 constexpr int64_t kMinRetransmissionWindowMs = 30;
 
@@ -696,7 +697,20 @@ MixerParticipant::AudioFrameInfo Channel::GetAudioFrameWithMuted(
 
   // Measure audio level (0-9)
   // TODO(henrik.lundin) Use the |muted| information here too.
+  // TODO(deadbeef): Use RmsLevel for |_outputAudioLevel| as well (see
+  // https://crbug.com/webrtc/7517).
   _outputAudioLevel.ComputeLevel(*audioFrame);
+  // See the description for "totalAudioEnergy" in the WebRTC stats spec
+  // (https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy)
+  // for an explanation of these formulas. In short, we need a value that can
+  // be used to compute RMS audio levels over different time intervals, by
+  // taking the difference between the results from two getStats calls. To do
+  // this, the value needs to be of units "squared sample value * time".
+  double additional_energy =
+      static_cast<double>(_outputAudioLevel.LevelFullRange()) / INT16_MAX;
+  additional_energy *= additional_energy;
+  totalOutputEnergy_ += additional_energy * kAudioSampleDurationSeconds;
+  totalOutputDuration_ += kAudioSampleDurationSeconds;
 
   if (capture_start_rtp_time_stamp_ < 0 && audioFrame->timestamp_ != 0) {
     // The first frame with a valid rtp timestamp.
@@ -2370,6 +2384,14 @@ int Channel::GetSpeechOutputLevelFullRange() const {
   return _outputAudioLevel.LevelFullRange();
 }
 
+double Channel::GetTotalOutputEnergy() const {
+  return totalOutputEnergy_;
+}
+
+double Channel::GetTotalOutputDuration() const {
+  return totalOutputDuration_;
+}
+
 void Channel::SetInputMute(bool enable) {
   rtc::CritScope cs(&volume_settings_critsect_);
   input_mute_ = enable;
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index 492083e0a8..7fb8ae8b3c 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -260,6 +260,10 @@ class Channel
   void SetChannelOutputVolumeScaling(float scaling);
   int GetSpeechOutputLevel() const;
   int GetSpeechOutputLevelFullRange() const;
+  // See description of "totalAudioEnergy" in the WebRTC stats spec:
+  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+  double GetTotalOutputEnergy() const;
+  double GetTotalOutputDuration() const;
 
   // Stats.
   int GetNetworkStatistics(NetworkStatistics& stats);
@@ -470,6 +474,8 @@ class Channel
   acm2::RentACodec rent_a_codec_;
   std::unique_ptr<AudioSinkInterface> audio_sink_;
   AudioLevel _outputAudioLevel;
+  double totalOutputEnergy_ = 0.0;
+  double totalOutputDuration_ = 0.0;
   bool _externalTransport;
   // Downsamples to the codec rate if necessary.
   PushResampler<int16_t> input_resampler_;
diff --git a/webrtc/voice_engine/channel_proxy.cc b/webrtc/voice_engine/channel_proxy.cc
index 4c6dd383ec..027659d2f0 100644
--- a/webrtc/voice_engine/channel_proxy.cc
+++ b/webrtc/voice_engine/channel_proxy.cc
@@ -154,6 +154,16 @@ int ChannelProxy::GetSpeechOutputLevelFullRange() const {
   return channel()->GetSpeechOutputLevelFullRange();
 }
 
+double ChannelProxy::GetTotalOutputEnergy() const {
+  RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
+  return channel()->GetTotalOutputEnergy();
+}
+
+double ChannelProxy::GetTotalOutputDuration() const {
+  RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
+  return channel()->GetTotalOutputDuration();
+}
+
 uint32_t ChannelProxy::GetDelayEstimate() const {
   RTC_DCHECK(worker_thread_checker_.CalledOnValidThread() ||
              module_process_thread_checker_.CalledOnValidThread());
diff --git a/webrtc/voice_engine/channel_proxy.h b/webrtc/voice_engine/channel_proxy.h
index eb4bd701a9..5aa2839feb 100644
--- a/webrtc/voice_engine/channel_proxy.h
+++ b/webrtc/voice_engine/channel_proxy.h
@@ -83,6 +83,10 @@ class ChannelProxy : public RtpPacketSinkInterface {
   virtual AudioDecodingCallStats GetDecodingCallStatistics() const;
   virtual int GetSpeechOutputLevel() const;
   virtual int GetSpeechOutputLevelFullRange() const;
+  // See description of "totalAudioEnergy" in the WebRTC stats spec:
+  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+  virtual double GetTotalOutputEnergy() const;
+  virtual double GetTotalOutputDuration() const;
   virtual uint32_t GetDelayEstimate() const;
   virtual bool SetSendTelephoneEventPayloadType(int payload_type,
                                                 int payload_frequency);
diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc
index 05040b2710..06f37c2798 100644
--- a/webrtc/voice_engine/transmit_mixer.cc
+++ b/webrtc/voice_engine/transmit_mixer.cc
@@ -314,6 +314,20 @@ TransmitMixer::PrepareDemux(const void* audioSamples,
 
     // --- Measure audio level of speech after all processing.
     _audioLevel.ComputeLevel(_audioFrame);
+
+    // See the description for "totalAudioEnergy" in the WebRTC stats spec
+    // (https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy)
+    // for an explanation of these formulas. In short, we need a value that can
+    // be used to compute RMS audio levels over different time intervals, by
+    // taking the difference between the results from two getStats calls. To do
+    // this, the value needs to be of units "squared sample value * time".
+    double additional_energy =
+        static_cast<double>(_audioLevel.LevelFullRange()) / INT16_MAX;
+    additional_energy *= additional_energy;
+    double sample_duration = static_cast<double>(nSamples) / samplesPerSec;
+    totalInputEnergy_ += additional_energy * sample_duration;
+    totalInputDuration_ += sample_duration;
+
     return 0;
 }
 
@@ -857,6 +871,14 @@ int16_t TransmitMixer::AudioLevelFullRange() const
     return _audioLevel.LevelFullRange();
 }
 
+double TransmitMixer::GetTotalInputEnergy() const {
+  return totalInputEnergy_;
+}
+
+double TransmitMixer::GetTotalInputDuration() const {
+  return totalInputDuration_;
+}
+
 bool TransmitMixer::IsRecordingCall()
 {
     return _fileCallRecording;
diff --git a/webrtc/voice_engine/transmit_mixer.h b/webrtc/voice_engine/transmit_mixer.h
index 47f67f9040..6fcb86ea6f 100644
--- a/webrtc/voice_engine/transmit_mixer.h
+++ b/webrtc/voice_engine/transmit_mixer.h
@@ -76,6 +76,14 @@ public:
     // 'virtual' to allow mocking.
     virtual int16_t AudioLevelFullRange() const;
 
+    // See description of "totalAudioEnergy" in the WebRTC stats spec:
+    // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
+    // 'virtual' to allow mocking.
+    virtual double GetTotalInputEnergy() const;
+
+    // 'virtual' to allow mocking.
+    virtual double GetTotalInputDuration() const;
+
     bool IsRecordingCall();
 
     bool IsRecordingMic();
@@ -189,6 +197,8 @@ private:
     bool _fileRecording = false;
    bool _fileCallRecording = false;
     voe::AudioLevel _audioLevel;
+    double totalInputEnergy_ = 0.0;
+    double totalInputDuration_ = 0.0;
     // protect file instances and their variables in MixedParticipants()
     rtc::CriticalSection _critSect;
     rtc::CriticalSection _callbackCritSect;
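
Note (illustration only, not part of the patch): Channel and TransmitMixer accumulate in the same way and differ only in how the frame duration is obtained (a fixed kAudioSampleDurationSeconds of 0.01 versus nSamples / samplesPerSec). The standalone class below is hypothetical and just restates that shared bookkeeping, including the unit of the energy counter (squared normalized sample value times seconds) and the resulting invariant that energy never exceeds duration.

#include <cstdint>

// Hypothetical stand-in for the bookkeeping added to Channel and
// TransmitMixer: one update per processed audio frame.
class AudioEnergyAccumulator {
 public:
  // |level_full_range| is a level in [0, 32767], as returned by
  // AudioLevel::LevelFullRange(); |frame_duration_s| is the frame length in
  // seconds (0.01 in the fixed 10 ms case).
  void AddFrame(int16_t level_full_range, double frame_duration_s) {
    double normalized = static_cast<double>(level_full_range) / 32767.0;
    // Unit: squared normalized sample value * seconds. energy / duration is
    // then a mean square, and its square root is an RMS level in [0.0, 1.0].
    total_energy_ += normalized * normalized * frame_duration_s;
    total_duration_ += frame_duration_s;
  }

  // Because |normalized| is at most 1.0, total_energy() <= total_duration().
  double total_energy() const { return total_energy_; }
  double total_duration() const { return total_duration_; }

 private:
  double total_energy_ = 0.0;
  double total_duration_ = 0.0;
};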