From fe6595f0064db2c5d4a95020924b643538577e7a Mon Sep 17 00:00:00 2001 From: Doudou Kisabaka Date: Tue, 18 May 2021 11:50:01 +0200 Subject: [PATCH] Include all RTP packet infos from the mix list when updating the audio frame for mixing. Users of the mixer can use this information to determine which sources were included in the frame. Bug: webrtc:12745 Change-Id: I11a8e3b1f4e8f95eb870336cad8dd082330bdf02 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/217768 Reviewed-by: Danil Chapovalov Reviewed-by: Minyue Li Reviewed-by: Alessio Bazzica Reviewed-by: Chen Xing Commit-Queue: Doudou Kisabaka Cr-Commit-Position: refs/heads/master@{#34035} --- modules/audio_mixer/BUILD.gn | 5 +- .../audio_mixer/audio_mixer_impl_unittest.cc | 105 ++++++++++++ modules/audio_mixer/frame_combiner.cc | 20 ++- .../audio_mixer/frame_combiner_unittest.cc | 69 +++++++- .../source/source_tracker_unittest.cc | 159 ++++++++++++++---- 5 files changed, 323 insertions(+), 35 deletions(-) diff --git a/modules/audio_mixer/BUILD.gn b/modules/audio_mixer/BUILD.gn index 5f227c737f..d51be4af04 100644 --- a/modules/audio_mixer/BUILD.gn +++ b/modules/audio_mixer/BUILD.gn @@ -39,6 +39,7 @@ rtc_library("audio_mixer_impl") { deps = [ ":audio_frame_manipulator", "../../api:array_view", + "../../api:rtp_packet_info", "../../api:scoped_refptr", "../../api/audio:audio_frame_api", "../../api/audio:audio_mixer_api", @@ -105,13 +106,15 @@ if (rtc_include_tests) { "audio_mixer_impl_unittest.cc", "frame_combiner_unittest.cc", ] - + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] deps = [ ":audio_frame_manipulator", ":audio_mixer_impl", ":audio_mixer_test_utils", "../../api:array_view", + "../../api:rtp_packet_info", "../../api/audio:audio_mixer_api", + "../../api/units:timestamp", "../../audio/utility:audio_frame_operations", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", diff --git a/modules/audio_mixer/audio_mixer_impl_unittest.cc 
b/modules/audio_mixer/audio_mixer_impl_unittest.cc index e4ba6ce4c2..61aa74e0a1 100644 --- a/modules/audio_mixer/audio_mixer_impl_unittest.cc +++ b/modules/audio_mixer/audio_mixer_impl_unittest.cc @@ -19,7 +19,11 @@ #include #include +#include "absl/types/optional.h" #include "api/audio/audio_mixer.h" +#include "api/rtp_packet_info.h" +#include "api/rtp_packet_infos.h" +#include "api/units/timestamp.h" #include "modules/audio_mixer/default_output_rate_calculator.h" #include "rtc_base/checks.h" #include "rtc_base/strings/string_builder.h" @@ -31,6 +35,7 @@ using ::testing::_; using ::testing::Exactly; using ::testing::Invoke; using ::testing::Return; +using ::testing::UnorderedElementsAre; namespace webrtc { @@ -89,6 +94,10 @@ class MockMixerAudioSource : public ::testing::NiceMock { fake_audio_frame_info_ = audio_frame_info; } + void set_packet_infos(const RtpPacketInfos& packet_infos) { + packet_infos_ = packet_infos; + } + private: AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz, AudioFrame* audio_frame) { @@ -96,11 +105,13 @@ class MockMixerAudioSource : public ::testing::NiceMock { audio_frame->sample_rate_hz_ = sample_rate_hz; audio_frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + audio_frame->packet_infos_ = packet_infos_; return fake_info(); } AudioFrame fake_frame_; AudioFrameInfo fake_audio_frame_info_; + RtpPacketInfos packet_infos_; }; class CustomRateCalculator : public OutputRateCalculator { @@ -640,6 +651,100 @@ TEST(AudioMixer, MultipleChannelsManyParticipants) { } } +TEST(AudioMixer, ShouldIncludeRtpPacketInfoFromAllMixedSources) { + const uint32_t kSsrc0 = 10; + const uint32_t kSsrc1 = 11; + const uint32_t kSsrc2 = 12; + const uint32_t kCsrc0 = 20; + const uint32_t kCsrc1 = 21; + const uint32_t kCsrc2 = 22; + const uint32_t kCsrc3 = 23; + const int kAudioLevel0 = 10; + const int kAudioLevel1 = 40; + const absl::optional kAudioLevel2 = absl::nullopt; + const uint32_t kRtpTimestamp0 = 300; + const uint32_t 
kRtpTimestamp1 = 400; + const Timestamp kReceiveTime0 = Timestamp::Millis(10); + const Timestamp kReceiveTime1 = Timestamp::Millis(20); + + const RtpPacketInfo kPacketInfo0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0, + kAudioLevel0, absl::nullopt, kReceiveTime0); + const RtpPacketInfo kPacketInfo1(kSsrc1, {kCsrc2}, kRtpTimestamp1, + kAudioLevel1, absl::nullopt, kReceiveTime1); + const RtpPacketInfo kPacketInfo2(kSsrc2, {kCsrc3}, kRtpTimestamp1, + kAudioLevel2, absl::nullopt, kReceiveTime1); + + const auto mixer = AudioMixerImpl::Create(); + + MockMixerAudioSource source; + source.set_packet_infos(RtpPacketInfos({kPacketInfo0})); + mixer->AddSource(&source); + ResetFrame(source.fake_frame()); + mixer->Mix(1, &frame_for_mixing); + + MockMixerAudioSource other_source; + other_source.set_packet_infos(RtpPacketInfos({kPacketInfo1, kPacketInfo2})); + ResetFrame(other_source.fake_frame()); + mixer->AddSource(&other_source); + + mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing); + + EXPECT_THAT(frame_for_mixing.packet_infos_, + UnorderedElementsAre(kPacketInfo0, kPacketInfo1, kPacketInfo2)); +} + +TEST(AudioMixer, MixerShouldIncludeRtpPacketInfoFromMixedSourcesOnly) { + const uint32_t kSsrc0 = 10; + const uint32_t kSsrc1 = 11; + const uint32_t kSsrc2 = 21; + const uint32_t kCsrc0 = 30; + const uint32_t kCsrc1 = 31; + const uint32_t kCsrc2 = 32; + const uint32_t kCsrc3 = 33; + const int kAudioLevel0 = 10; + const absl::optional kAudioLevelMissing = absl::nullopt; + const uint32_t kRtpTimestamp0 = 300; + const uint32_t kRtpTimestamp1 = 400; + const Timestamp kReceiveTime0 = Timestamp::Millis(10); + const Timestamp kReceiveTime1 = Timestamp::Millis(20); + + const RtpPacketInfo kPacketInfo0(kSsrc0, {kCsrc0, kCsrc1}, kRtpTimestamp0, + kAudioLevel0, absl::nullopt, kReceiveTime0); + const RtpPacketInfo kPacketInfo1(kSsrc1, {kCsrc2}, kRtpTimestamp1, + kAudioLevelMissing, absl::nullopt, + kReceiveTime1); + const RtpPacketInfo kPacketInfo2(kSsrc2, {kCsrc3}, kRtpTimestamp1, + 
kAudioLevelMissing, absl::nullopt, + kReceiveTime1); + + const auto mixer = AudioMixerImpl::Create(/*max_sources_to_mix=*/2); + + MockMixerAudioSource source1; + source1.set_packet_infos(RtpPacketInfos({kPacketInfo0})); + mixer->AddSource(&source1); + ResetFrame(source1.fake_frame()); + mixer->Mix(1, &frame_for_mixing); + + MockMixerAudioSource source2; + source2.set_packet_infos(RtpPacketInfos({kPacketInfo1})); + ResetFrame(source2.fake_frame()); + mixer->AddSource(&source2); + + // The mixer prioritizes kVadActive over kVadPassive. + // We limit the number of sources to mix to 2 and set the third source's VAD + // activity to kVadPassive so that it will not be added to the mix. + MockMixerAudioSource source3; + source3.set_packet_infos(RtpPacketInfos({kPacketInfo2})); + ResetFrame(source3.fake_frame()); + source3.fake_frame()->vad_activity_ = AudioFrame::kVadPassive; + mixer->AddSource(&source3); + + mixer->Mix(/*number_of_channels=*/1, &frame_for_mixing); + + EXPECT_THAT(frame_for_mixing.packet_infos_, + UnorderedElementsAre(kPacketInfo0, kPacketInfo1)); +} + class HighOutputRateCalculator : public OutputRateCalculator { public: static const int kDefaultFrequency = 76000; diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc index db301aac72..e31eea595f 100644 --- a/modules/audio_mixer/frame_combiner.cc +++ b/modules/audio_mixer/frame_combiner.cc @@ -16,8 +16,12 @@ #include #include #include +#include +#include #include "api/array_view.h" +#include "api/rtp_packet_info.h" +#include "api/rtp_packet_infos.h" #include "common_audio/include/audio_util.h" #include "modules/audio_mixer/audio_frame_manipulator.h" #include "modules/audio_mixer/audio_mixer_impl.h" @@ -54,11 +58,23 @@ void SetAudioFrameFields(rtc::ArrayView mix_list, if (mix_list.empty()) { audio_frame_for_mixing->elapsed_time_ms_ = -1; - } else if (mix_list.size() == 1) { + } else { audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_; 
audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_; audio_frame_for_mixing->ntp_time_ms_ = mix_list[0]->ntp_time_ms_; - audio_frame_for_mixing->packet_infos_ = mix_list[0]->packet_infos_; + std::vector packet_infos; + for (const auto& frame : mix_list) { + audio_frame_for_mixing->timestamp_ = + std::min(audio_frame_for_mixing->timestamp_, frame->timestamp_); + audio_frame_for_mixing->ntp_time_ms_ = + std::min(audio_frame_for_mixing->ntp_time_ms_, frame->ntp_time_ms_); + audio_frame_for_mixing->elapsed_time_ms_ = std::max( + audio_frame_for_mixing->elapsed_time_ms_, frame->elapsed_time_ms_); + packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(), + frame->packet_infos_.end()); + } + audio_frame_for_mixing->packet_infos_ = + RtpPacketInfos(std::move(packet_infos)); } } diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc index 4b189a052e..fa1fef325c 100644 --- a/modules/audio_mixer/frame_combiner_unittest.cc +++ b/modules/audio_mixer/frame_combiner_unittest.cc @@ -15,8 +15,12 @@ #include #include #include +#include +#include "absl/types/optional.h" #include "api/array_view.h" +#include "api/rtp_packet_info.h" +#include "api/rtp_packet_infos.h" #include "audio/utility/audio_frame_operations.h" #include "modules/audio_mixer/gain_change_calculator.h" #include "modules/audio_mixer/sine_wave_generator.h" @@ -28,7 +32,13 @@ namespace webrtc { namespace { + +using ::testing::ElementsAreArray; +using ::testing::IsEmpty; +using ::testing::UnorderedElementsAreArray; + using LimiterType = FrameCombiner::LimiterType; + struct FrameCombinerConfig { bool use_limiter; int sample_rate_hz; @@ -57,9 +67,24 @@ std::string ProduceDebugText(const FrameCombinerConfig& config) { AudioFrame frame1; AudioFrame frame2; -AudioFrame audio_frame_for_mixing; void SetUpFrames(int sample_rate_hz, int number_of_channels) { + RtpPacketInfo packet_info1( + /*ssrc=*/1001, /*csrcs=*/{}, /*rtp_timestamp=*/1000, 
+ /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt, + /*receive_time_ms=*/1); + RtpPacketInfo packet_info2( + /*ssrc=*/4004, /*csrcs=*/{}, /*rtp_timestamp=*/1234, + /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt, + /*receive_time_ms=*/2); + RtpPacketInfo packet_info3( + /*ssrc=*/7007, /*csrcs=*/{}, /*rtp_timestamp=*/1333, + /*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt, + /*receive_time_ms=*/2); + + frame1.packet_infos_ = RtpPacketInfos({packet_info1}); + frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3}); + for (auto* frame : {&frame1, &frame2}) { frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100), sample_rate_hz, AudioFrame::kNormalSpeech, @@ -81,6 +106,7 @@ TEST(FrameCombiner, BasicApiCallsLimiter) { ProduceDebugText(rate, number_of_channels, number_of_frames)); const std::vector frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); + AudioFrame audio_frame_for_mixing; combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); } @@ -88,6 +114,35 @@ TEST(FrameCombiner, BasicApiCallsLimiter) { } } +// The RtpPacketInfos field of the mixed packet should contain the union of the +// RtpPacketInfos from the frames that were actually mixed. 
+TEST(FrameCombiner, ContainsAllRtpPacketInfos) { + static constexpr int kSampleRateHz = 48000; + static constexpr int kNumChannels = 1; + FrameCombiner combiner(true); + const std::vector all_frames = {&frame1, &frame2}; + SetUpFrames(kSampleRateHz, kNumChannels); + + for (const int number_of_frames : {0, 1, 2}) { + SCOPED_TRACE( + ProduceDebugText(kSampleRateHz, kNumChannels, number_of_frames)); + const std::vector frames_to_combine( + all_frames.begin(), all_frames.begin() + number_of_frames); + + std::vector packet_infos; + for (const auto& frame : frames_to_combine) { + packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(), + frame->packet_infos_.end()); + } + + AudioFrame audio_frame_for_mixing; + combiner.Combine(frames_to_combine, kNumChannels, kSampleRateHz, + frames_to_combine.size(), &audio_frame_for_mixing); + EXPECT_THAT(audio_frame_for_mixing.packet_infos_, + UnorderedElementsAreArray(packet_infos)); + } +} + // There are DCHECKs in place to check for invalid parameters. 
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) { FrameCombiner combiner(true); @@ -105,6 +160,7 @@ TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) { ProduceDebugText(rate, number_of_channels, number_of_frames)); const std::vector frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); + AudioFrame audio_frame_for_mixing; #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) EXPECT_DEATH( combiner.Combine(frames_to_combine, number_of_channels, rate, @@ -134,6 +190,7 @@ TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) { ProduceDebugText(rate, number_of_channels, number_of_frames)); const std::vector frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); + AudioFrame audio_frame_for_mixing; #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) EXPECT_DEATH( combiner.Combine(frames_to_combine, number_of_channels, rate, @@ -161,6 +218,7 @@ TEST(FrameCombiner, BasicApiCallsNoLimiter) { ProduceDebugText(rate, number_of_channels, number_of_frames)); const std::vector frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); + AudioFrame audio_frame_for_mixing; combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); } @@ -174,10 +232,11 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { for (const int number_of_channels : {1, 2}) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0)); + AudioFrame audio_frame_for_mixing; + const std::vector frames_to_combine; combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); - const int16_t* audio_frame_for_mixing_data = audio_frame_for_mixing.data(); const std::vector mixed_data( @@ -186,6 +245,7 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { const std::vector expected(number_of_channels * rate / 100, 0); EXPECT_EQ(mixed_data, 
expected); + EXPECT_THAT(audio_frame_for_mixing.packet_infos_, IsEmpty()); } } } @@ -196,6 +256,8 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { for (const int number_of_channels : {1, 2, 4, 8, 10}) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1)); + AudioFrame audio_frame_for_mixing; + SetUpFrames(rate, number_of_channels); int16_t* frame1_data = frame1.mutable_data(); std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0); @@ -212,6 +274,8 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { std::vector expected(number_of_channels * rate / 100); std::iota(expected.begin(), expected.end(), 0); EXPECT_EQ(mixed_data, expected); + EXPECT_THAT(audio_frame_for_mixing.packet_infos_, + ElementsAreArray(frame1.packet_infos_)); } } } @@ -255,6 +319,7 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) { // Ensures limiter is on if 'use_limiter'. constexpr size_t number_of_streams = 2; + AudioFrame audio_frame_for_mixing; combiner.Combine(frames_to_combine, config.number_of_channels, config.sample_rate_hz, number_of_streams, &audio_frame_for_mixing); diff --git a/modules/rtp_rtcp/source/source_tracker_unittest.cc b/modules/rtp_rtcp/source/source_tracker_unittest.cc index c88e801d00..8514e8462d 100644 --- a/modules/rtp_rtcp/source/source_tracker_unittest.cc +++ b/modules/rtp_rtcp/source/source_tracker_unittest.cc @@ -240,78 +240,156 @@ TEST(SourceTrackerTest, StartEmpty) { EXPECT_THAT(tracker.GetSources(), IsEmpty()); } -TEST(SourceTrackerTest, OnFrameDeliveredRecordsSources) { - constexpr uint32_t kSsrc = 10; +TEST(SourceTrackerTest, OnFrameDeliveredRecordsSourcesDistinctSsrcs) { + constexpr uint32_t kSsrc1 = 10; + constexpr uint32_t kSsrc2 = 11; constexpr uint32_t kCsrcs0 = 20; constexpr uint32_t kCsrcs1 = 21; - constexpr uint32_t kRtpTimestamp = 40; - constexpr absl::optional kAudioLevel = 50; + constexpr uint32_t kCsrcs2 = 22; + constexpr uint32_t kRtpTimestamp0 = 40; + constexpr uint32_t 
kRtpTimestamp1 = 50; + constexpr absl::optional kAudioLevel0 = 50; + constexpr absl::optional kAudioLevel1 = 20; constexpr absl::optional kAbsoluteCaptureTime = AbsoluteCaptureTime{/*absolute_capture_timestamp=*/12, /*estimated_capture_clock_offset=*/absl::nullopt}; - constexpr Timestamp kReceiveTime = Timestamp::Millis(60); + constexpr Timestamp kReceiveTime0 = Timestamp::Millis(60); + constexpr Timestamp kReceiveTime1 = Timestamp::Millis(70); SimulatedClock clock(1000000000000ULL); SourceTracker tracker(&clock); tracker.OnFrameDelivered(RtpPacketInfos( - {RtpPacketInfo(kSsrc, {kCsrcs0, kCsrcs1}, kRtpTimestamp, kAudioLevel, - kAbsoluteCaptureTime, kReceiveTime)})); + {RtpPacketInfo(kSsrc1, {kCsrcs0, kCsrcs1}, kRtpTimestamp0, kAudioLevel0, + kAbsoluteCaptureTime, kReceiveTime0), + RtpPacketInfo(kSsrc2, {kCsrcs2}, kRtpTimestamp1, kAudioLevel1, + kAbsoluteCaptureTime, kReceiveTime1)})); int64_t timestamp_ms = clock.TimeInMilliseconds(); - constexpr RtpSource::Extensions extensions = {kAudioLevel, - kAbsoluteCaptureTime}; + constexpr RtpSource::Extensions extensions0 = {kAudioLevel0, + kAbsoluteCaptureTime}; + constexpr RtpSource::Extensions extensions1 = {kAudioLevel1, + kAbsoluteCaptureTime}; EXPECT_THAT(tracker.GetSources(), - ElementsAre(RtpSource(timestamp_ms, kSsrc, RtpSourceType::SSRC, - kRtpTimestamp, extensions), + ElementsAre(RtpSource(timestamp_ms, kSsrc2, RtpSourceType::SSRC, + kRtpTimestamp1, extensions1), + RtpSource(timestamp_ms, kCsrcs2, RtpSourceType::CSRC, + kRtpTimestamp1, extensions1), + RtpSource(timestamp_ms, kSsrc1, RtpSourceType::SSRC, + kRtpTimestamp0, extensions0), RtpSource(timestamp_ms, kCsrcs1, RtpSourceType::CSRC, - kRtpTimestamp, extensions), + kRtpTimestamp0, extensions0), RtpSource(timestamp_ms, kCsrcs0, RtpSourceType::CSRC, - kRtpTimestamp, extensions))); + kRtpTimestamp0, extensions0))); } -TEST(SourceTrackerTest, OnFrameDeliveredUpdatesSources) { +TEST(SourceTrackerTest, OnFrameDeliveredRecordsSourcesSameSsrc) { constexpr uint32_t 
kSsrc = 10; constexpr uint32_t kCsrcs0 = 20; constexpr uint32_t kCsrcs1 = 21; constexpr uint32_t kCsrcs2 = 22; constexpr uint32_t kRtpTimestamp0 = 40; - constexpr uint32_t kRtpTimestamp1 = 41; + constexpr uint32_t kRtpTimestamp1 = 45; + constexpr uint32_t kRtpTimestamp2 = 50; constexpr absl::optional kAudioLevel0 = 50; - constexpr absl::optional kAudioLevel1 = absl::nullopt; - constexpr absl::optional kAbsoluteCaptureTime0 = - AbsoluteCaptureTime{12, 34}; - constexpr absl::optional kAbsoluteCaptureTime1 = - AbsoluteCaptureTime{56, 78}; + constexpr absl::optional kAudioLevel1 = 20; + constexpr absl::optional kAudioLevel2 = 10; + constexpr absl::optional kAbsoluteCaptureTime = + AbsoluteCaptureTime{/*absolute_capture_timestamp=*/12, + /*estimated_capture_clock_offset=*/absl::nullopt}; constexpr Timestamp kReceiveTime0 = Timestamp::Millis(60); - constexpr Timestamp kReceiveTime1 = Timestamp::Millis(61); + constexpr Timestamp kReceiveTime1 = Timestamp::Millis(70); + constexpr Timestamp kReceiveTime2 = Timestamp::Millis(80); SimulatedClock clock(1000000000000ULL); SourceTracker tracker(&clock); tracker.OnFrameDelivered(RtpPacketInfos( {RtpPacketInfo(kSsrc, {kCsrcs0, kCsrcs1}, kRtpTimestamp0, kAudioLevel0, - kAbsoluteCaptureTime0, kReceiveTime0)})); + kAbsoluteCaptureTime, kReceiveTime0), + RtpPacketInfo(kSsrc, {kCsrcs2}, kRtpTimestamp1, kAudioLevel1, + kAbsoluteCaptureTime, kReceiveTime1), + RtpPacketInfo(kSsrc, {kCsrcs0}, kRtpTimestamp2, kAudioLevel2, + kAbsoluteCaptureTime, kReceiveTime2)})); - int64_t timestamp_ms_0 = clock.TimeInMilliseconds(); + int64_t timestamp_ms = clock.TimeInMilliseconds(); + constexpr RtpSource::Extensions extensions0 = {kAudioLevel0, + kAbsoluteCaptureTime}; + constexpr RtpSource::Extensions extensions1 = {kAudioLevel1, + kAbsoluteCaptureTime}; + constexpr RtpSource::Extensions extensions2 = {kAudioLevel2, + kAbsoluteCaptureTime}; - clock.AdvanceTimeMilliseconds(17); + EXPECT_THAT(tracker.GetSources(), + ElementsAre(RtpSource(timestamp_ms, 
kSsrc, RtpSourceType::SSRC, + kRtpTimestamp2, extensions2), + RtpSource(timestamp_ms, kCsrcs0, RtpSourceType::CSRC, + kRtpTimestamp2, extensions2), + RtpSource(timestamp_ms, kCsrcs2, RtpSourceType::CSRC, + kRtpTimestamp1, extensions1), + RtpSource(timestamp_ms, kCsrcs1, RtpSourceType::CSRC, + kRtpTimestamp0, extensions0))); +} - tracker.OnFrameDelivered(RtpPacketInfos( - {RtpPacketInfo(kSsrc, {kCsrcs0, kCsrcs2}, kRtpTimestamp1, kAudioLevel1, - kAbsoluteCaptureTime1, kReceiveTime1)})); - - int64_t timestamp_ms_1 = clock.TimeInMilliseconds(); +TEST(SourceTrackerTest, OnFrameDeliveredUpdatesSources) { + constexpr uint32_t kSsrc1 = 10; + constexpr uint32_t kSsrc2 = 11; + constexpr uint32_t kCsrcs0 = 20; + constexpr uint32_t kCsrcs1 = 21; + constexpr uint32_t kCsrcs2 = 22; + constexpr uint32_t kRtpTimestamp0 = 40; + constexpr uint32_t kRtpTimestamp1 = 41; + constexpr uint32_t kRtpTimestamp2 = 42; + constexpr absl::optional kAudioLevel0 = 50; + constexpr absl::optional kAudioLevel1 = absl::nullopt; + constexpr absl::optional kAudioLevel2 = 10; + constexpr absl::optional kAbsoluteCaptureTime0 = + AbsoluteCaptureTime{12, 34}; + constexpr absl::optional kAbsoluteCaptureTime1 = + AbsoluteCaptureTime{56, 78}; + constexpr absl::optional kAbsoluteCaptureTime2 = + AbsoluteCaptureTime{89, 90}; + constexpr Timestamp kReceiveTime0 = Timestamp::Millis(60); + constexpr Timestamp kReceiveTime1 = Timestamp::Millis(61); + constexpr Timestamp kReceiveTime2 = Timestamp::Millis(62); constexpr RtpSource::Extensions extensions0 = {kAudioLevel0, kAbsoluteCaptureTime0}; constexpr RtpSource::Extensions extensions1 = {kAudioLevel1, kAbsoluteCaptureTime1}; + constexpr RtpSource::Extensions extensions2 = {kAudioLevel2, + kAbsoluteCaptureTime2}; + + SimulatedClock clock(1000000000000ULL); + SourceTracker tracker(&clock); + + tracker.OnFrameDelivered(RtpPacketInfos( + {RtpPacketInfo(kSsrc1, {kCsrcs0, kCsrcs1}, kRtpTimestamp0, kAudioLevel0, + kAbsoluteCaptureTime0, kReceiveTime0)})); + + int64_t 
timestamp_ms_0 = clock.TimeInMilliseconds(); + EXPECT_THAT( + tracker.GetSources(), + ElementsAre(RtpSource(timestamp_ms_0, kSsrc1, RtpSourceType::SSRC, + kRtpTimestamp0, extensions0), + RtpSource(timestamp_ms_0, kCsrcs1, RtpSourceType::CSRC, + kRtpTimestamp0, extensions0), + RtpSource(timestamp_ms_0, kCsrcs0, RtpSourceType::CSRC, + kRtpTimestamp0, extensions0))); + + // Deliver packets with updated sources. + + clock.AdvanceTimeMilliseconds(17); + tracker.OnFrameDelivered(RtpPacketInfos( + {RtpPacketInfo(kSsrc1, {kCsrcs0, kCsrcs2}, kRtpTimestamp1, kAudioLevel1, + kAbsoluteCaptureTime1, kReceiveTime1)})); + + int64_t timestamp_ms_1 = clock.TimeInMilliseconds(); EXPECT_THAT( tracker.GetSources(), - ElementsAre(RtpSource(timestamp_ms_1, kSsrc, RtpSourceType::SSRC, + ElementsAre(RtpSource(timestamp_ms_1, kSsrc1, RtpSourceType::SSRC, kRtpTimestamp1, extensions1), RtpSource(timestamp_ms_1, kCsrcs2, RtpSourceType::CSRC, kRtpTimestamp1, extensions1), @@ -319,6 +397,27 @@ TEST(SourceTrackerTest, OnFrameDeliveredUpdatesSources) { kRtpTimestamp1, extensions1), RtpSource(timestamp_ms_0, kCsrcs1, RtpSourceType::CSRC, kRtpTimestamp0, extensions0))); + + // Deliver more packets with updated csrcs and a new ssrc. 
+ clock.AdvanceTimeMilliseconds(17); + tracker.OnFrameDelivered(RtpPacketInfos( + {RtpPacketInfo(kSsrc2, {kCsrcs0}, kRtpTimestamp2, kAudioLevel2, + kAbsoluteCaptureTime2, kReceiveTime2)})); + + int64_t timestamp_ms_2 = clock.TimeInMilliseconds(); + + EXPECT_THAT( + tracker.GetSources(), + ElementsAre(RtpSource(timestamp_ms_2, kSsrc2, RtpSourceType::SSRC, + kRtpTimestamp2, extensions2), + RtpSource(timestamp_ms_2, kCsrcs0, RtpSourceType::CSRC, + kRtpTimestamp2, extensions2), + RtpSource(timestamp_ms_1, kSsrc1, RtpSourceType::SSRC, + kRtpTimestamp1, extensions1), + RtpSource(timestamp_ms_1, kCsrcs2, RtpSourceType::CSRC, + kRtpTimestamp1, extensions1), + RtpSource(timestamp_ms_0, kCsrcs1, RtpSourceType::CSRC, + kRtpTimestamp0, extensions0))); } TEST(SourceTrackerTest, TimedOutSourcesAreRemoved) {