From 8fc7fa798f7a36955f1b933980401afad2aff592 Mon Sep 17 00:00:00 2001 From: pbos Date: Wed, 15 Jul 2015 08:02:58 -0700 Subject: [PATCH] Base A/V synchronization on sync_labels. Groups of streams that should be synchronized are signalled through SDP. These should be used rather than synchronizing the first-added video stream to the first-added audio stream implicitly. BUG=webrtc:4667 R=hta@webrtc.org, solenberg@webrtc.org, stefan@webrtc.org TBR=mflodman@webrtc.org Review URL: https://codereview.webrtc.org/1181653002 Cr-Commit-Position: refs/heads/master@{#9586} --- talk/media/webrtc/fakewebrtccall.cc | 1 + talk/media/webrtc/webrtcvideoengine2.cc | 11 +-- .../webrtc/webrtcvideoengine2_unittest.cc | 54 ++++----------- talk/media/webrtc/webrtcvoiceengine.cc | 52 ++++++++------ talk/media/webrtc/webrtcvoiceengine.h | 2 +- .../webrtc/webrtcvoiceengine_unittest.cc | 57 ++++++++++++++-- webrtc/audio_receive_stream.h | 10 +++ webrtc/video/audio_receive_stream.cc | 4 ++ webrtc/video/bitrate_estimator_tests.cc | 4 ++ webrtc/video/call.cc | 68 ++++++++++++++++++- webrtc/video/call_perf_tests.cc | 35 ++++++++-- webrtc/video/video_receive_stream.cc | 26 +++---- webrtc/video/video_receive_stream.h | 6 +- webrtc/video_engine/vie_sync_module.cc | 7 ++ webrtc/video_receive_stream.h | 8 +-- 15 files changed, 242 insertions(+), 103 deletions(-) diff --git a/talk/media/webrtc/fakewebrtccall.cc b/talk/media/webrtc/fakewebrtccall.cc index 9f6a0ff740..cfb6bd1b11 100644 --- a/talk/media/webrtc/fakewebrtccall.cc +++ b/talk/media/webrtc/fakewebrtccall.cc @@ -37,6 +37,7 @@ namespace cricket { FakeAudioReceiveStream::FakeAudioReceiveStream( const webrtc::AudioReceiveStream::Config& config) : config_(config), received_packets_(0) { + DCHECK(config.voe_channel_id != -1); } webrtc::AudioReceiveStream::Stats FakeAudioReceiveStream::GetStats() const { diff --git a/talk/media/webrtc/webrtcvideoengine2.cc b/talk/media/webrtc/webrtcvideoengine2.cc index d33d1909cf..657475b281 100644 --- a/talk/media/webrtc/webrtcvideoengine2.cc +++ b/talk/media/webrtc/webrtcvideoengine2.cc @@ -1155,15 +1155,8 @@ bool WebRtcVideoChannel2::AddRecvStream(const StreamParams& sp, webrtc::VideoReceiveStream::Config config; ConfigureReceiverRtp(&config, sp); - // Set up A/V sync if there is a VoiceChannel. - // TODO(pbos): The A/V is synched by the receiving channel. So we need to know - // the SSRC of the remote audio channel in order to sync the correct webrtc - // VoiceEngine channel. For now sync the first channel in non-conference to - // match existing behavior in WebRtcVideoEngine. - if (voice_channel_id_ != -1 && receive_streams_.empty() && - !options_.conference_mode.GetWithDefaultIfUnset(false)) { - config.audio_channel_id = voice_channel_id_; - } + // Set up A/V sync group based on sync label. 
+ config.sync_group = sp.sync_label; config.rtp.remb = false; VideoCodecSettings send_codec; diff --git a/talk/media/webrtc/webrtcvideoengine2_unittest.cc b/talk/media/webrtc/webrtcvideoengine2_unittest.cc index 71eebba0b0..f3d3a542ab 100644 --- a/talk/media/webrtc/webrtcvideoengine2_unittest.cc +++ b/talk/media/webrtc/webrtcvideoengine2_unittest.cc @@ -168,46 +168,6 @@ class WebRtcVideoEngine2Test : public ::testing::Test { std::map default_apt_rtx_types_; }; -class WebRtcVideoEngine2VoiceTest : public WebRtcVideoEngine2Test { - public: - WebRtcVideoEngine2VoiceTest() : WebRtcVideoEngine2Test(&voice_engine_) {} -}; - -TEST_F(WebRtcVideoEngine2VoiceTest, ConfiguresAvSyncForFirstReceiveChannel) { - FakeCallFactory call_factory; - engine_.SetCallFactory(&call_factory); - voice_engine_.Init(rtc::Thread::Current()); - engine_.Init(); - - rtc::scoped_ptr voice_channel( - voice_engine_.CreateChannel(cricket::AudioOptions())); - ASSERT_TRUE(voice_channel.get() != nullptr); - WebRtcVoiceMediaChannel* webrtc_voice_channel = - static_cast(voice_channel.get()); - ASSERT_NE(webrtc_voice_channel->voe_channel(), -1); - rtc::scoped_ptr channel( - engine_.CreateChannel(cricket::VideoOptions(), voice_channel.get())); - - FakeCall* fake_call = call_factory.GetCall(); - ASSERT_TRUE(fake_call != nullptr); - - webrtc::Call::Config call_config = fake_call->GetConfig(); - - ASSERT_TRUE(voice_engine_.voe()->engine() != nullptr); - ASSERT_EQ(voice_engine_.voe()->engine(), call_config.voice_engine); - - EXPECT_TRUE(channel->AddRecvStream(StreamParams::CreateLegacy(kSsrc))); - EXPECT_TRUE(channel->AddRecvStream(StreamParams::CreateLegacy(kSsrc + 1))); - std::vector receive_streams = - fake_call->GetVideoReceiveStreams(); - - ASSERT_EQ(2u, receive_streams.size()); - EXPECT_EQ(webrtc_voice_channel->voe_channel(), - receive_streams[0]->GetConfig().audio_channel_id); - EXPECT_EQ(-1, receive_streams[1]->GetConfig().audio_channel_id) - << "AV sync should only be set up for the first receive channel."; -} - TEST_F(WebRtcVideoEngine2Test, FindCodec) { const std::vector& c = engine_.codecs(); EXPECT_EQ(cricket::DefaultVideoCodecList().size(), c.size()); @@ -1003,6 +963,20 @@ class WebRtcVideoChannel2Test : public WebRtcVideoEngine2Test, uint32 last_ssrc_; }; +TEST_F(WebRtcVideoChannel2Test, SetsSyncGroupFromSyncLabel) { + const uint32 kVideoSsrc = 123; + const std::string kSyncLabel = "AvSyncLabel"; + + cricket::StreamParams sp = cricket::StreamParams::CreateLegacy(kVideoSsrc); + sp.sync_label = kSyncLabel; + EXPECT_TRUE(channel_->AddRecvStream(sp)); + + EXPECT_EQ(1, fake_call_->GetVideoReceiveStreams().size()); + EXPECT_EQ(kSyncLabel, + fake_call_->GetVideoReceiveStreams()[0]->GetConfig().sync_group) + << "SyncGroup should be set based on sync_label"; +} + TEST_F(WebRtcVideoChannel2Test, RecvStreamWithSimAndRtx) { EXPECT_TRUE(channel_->SetSendCodecs(engine_.codecs())); EXPECT_TRUE(channel_->SetSend(true)); diff --git a/talk/media/webrtc/webrtcvoiceengine.cc b/talk/media/webrtc/webrtcvoiceengine.cc index 284afed4d1..6710539aed 100644 --- a/talk/media/webrtc/webrtcvoiceengine.cc +++ b/talk/media/webrtc/webrtcvoiceengine.cc @@ -1624,8 +1624,7 @@ class WebRtcVoiceMediaChannel::WebRtcVoiceChannelRenderer webrtc::AudioTransport* voe_audio_transport) : channel_(ch), voe_audio_transport_(voe_audio_transport), - renderer_(NULL) { - } + renderer_(NULL) {} ~WebRtcVoiceChannelRenderer() override { Stop(); } // Starts the rendering by setting a sink to the renderer to get data @@ -2479,9 +2478,9 @@ bool 
WebRtcVoiceMediaChannel::AddSendStream(const StreamParams& sp) { // delete the channel in case failure happens below. webrtc::AudioTransport* audio_transport = engine()->voe()->base()->audio_transport(); - send_channels_.insert(std::make_pair( - sp.first_ssrc(), - new WebRtcVoiceChannelRenderer(channel, audio_transport))); + send_channels_.insert( + std::make_pair(sp.first_ssrc(), + new WebRtcVoiceChannelRenderer(channel, audio_transport))); // Set the send (local) SSRC. // If there are multiple send SSRCs, we can only set the first one here, and @@ -2574,7 +2573,7 @@ bool WebRtcVoiceMediaChannel::AddRecvStream(const StreamParams& sp) { return false; } - TryAddAudioRecvStream(ssrc); + DCHECK(receive_stream_params_.find(ssrc) == receive_stream_params_.end()); // Reuse default channel for recv stream in non-conference mode call // when the default channel is not being used. @@ -2583,9 +2582,11 @@ bool WebRtcVoiceMediaChannel::AddRecvStream(const StreamParams& sp) { if (!InConferenceMode() && default_receive_ssrc_ == 0) { LOG(LS_INFO) << "Recv stream " << ssrc << " reuse default channel"; default_receive_ssrc_ = ssrc; - receive_channels_.insert(std::make_pair( - default_receive_ssrc_, - new WebRtcVoiceChannelRenderer(voe_channel(), audio_transport))); + WebRtcVoiceChannelRenderer* channel_renderer = + new WebRtcVoiceChannelRenderer(voe_channel(), audio_transport); + receive_channels_.insert(std::make_pair(ssrc, channel_renderer)); + receive_stream_params_[ssrc] = sp; + TryAddAudioRecvStream(ssrc); return SetPlayout(voe_channel(), playout_); } @@ -2601,9 +2602,11 @@ bool WebRtcVoiceMediaChannel::AddRecvStream(const StreamParams& sp) { return false; } - receive_channels_.insert( - std::make_pair( - ssrc, new WebRtcVoiceChannelRenderer(channel, audio_transport))); + WebRtcVoiceChannelRenderer* channel_renderer = + new WebRtcVoiceChannelRenderer(channel, audio_transport); + receive_channels_.insert(std::make_pair(ssrc, channel_renderer)); + receive_stream_params_[ssrc] = sp; + TryAddAudioRecvStream(ssrc); LOG(LS_INFO) << "New audio stream " << ssrc << " registered to VoiceEngine channel #" @@ -2694,6 +2697,7 @@ bool WebRtcVoiceMediaChannel::RemoveRecvStream(uint32 ssrc) { } TryRemoveAudioRecvStream(ssrc); + receive_stream_params_.erase(ssrc); // Delete the WebRtcVoiceChannelRenderer object connected to the channel, this // will disconnect the audio renderer with the receive channel. @@ -3450,7 +3454,7 @@ int WebRtcVoiceMediaChannel::GetReceiveChannelNum(uint32 ssrc) { ChannelMap::iterator it = receive_channels_.find(ssrc); if (it != receive_channels_.end()) return it->second->channel(); - return (ssrc == default_receive_ssrc_) ? voe_channel() : -1; + return (ssrc == default_receive_ssrc_) ? voe_channel() : -1; } int WebRtcVoiceMediaChannel::GetSendChannelNum(uint32 ssrc) { @@ -3623,15 +3627,23 @@ bool WebRtcVoiceMediaChannel::SetHeaderExtension(ExtensionSetterFunction setter, void WebRtcVoiceMediaChannel::TryAddAudioRecvStream(uint32 ssrc) { DCHECK(thread_checker_.CalledOnValidThread()); + WebRtcVoiceChannelRenderer* channel = receive_channels_[ssrc]; + DCHECK(channel != nullptr); + DCHECK(receive_streams_.find(ssrc) == receive_streams_.end()); // If we are hooked up to a webrtc::Call, create an AudioReceiveStream too. 
- if (call_ && options_.combined_audio_video_bwe.GetWithDefaultIfUnset(false)) { - DCHECK(receive_streams_.find(ssrc) == receive_streams_.end()); - webrtc::AudioReceiveStream::Config config; - config.rtp.remote_ssrc = ssrc; - config.rtp.extensions = recv_rtp_extensions_; - webrtc::AudioReceiveStream* s = call_->CreateAudioReceiveStream(config); - receive_streams_.insert(std::make_pair(ssrc, s)); + if (!call_) { + return; } + webrtc::AudioReceiveStream::Config config; + config.rtp.remote_ssrc = ssrc; + // Only add RTP extensions if we support combined A/V BWE. + if (options_.combined_audio_video_bwe.GetWithDefaultIfUnset(false)) { + config.rtp.extensions = recv_rtp_extensions_; + } + config.voe_channel_id = channel->channel(); + config.sync_group = receive_stream_params_[ssrc].sync_label; + webrtc::AudioReceiveStream* s = call_->CreateAudioReceiveStream(config); + receive_streams_.insert(std::make_pair(ssrc, s)); } void WebRtcVoiceMediaChannel::TryRemoveAudioRecvStream(uint32 ssrc) { diff --git a/talk/media/webrtc/webrtcvoiceengine.h b/talk/media/webrtc/webrtcvoiceengine.h index 35f2dbc93e..c28721f899 100644 --- a/talk/media/webrtc/webrtcvoiceengine.h +++ b/talk/media/webrtc/webrtcvoiceengine.h @@ -444,10 +444,10 @@ class WebRtcVoiceMediaChannel : public VoiceMediaChannel, // case it will only be there if a non-zero default_receive_ssrc_ is set. ChannelMap receive_channels_; // for multiple sources std::map receive_streams_; + std::map receive_stream_params_; // receive_channels_ can be read from WebRtc callback thread. Access from // the WebRtc thread must be synchronized with edits on the worker thread. // Reads on the worker thread are ok. - // std::vector receive_extensions_; std::vector recv_rtp_extensions_; diff --git a/talk/media/webrtc/webrtcvoiceengine_unittest.cc b/talk/media/webrtc/webrtcvoiceengine_unittest.cc index 0dc7b510ee..77f437c60a 100644 --- a/talk/media/webrtc/webrtcvoiceengine_unittest.cc +++ b/talk/media/webrtc/webrtcvoiceengine_unittest.cc @@ -3353,6 +3353,34 @@ TEST(WebRtcVoiceEngineTest, CoInitialize) { } #endif +TEST_F(WebRtcVoiceEngineTestFake, SetsSyncGroupFromSyncLabel) { + cricket::FakeCall call(webrtc::Call::Config(nullptr)); + const uint32 kAudioSsrc = 123; + const std::string kSyncLabel = "AvSyncLabel"; + + EXPECT_TRUE(SetupEngine()); + cricket::WebRtcVoiceMediaChannel* media_channel = + static_cast(channel_); + media_channel->SetCall(&call); + cricket::StreamParams sp = cricket::StreamParams::CreateLegacy(kAudioSsrc); + sp.sync_label = kSyncLabel; + // Creating two channels to make sure that sync label is set properly for both + // the default voice channel and following ones. + EXPECT_TRUE(channel_->AddRecvStream(sp)); + sp.ssrcs[0] += 1; + EXPECT_TRUE(channel_->AddRecvStream(sp)); + + ASSERT_EQ(2, call.GetAudioReceiveStreams().size()); + EXPECT_EQ(kSyncLabel, + call.GetAudioReceiveStream(kAudioSsrc)->GetConfig().sync_group) + << "SyncGroup should be set based on sync_label"; + EXPECT_EQ(kSyncLabel, + call.GetAudioReceiveStream(kAudioSsrc + 1)->GetConfig().sync_group) + << "SyncGroup should be set based on sync_label"; + + media_channel->SetCall(nullptr); +} + TEST_F(WebRtcVoiceEngineTestFake, ChangeCombinedBweOption_Call) { // Test that changing the combined_audio_video_bwe option results in the // expected state changes on an associated Call. 
@@ -3363,27 +3391,45 @@ TEST_F(WebRtcVoiceEngineTestFake, ChangeCombinedBweOption_Call) { EXPECT_TRUE(SetupEngine()); cricket::WebRtcVoiceMediaChannel* media_channel = static_cast(channel_); + const auto& rtp_extensions = engine_.rtp_header_extensions(); + media_channel->SetRecvRtpHeaderExtensions(rtp_extensions); media_channel->SetCall(&call); EXPECT_TRUE(media_channel->AddRecvStream( cricket::StreamParams::CreateLegacy(kAudioSsrc1))); EXPECT_TRUE(media_channel->AddRecvStream( cricket::StreamParams::CreateLegacy(kAudioSsrc2))); - // Combined BWE should not be set up yet. - EXPECT_EQ(0, call.GetAudioReceiveStreams().size()); + // Combined BWE should not be set up yet (no RTP extensions). + EXPECT_EQ(2, call.GetAudioReceiveStreams().size()); + EXPECT_TRUE(call.GetAudioReceiveStream(kAudioSsrc1) + ->GetConfig() + .rtp.extensions.empty()); + EXPECT_TRUE(call.GetAudioReceiveStream(kAudioSsrc2) + ->GetConfig() + .rtp.extensions.empty()); // Enable combined BWE option - now it should be set up. cricket::AudioOptions options; options.combined_audio_video_bwe.Set(true); EXPECT_TRUE(media_channel->SetOptions(options)); EXPECT_EQ(2, call.GetAudioReceiveStreams().size()); - EXPECT_NE(nullptr, call.GetAudioReceiveStream(kAudioSsrc1)); - EXPECT_NE(nullptr, call.GetAudioReceiveStream(kAudioSsrc2)); + EXPECT_FALSE(call.GetAudioReceiveStream(kAudioSsrc1) + ->GetConfig() + .rtp.extensions.empty()); + EXPECT_FALSE(call.GetAudioReceiveStream(kAudioSsrc2) + ->GetConfig() + .rtp.extensions.empty()); // Disable combined BWE option - should be disabled again. options.combined_audio_video_bwe.Set(false); EXPECT_TRUE(media_channel->SetOptions(options)); - EXPECT_EQ(0, call.GetAudioReceiveStreams().size()); + EXPECT_EQ(2, call.GetAudioReceiveStreams().size()); + EXPECT_TRUE(call.GetAudioReceiveStream(kAudioSsrc1) + ->GetConfig() + .rtp.extensions.empty()); + EXPECT_TRUE(call.GetAudioReceiveStream(kAudioSsrc2) + ->GetConfig() + .rtp.extensions.empty()); media_channel->SetCall(nullptr); } @@ -3620,4 +3666,3 @@ TEST_F(WebRtcVoiceEngineTestFake, AssociateChannelResetUponDeleteChannnel) { EXPECT_TRUE(channel_->RemoveSendStream(2)); EXPECT_EQ(voe_.GetAssociateSendChannel(recv_ch), -1); } - diff --git a/webrtc/audio_receive_stream.h b/webrtc/audio_receive_stream.h index c68d2647f2..5e200785a9 100644 --- a/webrtc/audio_receive_stream.h +++ b/webrtc/audio_receive_stream.h @@ -43,6 +43,16 @@ class AudioReceiveStream { std::vector extensions; } rtp; + // Underlying VoiceEngine handle, used to map AudioReceiveStream to + // lower-level components. Temporarily used while VoiceEngine channels are + // created outside of Call. + int voe_channel_id = -1; + + // Identifier for an A/V synchronization group. Empty string to disable. + // TODO(pbos): Synchronize streams in a sync group, not just one video + // stream to one audio stream. Tracked by issue webrtc:4762. + std::string sync_group; + // Decoders for every payload that we can receive. Call owns the // AudioDecoder instances once the Config is submitted to // Call::CreateReceiveStream(). 
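Illustration (sketch only, not part of this CL): the new AudioReceiveStream::Config fields above, together with VideoReceiveStream::Config::sync_group, are what an embedder sets to opt into A/V sync — receive streams that share the same sync_group string are paired by Call. The wiring below is modeled on the call_perf_tests.cc change in this CL; the Call*, the VoiceEngine channel id, the SSRC values, and the helper name are placeholders assumed to be set up elsewhere, and decoders/renderer configuration is omitted for brevity.

  // Illustrative sketch: pair one audio and one video receive stream for
  // A/V sync by giving them the same sync_group.
  #include <string>

  #include "webrtc/audio_receive_stream.h"
  #include "webrtc/call.h"
  #include "webrtc/video_receive_stream.h"

  void CreateSyncedReceiveStreams(webrtc::Call* call, int voe_channel_id) {
    const std::string kSyncGroup = "av_sync";

    webrtc::AudioReceiveStream::Config audio_config;
    audio_config.rtp.remote_ssrc = 1111;           // Placeholder audio SSRC.
    audio_config.voe_channel_id = voe_channel_id;  // Required, must not be -1.
    audio_config.sync_group = kSyncGroup;

    webrtc::VideoReceiveStream::Config video_config;
    video_config.rtp.remote_ssrc = 2222;           // Placeholder video SSRC.
    video_config.sync_group = kSyncGroup;          // Same group -> synced pair.
    // (Decoders, renderer, etc. omitted; a real config needs them.)

    // Creation order does not matter: Call::ConfigureSync() pairs the streams
    // whichever is created first (covered by the new call_perf_tests below).
    webrtc::AudioReceiveStream* audio_stream =
        call->CreateAudioReceiveStream(audio_config);
    webrtc::VideoReceiveStream* video_stream =
        call->CreateVideoReceiveStream(video_config);
    // ... use the streams, then destroy them through the same Call.
  }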
diff --git a/webrtc/video/audio_receive_stream.cc b/webrtc/video/audio_receive_stream.cc index 332d95b903..ef0eb783e2 100644 --- a/webrtc/video/audio_receive_stream.cc +++ b/webrtc/video/audio_receive_stream.cc @@ -34,6 +34,9 @@ std::string AudioReceiveStream::Config::Rtp::ToString() const { std::string AudioReceiveStream::Config::ToString() const { std::stringstream ss; ss << "{rtp: " << rtp.ToString(); + ss << ", voe_channel_id: " << voe_channel_id; + if (!sync_group.empty()) + ss << ", sync_group: " << sync_group; ss << '}'; return ss.str(); } @@ -45,6 +48,7 @@ AudioReceiveStream::AudioReceiveStream( : remote_bitrate_estimator_(remote_bitrate_estimator), config_(config), rtp_header_parser_(RtpHeaderParser::Create()) { + DCHECK(config.voe_channel_id != -1); DCHECK(remote_bitrate_estimator_ != nullptr); DCHECK(rtp_header_parser_ != nullptr); for (const auto& ext : config.rtp.extensions) { diff --git a/webrtc/video/bitrate_estimator_tests.cc b/webrtc/video/bitrate_estimator_tests.cc index a0fdc92b61..872fe44803 100644 --- a/webrtc/video/bitrate_estimator_tests.cc +++ b/webrtc/video/bitrate_estimator_tests.cc @@ -204,6 +204,10 @@ class BitrateEstimatorTest : public test::CallTest { if (receive_audio) { AudioReceiveStream::Config receive_config; receive_config.rtp.remote_ssrc = test_->send_config_.rtp.ssrcs[0]; + // Bogus non-default id to prevent hitting a DCHECK when creating the + // AudioReceiveStream. Every receive stream has to correspond to an + // underlying channel id. + receive_config.voe_channel_id = 0; receive_config.rtp.extensions.push_back( RtpExtension(RtpExtension::kAbsSendTime, kASTExtensionId)); audio_receive_stream_ = test_->receiver_call_->CreateAudioReceiveStream( diff --git a/webrtc/video/call.cc b/webrtc/video/call.cc index 39eb7d6bbc..a73932509b 100644 --- a/webrtc/video/call.cc +++ b/webrtc/video/call.cc @@ -109,6 +109,9 @@ class Call : public webrtc::Call, public PacketReceiver { void SetBitrateControllerConfig( const webrtc::Call::Config::BitrateConfig& bitrate_config); + void ConfigureSync(const std::string& sync_group) + EXCLUSIVE_LOCKS_REQUIRED(receive_crit_); + const int num_cpu_cores_; const rtc::scoped_ptr module_process_thread_; const rtc::scoped_ptr channel_group_; @@ -130,6 +133,8 @@ class Call : public webrtc::Call, public PacketReceiver { GUARDED_BY(receive_crit_); std::set video_receive_streams_ GUARDED_BY(receive_crit_); + std::map sync_stream_mapping_ + GUARDED_BY(receive_crit_); rtc::scoped_ptr send_crit_; std::map video_send_ssrcs_ GUARDED_BY(send_crit_); @@ -219,6 +224,7 @@ webrtc::AudioReceiveStream* Call::CreateAudioReceiveStream( DCHECK(audio_receive_ssrcs_.find(config.rtp.remote_ssrc) == audio_receive_ssrcs_.end()); audio_receive_ssrcs_[config.rtp.remote_ssrc] = receive_stream; + ConfigureSync(config.sync_group); } return receive_stream; } @@ -234,6 +240,13 @@ void Call::DestroyAudioReceiveStream( size_t num_deleted = audio_receive_ssrcs_.erase( audio_receive_stream->config().rtp.remote_ssrc); DCHECK(num_deleted == 1); + const std::string& sync_group = audio_receive_stream->config().sync_group; + const auto it = sync_stream_mapping_.find(sync_group); + if (it != sync_stream_mapping_.end() && + it->second == audio_receive_stream) { + sync_stream_mapping_.erase(it); + ConfigureSync(sync_group); + } } delete audio_receive_stream; } @@ -324,8 +337,11 @@ webrtc::VideoReceiveStream* Call::CreateVideoReceiveStream( video_receive_ssrcs_[it->second.ssrc] = receive_stream; video_receive_streams_.insert(receive_stream); + ConfigureSync(config.sync_group); 
+ if (!network_enabled_) receive_stream->SignalNetworkState(kNetworkDown); + return receive_stream; } @@ -333,7 +349,6 @@ void Call::DestroyVideoReceiveStream( webrtc::VideoReceiveStream* receive_stream) { TRACE_EVENT0("webrtc", "Call::DestroyVideoReceiveStream"); DCHECK(receive_stream != nullptr); - VideoReceiveStream* receive_stream_impl = nullptr; { WriteLockScoped write_lock(*receive_crit_); @@ -351,8 +366,9 @@ void Call::DestroyVideoReceiveStream( } } video_receive_streams_.erase(receive_stream_impl); + CHECK(receive_stream_impl != nullptr); + ConfigureSync(receive_stream_impl->config().sync_group); } - CHECK(receive_stream_impl != nullptr); delete receive_stream_impl; } @@ -428,6 +444,54 @@ void Call::SignalNetworkState(NetworkState state) { } } +void Call::ConfigureSync(const std::string& sync_group) { + // Set sync only if there was no previous one. + if (config_.voice_engine == nullptr || sync_group.empty()) + return; + + AudioReceiveStream* sync_audio_stream = nullptr; + // Find existing audio stream. + const auto it = sync_stream_mapping_.find(sync_group); + if (it != sync_stream_mapping_.end()) { + sync_audio_stream = it->second; + } else { + // No configured audio stream, see if we can find one. + for (const auto& kv : audio_receive_ssrcs_) { + if (kv.second->config().sync_group == sync_group) { + if (sync_audio_stream != nullptr) { + LOG(LS_WARNING) << "Attempting to sync more than one audio stream " + "within the same sync group. This is not " + "supported in the current implementation."; + break; + } + sync_audio_stream = kv.second; + } + } + } + if (sync_audio_stream) + sync_stream_mapping_[sync_group] = sync_audio_stream; + size_t num_synced_streams = 0; + for (VideoReceiveStream* video_stream : video_receive_streams_) { + if (video_stream->config().sync_group != sync_group) + continue; + ++num_synced_streams; + if (num_synced_streams > 1) { + // TODO(pbos): Support synchronizing more than one A/V pair. + // https://code.google.com/p/webrtc/issues/detail?id=4762 + LOG(LS_WARNING) << "Attempting to sync more than one audio/video pair " + "within the same sync group. This is not supported in " + "the current implementation."; + } + // Only sync the first A/V pair within this sync group. 
+ if (sync_audio_stream != nullptr && num_synced_streams == 1) { + video_stream->SetSyncChannel(config_.voice_engine, + sync_audio_stream->config().voe_channel_id); + } else { + video_stream->SetSyncChannel(config_.voice_engine, -1); + } + } +} + PacketReceiver::DeliveryStatus Call::DeliverRtcp(MediaType media_type, const uint8_t* packet, size_t length) { diff --git a/webrtc/video/call_perf_tests.cc b/webrtc/video/call_perf_tests.cc index 65e9d96e7b..fd159f54cb 100644 --- a/webrtc/video/call_perf_tests.cc +++ b/webrtc/video/call_perf_tests.cc @@ -44,7 +44,7 @@ namespace webrtc { class CallPerfTest : public test::CallTest { protected: - void TestAudioVideoSync(bool fec); + void TestAudioVideoSync(bool fec, bool create_audio_first); void TestCpuOveruse(LoadObserver::Load tested_load, int encode_delay_ms); @@ -189,7 +189,8 @@ class VideoRtcpAndSyncObserver : public SyncRtcpObserver, public VideoRenderer { int64_t first_time_in_sync_; }; -void CallPerfTest::TestAudioVideoSync(bool fec) { +void CallPerfTest::TestAudioVideoSync(bool fec, bool create_audio_first) { + const char* kSyncGroup = "av_sync"; class AudioPacketReceiver : public PacketReceiver { public: AudioPacketReceiver(int channel, VoENetwork* voe_network) @@ -269,9 +270,23 @@ void CallPerfTest::TestAudioVideoSync(bool fec) { } receive_configs_[0].rtp.nack.rtp_history_ms = 1000; receive_configs_[0].renderer = &observer; - receive_configs_[0].audio_channel_id = channel; + receive_configs_[0].sync_group = kSyncGroup; - CreateStreams(); + AudioReceiveStream::Config audio_config; + audio_config.voe_channel_id = channel; + audio_config.sync_group = kSyncGroup; + + AudioReceiveStream* audio_receive_stream = nullptr; + + if (create_audio_first) { + audio_receive_stream = + receiver_call_->CreateAudioReceiveStream(audio_config); + CreateStreams(); + } else { + CreateStreams(); + audio_receive_stream = + receiver_call_->CreateAudioReceiveStream(audio_config); + } CreateFrameGeneratorCapturer(); @@ -302,15 +317,21 @@ void CallPerfTest::TestAudioVideoSync(bool fec) { DestroyStreams(); + receiver_call_->DestroyAudioReceiveStream(audio_receive_stream); + VoiceEngine::Delete(voice_engine); } -TEST_F(CallPerfTest, PlaysOutAudioAndVideoInSync) { - TestAudioVideoSync(false); +TEST_F(CallPerfTest, PlaysOutAudioAndVideoInSyncWithAudioCreatedFirst) { + TestAudioVideoSync(false, true); +} + +TEST_F(CallPerfTest, PlaysOutAudioAndVideoInSyncWithVideoCreatedFirst) { + TestAudioVideoSync(false, false); } TEST_F(CallPerfTest, PlaysOutAudioAndVideoInSyncWithFec) { - TestAudioVideoSync(true); + TestAudioVideoSync(true, false); } void CallPerfTest::TestCaptureNtpTime(const FakeNetworkPipe::Config& net_config, diff --git a/webrtc/video/video_receive_stream.cc b/webrtc/video/video_receive_stream.cc index d0c17d649e..d7874fa0df 100644 --- a/webrtc/video/video_receive_stream.cc +++ b/webrtc/video/video_receive_stream.cc @@ -47,7 +47,8 @@ std::string VideoReceiveStream::Config::ToString() const { ss << ", rtp: " << rtp.ToString(); ss << ", renderer: " << (renderer != nullptr ? "(renderer)" : "nullptr"); ss << ", render_delay_ms: " << render_delay_ms; - ss << ", audio_channel_id: " << audio_channel_id; + if (!sync_group.empty()) + ss << ", sync_group: " << sync_group; ss << ", pre_decode_callback: " << (pre_decode_callback != nullptr ? 
"(EncodedFrameObserver)" : "nullptr"); ss << ", pre_render_callback: " @@ -135,8 +136,7 @@ VideoReceiveStream::VideoReceiveStream(int num_cpu_cores, config_(config), clock_(Clock::GetRealTimeClock()), channel_group_(channel_group), - channel_id_(channel_id), - voe_sync_interface_(nullptr) { + channel_id_(channel_id) { CHECK(channel_group_->CreateReceiveChannel(channel_id_, 0, base_channel_id, &transport_adapter_, num_cpu_cores, true)); @@ -242,11 +242,6 @@ VideoReceiveStream::VideoReceiveStream(int num_cpu_cores, incoming_video_stream_->SetExternalCallback(this); vie_channel_->SetIncomingVideoStream(incoming_video_stream_.get()); - if (voice_engine && config_.audio_channel_id != -1) { - voe_sync_interface_ = VoEVideoSync::GetInterface(voice_engine); - vie_channel_->SetVoiceChannel(config.audio_channel_id, voe_sync_interface_); - } - if (config.pre_decode_callback) vie_channel_->RegisterPreDecodeImageCallback(&encoded_frame_proxy_); vie_channel_->RegisterPreRenderCallback(this); @@ -260,10 +255,6 @@ VideoReceiveStream::~VideoReceiveStream() { for (size_t i = 0; i < config_.decoders.size(); ++i) vie_channel_->DeRegisterExternalDecoder(config_.decoders[i].payload_type); - if (voe_sync_interface_ != nullptr) { - vie_channel_->SetVoiceChannel(-1, nullptr); - voe_sync_interface_->Release(); - } vie_channel_->RegisterCodecObserver(nullptr); vie_channel_->RegisterReceiveChannelRtpStatisticsCallback(nullptr); vie_channel_->RegisterReceiveChannelRtcpStatisticsCallback(nullptr); @@ -283,6 +274,17 @@ void VideoReceiveStream::Stop() { transport_adapter_.Disable(); } +void VideoReceiveStream::SetSyncChannel(VoiceEngine* voice_engine, + int audio_channel_id) { + if (voice_engine != nullptr && audio_channel_id != -1) { + VoEVideoSync* voe_sync_interface = VoEVideoSync::GetInterface(voice_engine); + vie_channel_->SetVoiceChannel(audio_channel_id, voe_sync_interface); + voe_sync_interface->Release(); + } else { + vie_channel_->SetVoiceChannel(-1, nullptr); + } +} + VideoReceiveStream::Stats VideoReceiveStream::GetStats() const { return stats_proxy_->GetStats(); } diff --git a/webrtc/video/video_receive_stream.h b/webrtc/video/video_receive_stream.h index e9df849c0c..af341a9b11 100644 --- a/webrtc/video/video_receive_stream.h +++ b/webrtc/video/video_receive_stream.h @@ -57,11 +57,15 @@ class VideoReceiveStream : public webrtc::VideoReceiveStream, int RenderFrame(const uint32_t /*stream_id*/, const VideoFrame& video_frame) override; + const Config& config() const { return config_; } + void SignalNetworkState(Call::NetworkState state); bool DeliverRtcp(const uint8_t* packet, size_t length); bool DeliverRtp(const uint8_t* packet, size_t length); + void SetSyncChannel(VoiceEngine* voice_engine, int audio_channel_id); + private: void SetRtcpMode(newapi::RtcpMode mode); @@ -76,8 +80,6 @@ class VideoReceiveStream : public webrtc::VideoReceiveStream, ViEChannel* vie_channel_; rtc::scoped_ptr incoming_video_stream_; - VoEVideoSync* voe_sync_interface_; - rtc::scoped_ptr stats_proxy_; }; } // namespace internal diff --git a/webrtc/video_engine/vie_sync_module.cc b/webrtc/video_engine/vie_sync_module.cc index c5a774462d..9c0aab37d5 100644 --- a/webrtc/video_engine/vie_sync_module.cc +++ b/webrtc/video_engine/vie_sync_module.cc @@ -67,6 +67,13 @@ int ViESyncModule::ConfigureSync(int voe_channel_id, RtpRtcp* video_rtcp_module, RtpReceiver* video_receiver) { CriticalSectionScoped cs(data_cs_.get()); + // Prevent expensive no-ops. 
+ if (voe_channel_id_ == voe_channel_id && + voe_sync_interface_ == voe_sync_interface && + video_receiver_ == video_receiver && + video_rtp_rtcp_ == video_rtcp_module) { + return 0; + } voe_channel_id_ = voe_channel_id; voe_sync_interface_ = voe_sync_interface; video_receiver_ = video_receiver; diff --git a/webrtc/video_receive_stream.h b/webrtc/video_receive_stream.h index a8003c1aa9..11033812d5 100644 --- a/webrtc/video_receive_stream.h +++ b/webrtc/video_receive_stream.h @@ -145,10 +145,10 @@ class VideoReceiveStream { // Only valid if 'renderer' is set. int render_delay_ms = 10; - // Audio channel corresponding to this video stream, used for audio/video - // synchronization. 'audio_channel_id' is ignored if no VoiceEngine is set - // when creating the VideoEngine instance. '-1' disables a/v sync. - int audio_channel_id = -1; + // Identifier for an A/V synchronization group. Empty string to disable. + // TODO(pbos): Synchronize streams in a sync group, not just video streams + // to one of the audio streams. + std::string sync_group; // Called for each incoming video frame, i.e. in encoded state. E.g. used // when
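For reference, the pairing rule that Call::ConfigureSync() implements in this CL can be summarized by the simplified, stand-alone sketch below (plain structs instead of the WebRTC types; names and fields are illustrative only): within one sync group, at most one audio stream is selected, only the first video stream in the group is synced to it, and any further video streams in the group have their sync channel cleared (-1).

  #include <string>
  #include <vector>

  // Simplified illustration of the pairing rule in Call::ConfigureSync():
  // one audio stream per sync group, synced to the first video stream only.
  struct AudioStream { std::string sync_group; int voe_channel_id; };
  struct VideoStream {
    std::string sync_group;
    int synced_voe_channel = -1;  // -1 means "no A/V sync".
  };

  void ConfigureSyncForGroup(const std::string& group,
                             const std::vector<AudioStream>& audio_streams,
                             std::vector<VideoStream>* video_streams) {
    if (group.empty())
      return;  // An empty sync_group disables A/V sync.
    const AudioStream* audio = nullptr;
    for (const AudioStream& a : audio_streams) {
      if (a.sync_group == group) {
        audio = &a;  // Additional audio streams in the group are ignored.
        break;
      }
    }
    bool first_video = true;
    for (VideoStream& v : *video_streams) {
      if (v.sync_group != group)
        continue;
      // Only the first matching video stream is synced; the rest are reset.
      v.synced_voe_channel =
          (audio != nullptr && first_video) ? audio->voe_channel_id : -1;
      first_video = false;
    }
  }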