diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp
index 1988287d4c..79c84f95c5 100644
--- a/webrtc/modules/modules.gyp
+++ b/webrtc/modules/modules.gyp
@@ -241,6 +241,10 @@
         'video_coding/codecs/test/videoprocessor_unittest.cc',
         'video_coding/codecs/vp8/default_temporal_layers_unittest.cc',
         'video_coding/codecs/vp8/reference_picture_selection_unittest.cc',
+        'video_coding/codecs/vp8/screenshare_layers_unittest.cc',
+        'video_coding/codecs/vp8/simulcast_encoder_adapter_unittest.cc',
+        'video_coding/codecs/vp8/simulcast_unittest.cc',
+        'video_coding/codecs/vp8/simulcast_unittest.h',
         'video_coding/main/interface/mock/mock_vcm_callbacks.h',
         'video_coding/main/source/decoding_state_unittest.cc',
         'video_coding/main/source/jitter_buffer_unittest.cc',
diff --git a/webrtc/modules/video_coding/BUILD.gn b/webrtc/modules/video_coding/BUILD.gn
index 706e9d99a7..7a7b0f0b8c 100644
--- a/webrtc/modules/video_coding/BUILD.gn
+++ b/webrtc/modules/video_coding/BUILD.gn
@@ -125,13 +125,18 @@ source_set("webrtc_vp8") {
   sources = [
     "codecs/vp8/default_temporal_layers.cc",
     "codecs/vp8/default_temporal_layers.h",
+    "codecs/vp8/include/vp8.h",
+    "codecs/vp8/include/vp8_common_types.h",
     "codecs/vp8/realtime_temporal_layers.cc",
     "codecs/vp8/reference_picture_selection.cc",
     "codecs/vp8/reference_picture_selection.h",
-    "codecs/vp8/include/vp8.h",
-    "codecs/vp8/include/vp8_common_types.h",
+    "codecs/vp8/screenshare_layers.cc",
+    "codecs/vp8/screenshare_layers.h",
+    "codecs/vp8/simulcast_encoder_adapter.cc",
+    "codecs/vp8/simulcast_encoder_adapter.h",
     "codecs/vp8/temporal_layers.h",
     "codecs/vp8/vp8_factory.cc",
+    "codecs/vp8/vp8_factory.h",
     "codecs/vp8/vp8_impl.cc",
     "codecs/vp8/vp8_impl.h",
   ]
@@ -159,6 +164,12 @@ source_set("webrtc_vp8") {
       "//third_party/libvpx",
     ]
   }
+  if (rtc_build_libyuv) {
+    deps += [ "//third_party/libyuv" ]
+  } else {
+    # Need to add a directory normally exported by libyuv.
+    include_dirs += [ "//third_party/libyuv/include" ]
+  }
 }
 
 source_set("webrtc_vp9") {
diff --git a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc
new file mode 100644
index 0000000000..c31fe142ea
--- /dev/null
+++ b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc
@@ -0,0 +1,159 @@
+/* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+*
+* Use of this source code is governed by a BSD-style license
+* that can be found in the LICENSE file in the root of the source
+* tree. An additional intellectual property rights grant can be found
+* in the file PATENTS. All contributing project authors may
+* be found in the AUTHORS file in the root of the source tree.
+*/
+
+#include "webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h"
+
+#include <assert.h>
+
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"
+#include "webrtc/modules/video_coding/codecs/interface/video_codec_interface.h"
+
+namespace webrtc {
+
+enum { kOneSecond90Khz = 90000 };
+
+ScreenshareLayers::ScreenshareLayers(int num_temporal_layers,
+                                     uint8_t initial_tl0_pic_idx,
+                                     FrameDropper* tl0_frame_dropper,
+                                     FrameDropper* tl1_frame_dropper)
+    : tl0_frame_dropper_(tl0_frame_dropper),
+      tl1_frame_dropper_(tl1_frame_dropper),
+      number_of_temporal_layers_(num_temporal_layers),
+      last_base_layer_sync_(false),
+      tl0_pic_idx_(initial_tl0_pic_idx),
+      active_layer_(0),
+      framerate_(5),
+      last_sync_timestamp_(-1) {
+  assert(num_temporal_layers > 0);
+  assert(num_temporal_layers <= 2);
+  assert(tl0_frame_dropper && tl1_frame_dropper);
+}
+
+int ScreenshareLayers::CurrentLayerId() const {
+  // Codec does not use temporal layers for screenshare.
+  return 0;
+}
+
+int ScreenshareLayers::EncodeFlags(uint32_t timestamp) {
+  if (number_of_temporal_layers_ <= 1) {
+    // No flags needed for 1 layer screenshare.
+    return 0;
+  }
+  CalculateFramerate(timestamp);
+  int flags = 0;
+  // Note that ARF on purpose isn't used in this scheme since it is allocated
+  // for the last key frame to make key frame caching possible.
+  if (tl0_frame_dropper_->DropFrame()) {
+    // Must drop TL0, encode TL1 instead.
+    if (tl1_frame_dropper_->DropFrame()) {
+      // Must drop both TL0 and TL1.
+      flags = -1;
+    } else {
+      active_layer_ = 1;
+      if (TimeToSync(timestamp)) {
+        last_sync_timestamp_ = timestamp;
+        // Allow predicting from only TL0 to allow participants to switch to the
+        // high bitrate stream. This means predicting only from the LAST
+        // reference frame, but only updating GF to not corrupt TL0.
+        flags = VP8_EFLAG_NO_REF_ARF;
+        flags |= VP8_EFLAG_NO_REF_GF;
+        flags |= VP8_EFLAG_NO_UPD_ARF;
+        flags |= VP8_EFLAG_NO_UPD_LAST;
+      } else {
+        // Allow predicting from both TL0 and TL1.
+        flags = VP8_EFLAG_NO_REF_ARF;
+        flags |= VP8_EFLAG_NO_UPD_ARF;
+        flags |= VP8_EFLAG_NO_UPD_LAST;
+      }
+    }
+  } else {
+    active_layer_ = 0;
+    // Since this is TL0 we only allow updating and predicting from the LAST
+    // reference frame.
+    flags = VP8_EFLAG_NO_UPD_GF;
+    flags |= VP8_EFLAG_NO_UPD_ARF;
+    flags |= VP8_EFLAG_NO_REF_GF;
+    flags |= VP8_EFLAG_NO_REF_ARF;
+  }
+  // Make sure both frame droppers leak out bits.
+  tl0_frame_dropper_->Leak(framerate_);
+  tl1_frame_dropper_->Leak(framerate_);
+  return flags;
+}
+
+bool ScreenshareLayers::ConfigureBitrates(int bitrate_kbit,
+                                          int max_bitrate_kbit,
+                                          int framerate,
+                                          vpx_codec_enc_cfg_t* cfg) {
+  if (framerate > 0) {
+    framerate_ = framerate;
+  }
+  tl0_frame_dropper_->SetRates(bitrate_kbit, framerate_);
+  tl1_frame_dropper_->SetRates(max_bitrate_kbit, framerate_);
+  return true;
+}
+
+void ScreenshareLayers::FrameEncoded(unsigned int size, uint32_t timestamp) {
+  if (active_layer_ == 0) {
+    tl0_frame_dropper_->Fill(size, true);
+  }
+  tl1_frame_dropper_->Fill(size, true);
+}
+
+void ScreenshareLayers::PopulateCodecSpecific(bool base_layer_sync,
+                                              CodecSpecificInfoVP8 *vp8_info,
+                                              uint32_t timestamp) {
+  if (number_of_temporal_layers_ == 1) {
+    vp8_info->temporalIdx = kNoTemporalIdx;
+    vp8_info->layerSync = false;
+    vp8_info->tl0PicIdx = kNoTl0PicIdx;
+  } else {
+    vp8_info->temporalIdx = active_layer_;
+    if (base_layer_sync) {
+      vp8_info->temporalIdx = 0;
+      last_sync_timestamp_ = timestamp;
+    } else if (last_base_layer_sync_ && vp8_info->temporalIdx != 0) {
+      // Regardless of pattern the frame after a base layer sync will always
+      // be a layer sync.
+      last_sync_timestamp_ = timestamp;
+    }
+    vp8_info->layerSync = (last_sync_timestamp_ == timestamp);
+    if (vp8_info->temporalIdx == 0) {
+      tl0_pic_idx_++;
+    }
+    last_base_layer_sync_ = base_layer_sync;
+    vp8_info->tl0PicIdx = tl0_pic_idx_;
+  }
+}
+
+bool ScreenshareLayers::TimeToSync(uint32_t timestamp) const {
+  const uint32_t timestamp_diff = timestamp - last_sync_timestamp_;
+  return last_sync_timestamp_ < 0 || timestamp_diff > kOneSecond90Khz;
+}
+
+void ScreenshareLayers::CalculateFramerate(uint32_t timestamp) {
+  timestamp_list_.push_front(timestamp);
+  // Remove timestamps older than 1 second from the list.
+  uint32_t timestamp_diff = timestamp - timestamp_list_.back();
+  while (timestamp_diff > kOneSecond90Khz) {
+    timestamp_list_.pop_back();
+    timestamp_diff = timestamp - timestamp_list_.back();
+  }
+  // If we have encoded frames within the last second, that number of frames
+  // is a reasonable first estimate of the framerate.
+  framerate_ = timestamp_list_.size();
+  if (timestamp_diff > 0) {
+    // Estimate the framerate by dividing the number of timestamp diffs with
+    // the sum of the timestamp diffs (with rounding).
+    framerate_ = (kOneSecond90Khz * (timestamp_list_.size() - 1) +
+        timestamp_diff / 2) / timestamp_diff;
+  }
+}
+}  // namespace webrtc
diff --git a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h
new file mode 100644
index 0000000000..ce974894fb
--- /dev/null
+++ b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+*
+* Use of this source code is governed by a BSD-style license
+* that can be found in the LICENSE file in the root of the source
+* tree. An additional intellectual property rights grant can be found
+* in the file PATENTS. All contributing project authors may
+* be found in the AUTHORS file in the root of the source tree.
+*/
+#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SCREENSHARE_LAYERS_H_
+#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SCREENSHARE_LAYERS_H_
+
+#include <list>
+
+#include "webrtc/modules/video_coding/codecs/vp8/temporal_layers.h"
+#include "webrtc/modules/video_coding/utility/include/frame_dropper.h"
+#include "webrtc/typedefs.h"
+
+// libvpx forward declaration.
+typedef struct vpx_codec_enc_cfg vpx_codec_enc_cfg_t;
+
+namespace webrtc {
+
+struct CodecSpecificInfoVP8;
+
+class ScreenshareLayers : public TemporalLayers {
+ public:
+  ScreenshareLayers(int num_temporal_layers,
+                    uint8_t initial_tl0_pic_idx,
+                    FrameDropper* tl0_frame_dropper,
+                    FrameDropper* tl1_frame_dropper);
+  virtual ~ScreenshareLayers() {}
+
+  // Returns the recommended VP8 encode flags needed. May refresh the decoder
+  // and/or update the reference buffers.
+  virtual int EncodeFlags(uint32_t timestamp);
+
+  virtual bool ConfigureBitrates(int bitrate_kbit,
+                                 int max_bitrate_kbit,
+                                 int framerate,
+                                 vpx_codec_enc_cfg_t* cfg);
+
+  virtual void PopulateCodecSpecific(bool base_layer_sync,
+                                     CodecSpecificInfoVP8 *vp8_info,
+                                     uint32_t timestamp);
+
+  virtual void FrameEncoded(unsigned int size, uint32_t timestamp);
+
+  virtual int CurrentLayerId() const;
+
+ private:
+  void CalculateFramerate(uint32_t timestamp);
+  bool TimeToSync(uint32_t timestamp) const;
+
+  FrameDropper* tl0_frame_dropper_;
+  FrameDropper* tl1_frame_dropper_;
+  int number_of_temporal_layers_;
+  bool last_base_layer_sync_;
+  uint8_t tl0_pic_idx_;
+  int active_layer_;
+  std::list<uint32_t> timestamp_list_;
+  int framerate_;
+  int64_t last_sync_timestamp_;
+};
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SCREENSHARE_LAYERS_H_
diff --git a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc
new file mode 100644
index 0000000000..c3090e10db
--- /dev/null
+++ b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc
@@ -0,0 +1,244 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "gtest/gtest.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"
+#include "webrtc/modules/video_coding/codecs/interface/video_codec_interface.h"
+#include "webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h"
+#include "webrtc/modules/video_coding/utility/include/mock/mock_frame_dropper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+using ::testing::_;
+using ::testing::NiceMock;
+using ::testing::Return;
+
+namespace webrtc {
+
+enum { kTimestampDelta5Fps = 90000 / 5 };  // 5 frames per second at 90 kHz.
+enum { kTimestampDelta30Fps = 90000 / 30 };  // 30 frames per second at 90 kHz.
+enum { kFrameSize = 2500 };
+
+const int kFlagsTL0 = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
+    VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
+const int kFlagsTL1 = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
+    VP8_EFLAG_NO_UPD_LAST;
+const int kFlagsTL1Sync = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF |
+    VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+
+class ScreenshareLayerTest : public ::testing::Test {
+ protected:
+  void SetEncodeExpectations(bool drop_tl0, bool drop_tl1, int framerate) {
+    EXPECT_CALL(tl0_frame_dropper_, DropFrame())
+        .Times(1)
+        .WillRepeatedly(Return(drop_tl0));
+    if (drop_tl0) {
+      EXPECT_CALL(tl1_frame_dropper_, DropFrame())
+          .Times(1)
+          .WillRepeatedly(Return(drop_tl1));
+    }
+    EXPECT_CALL(tl0_frame_dropper_, Leak(framerate))
+        .Times(1);
+    EXPECT_CALL(tl1_frame_dropper_, Leak(framerate))
+        .Times(1);
+    if (drop_tl0) {
+      EXPECT_CALL(tl0_frame_dropper_, Fill(_, _))
+          .Times(0);
+      if (drop_tl1) {
+        EXPECT_CALL(tl1_frame_dropper_, Fill(_, _))
+            .Times(0);
+      } else {
+        EXPECT_CALL(tl1_frame_dropper_, Fill(kFrameSize, true))
+            .Times(1);
+      }
+    } else {
+      EXPECT_CALL(tl0_frame_dropper_, Fill(kFrameSize, true))
+          .Times(1);
+      EXPECT_CALL(tl1_frame_dropper_, Fill(kFrameSize, true))
+          .Times(1);
+    }
+  }
+
+  void EncodeFrame(uint32_t timestamp,
+                   bool base_sync,
+                   CodecSpecificInfoVP8* vp8_info,
+                   int* flags) {
+    *flags = layers_->EncodeFlags(timestamp);
+    layers_->PopulateCodecSpecific(base_sync, vp8_info, timestamp);
+    layers_->FrameEncoded(kFrameSize, timestamp);
+  }
+
+  NiceMock<MockFrameDropper> tl0_frame_dropper_;
+  NiceMock<MockFrameDropper> tl1_frame_dropper_;
+  scoped_ptr<ScreenshareLayers> layers_;
+};
+
+TEST_F(ScreenshareLayerTest, 1Layer) {
+  layers_.reset(new ScreenshareLayers(1, 0, &tl0_frame_dropper_,
+                                      &tl1_frame_dropper_));
+  EXPECT_TRUE(layers_->ConfigureBitrates(100, 1000, 5, NULL));
+  int flags = 0;
+  uint32_t timestamp = 0;
+  CodecSpecificInfoVP8 vp8_info;
+  // One layer screenshare should not use the frame dropper as all frames will
+  // belong to the base layer.
+  EXPECT_CALL(tl0_frame_dropper_, DropFrame())
+      .Times(0);
+  EXPECT_CALL(tl1_frame_dropper_, DropFrame())
+      .Times(0);
+  flags = layers_->EncodeFlags(timestamp);
+  EXPECT_EQ(0, flags);
+  layers_->PopulateCodecSpecific(false, &vp8_info, timestamp);
+  EXPECT_EQ(static_cast<uint8_t>(kNoTemporalIdx), vp8_info.temporalIdx);
+  EXPECT_FALSE(vp8_info.layerSync);
+  EXPECT_EQ(kNoTl0PicIdx, vp8_info.tl0PicIdx);
+  layers_->FrameEncoded(kFrameSize, timestamp);
+
+  EXPECT_CALL(tl0_frame_dropper_, DropFrame())
+      .Times(0);
+  EXPECT_CALL(tl1_frame_dropper_, DropFrame())
+      .Times(0);
+  flags = layers_->EncodeFlags(timestamp);
+  EXPECT_EQ(0, flags);
+  timestamp += kTimestampDelta5Fps;
+  layers_->PopulateCodecSpecific(false, &vp8_info, timestamp);
+  EXPECT_EQ(static_cast<uint8_t>(kNoTemporalIdx), vp8_info.temporalIdx);
+  EXPECT_FALSE(vp8_info.layerSync);
+  EXPECT_EQ(kNoTl0PicIdx, vp8_info.tl0PicIdx);
+  layers_->FrameEncoded(kFrameSize, timestamp);
+}
+
+TEST_F(ScreenshareLayerTest, 2Layer) {
+  layers_.reset(new ScreenshareLayers(2, 0, &tl0_frame_dropper_,
+                                      &tl1_frame_dropper_));
+  EXPECT_TRUE(layers_->ConfigureBitrates(100, 1000, 5, NULL));
+  int flags = 0;
+  uint32_t timestamp = 0;
+  uint8_t expected_tl0_idx = 0;
+  CodecSpecificInfoVP8 vp8_info;
+  SetEncodeExpectations(false, false, 1);
+  EncodeFrame(timestamp, false, &vp8_info, &flags);
+  EXPECT_EQ(kFlagsTL0, flags);
+  EXPECT_EQ(0, vp8_info.temporalIdx);
+  EXPECT_FALSE(vp8_info.layerSync);
+  ++expected_tl0_idx;
+  EXPECT_EQ(expected_tl0_idx, vp8_info.tl0PicIdx);
+
+  EXPECT_CALL(tl1_frame_dropper_, SetRates(1000, 1))
+      .Times(1);
+  EXPECT_TRUE(layers_->ConfigureBitrates(100, 1000, -1, NULL));
+  // Insert 5 frames at 30 fps. All should belong to TL0.
+  for (int i = 0; i < 5; ++i) {
+    timestamp += kTimestampDelta30Fps;
+    // First iteration has a framerate based on a single frame, thus 1.
+    SetEncodeExpectations(false, false, 30);
+    EncodeFrame(timestamp, false, &vp8_info, &flags);
+    EXPECT_EQ(0, vp8_info.temporalIdx);
+    EXPECT_FALSE(vp8_info.layerSync);
+    ++expected_tl0_idx;
+    EXPECT_EQ(expected_tl0_idx, vp8_info.tl0PicIdx);
+  }
+  // Drop two frames from TL0, thus being coded in TL1.
+  timestamp += kTimestampDelta30Fps;
+  SetEncodeExpectations(true, false, 30);
+  EncodeFrame(timestamp, false, &vp8_info, &flags);
+  EXPECT_EQ(kFlagsTL1Sync, flags);
+  EXPECT_EQ(1, vp8_info.temporalIdx);
+  EXPECT_TRUE(vp8_info.layerSync);
+  EXPECT_EQ(expected_tl0_idx, vp8_info.tl0PicIdx);
+
+  timestamp += kTimestampDelta30Fps;
+  SetEncodeExpectations(true, false, 30);
+  EncodeFrame(timestamp, false, &vp8_info, &flags);
+  EXPECT_EQ(kFlagsTL1, flags);
+  EXPECT_EQ(1, vp8_info.temporalIdx);
+  EXPECT_FALSE(vp8_info.layerSync);
+  EXPECT_EQ(expected_tl0_idx, vp8_info.tl0PicIdx);
+}
+
+TEST_F(ScreenshareLayerTest, 2LayersPeriodicSync) {
+  layers_.reset(new ScreenshareLayers(2, 0, &tl0_frame_dropper_,
+                                      &tl1_frame_dropper_));
+  EXPECT_TRUE(layers_->ConfigureBitrates(100, 1000, 5, NULL));
+  int flags = 0;
+  uint32_t timestamp = 0;
+  CodecSpecificInfoVP8 vp8_info;
+  const int kNumFrames = 10;
+  const bool kDrops[kNumFrames] = {false, true, true, true, true,
+                                   true, true, true, true, true};
+  const int kExpectedFramerates[kNumFrames] = {1, 5, 5, 5, 5, 5, 5, 5, 5, 5};
+  const bool kExpectedSyncs[kNumFrames] = {false, true, false, false, false,
+                                           false, false, true, false, false};
+  const int kExpectedTemporalIdx[kNumFrames] = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+  for (int i = 0; i < kNumFrames; ++i) {
+    timestamp += kTimestampDelta5Fps;
+    SetEncodeExpectations(kDrops[i], false, kExpectedFramerates[i]);
+    EncodeFrame(timestamp, false, &vp8_info, &flags);
+    EXPECT_EQ(kExpectedTemporalIdx[i], vp8_info.temporalIdx);
+    EXPECT_EQ(kExpectedSyncs[i], vp8_info.layerSync) << "Iteration: " << i;
+    EXPECT_EQ(1, vp8_info.tl0PicIdx);
+  }
+}
+
+TEST_F(ScreenshareLayerTest, 2LayersToggling) {
+  layers_.reset(new ScreenshareLayers(2, 0, &tl0_frame_dropper_,
+                                      &tl1_frame_dropper_));
+  EXPECT_TRUE(layers_->ConfigureBitrates(100, 1000, 5, NULL));
+  int flags = 0;
+  uint32_t timestamp = 0;
+  CodecSpecificInfoVP8 vp8_info;
+  const int kNumFrames = 10;
+  const bool kDrops[kNumFrames] = {false, true, false, true, false,
+                                   true, false, true, false, true};
+  const int kExpectedFramerates[kNumFrames] = {1, 5, 5, 5, 5, 5, 5, 5, 5, 5};
+  const bool kExpectedSyncs[kNumFrames] = {false, true, false, false, false,
+                                           false, false, true, false, false};
+  const int kExpectedTemporalIdx[kNumFrames] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
+  const int kExpectedTl0Idx[kNumFrames] = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5};
+  for (int i = 0; i < kNumFrames; ++i) {
+    timestamp += kTimestampDelta5Fps;
+    SetEncodeExpectations(kDrops[i], false, kExpectedFramerates[i]);
+    EncodeFrame(timestamp, false, &vp8_info, &flags);
+    EXPECT_EQ(kExpectedTemporalIdx[i], vp8_info.temporalIdx);
+    EXPECT_EQ(kExpectedSyncs[i], vp8_info.layerSync) << "Iteration: " << i;
+    EXPECT_EQ(kExpectedTl0Idx[i], vp8_info.tl0PicIdx);
+  }
+}
+
+TEST_F(ScreenshareLayerTest, 2LayersBothDrops) {
+  layers_.reset(new ScreenshareLayers(2, 0, &tl0_frame_dropper_,
+                                      &tl1_frame_dropper_));
+  EXPECT_TRUE(layers_->ConfigureBitrates(100, 1000, 5, NULL));
+  int flags = 0;
+  uint32_t timestamp = 0;
+  uint8_t expected_tl0_idx = 0;
+  CodecSpecificInfoVP8 vp8_info;
+  SetEncodeExpectations(false, false, 1);
+  EncodeFrame(timestamp, false, &vp8_info, &flags);
+  EXPECT_EQ(kFlagsTL0, flags);
+  EXPECT_EQ(0, vp8_info.temporalIdx);
+  EXPECT_FALSE(vp8_info.layerSync);
+  ++expected_tl0_idx;
+  EXPECT_EQ(expected_tl0_idx, vp8_info.tl0PicIdx);
+
+  timestamp += kTimestampDelta5Fps;
+  SetEncodeExpectations(true, false, 5);
+  EncodeFrame(timestamp, false, &vp8_info, &flags);
+  EXPECT_EQ(kFlagsTL1Sync, flags);
+  EXPECT_EQ(1, vp8_info.temporalIdx);
+  EXPECT_TRUE(vp8_info.layerSync);
+  EXPECT_EQ(expected_tl0_idx, vp8_info.tl0PicIdx);
+
+  timestamp += kTimestampDelta5Fps;
+  SetEncodeExpectations(true, true, 5);
+  flags = layers_->EncodeFlags(timestamp);
+  EXPECT_EQ(-1, flags);
+}
+}  // namespace webrtc
diff --git a/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.cc b/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.cc
new file mode 100644
index 0000000000..38a5bddf47
--- /dev/null
+++ b/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.cc
@@ -0,0 +1,493 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.h"
+
+#include <stdlib.h>
+
+// NOTE(ajm): Path provided by gyp.
+#include "libyuv/scale.h"  // NOLINT
+
+#include "webrtc/common.h"
+#include "webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h"
+
+namespace {
+
+const unsigned int kDefaultMinQp = 2;
+const unsigned int kDefaultMaxQp = 56;
+// Max qp for lowest spatial resolution when doing simulcast.
+const unsigned int kLowestResMaxQp = 45;
+
+uint32_t SumStreamTargetBitrate(int streams, const webrtc::VideoCodec& codec) {
+  uint32_t bitrate_sum = 0;
+  for (int i = 0; i < streams; ++i) {
+    bitrate_sum += codec.simulcastStream[i].targetBitrate;
+  }
+  return bitrate_sum;
+}
+
+uint32_t SumStreamMaxBitrate(int streams, const webrtc::VideoCodec& codec) {
+  uint32_t bitrate_sum = 0;
+  for (int i = 0; i < streams; ++i) {
+    bitrate_sum += codec.simulcastStream[i].maxBitrate;
+  }
+  return bitrate_sum;
+}
+
+int NumberOfStreams(const webrtc::VideoCodec& codec) {
+  int streams =
+      codec.numberOfSimulcastStreams < 1 ? 1 : codec.numberOfSimulcastStreams;
+  uint32_t simulcast_max_bitrate = SumStreamMaxBitrate(streams, codec);
+  if (simulcast_max_bitrate == 0) {
+    streams = 1;
+  }
+  return streams;
+}
+
+bool ValidSimulcastResolutions(const webrtc::VideoCodec& codec,
+                               int num_streams) {
+  if (codec.width != codec.simulcastStream[num_streams - 1].width ||
+      codec.height != codec.simulcastStream[num_streams - 1].height) {
+    return false;
+  }
+  for (int i = 0; i < num_streams; ++i) {
+    if (codec.width * codec.simulcastStream[i].height !=
+        codec.height * codec.simulcastStream[i].width) {
+      return false;
+    }
+  }
+  return true;
+}
+
+int VerifyCodec(const webrtc::VideoCodec* inst) {
+  if (inst == NULL) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (inst->maxFramerate < 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  // allow zero to represent an unspecified maxBitRate
+  if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (inst->width <= 1 || inst->height <= 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (inst->codecSpecific.VP8.feedbackModeOn &&
+      inst->numberOfSimulcastStreams > 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (inst->codecSpecific.VP8.automaticResizeOn &&
+      inst->numberOfSimulcastStreams > 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+// TL1 FrameDropper's max time to drop frames.
+const float kTl1MaxTimeToDropFrames = 20.0f;
+
+struct ScreenshareTemporalLayersFactory : webrtc::TemporalLayers::Factory {
+  ScreenshareTemporalLayersFactory()
+      : tl1_frame_dropper_(kTl1MaxTimeToDropFrames) {}
+
+  virtual ~ScreenshareTemporalLayersFactory() {}
+
+  virtual webrtc::TemporalLayers* Create(int num_temporal_layers,
+                                         uint8_t initial_tl0_pic_idx) const {
+    return new webrtc::ScreenshareLayers(num_temporal_layers,
+                                         rand(),
+                                         &tl0_frame_dropper_,
+                                         &tl1_frame_dropper_);
+  }
+
+  mutable webrtc::FrameDropper tl0_frame_dropper_;
+  mutable webrtc::FrameDropper tl1_frame_dropper_;
+};
+
+}  // namespace
+
+namespace webrtc {
+
+SimulcastEncoderAdapter::SimulcastEncoderAdapter(
+    scoped_ptr<VideoEncoderFactory> factory)
+    : factory_(factory.Pass()),
+      encoded_complete_callback_(NULL) {
+  memset(&codec_, 0, sizeof(webrtc::VideoCodec));
+}
+
+SimulcastEncoderAdapter::~SimulcastEncoderAdapter() {
+  Release();
+}
+
+int SimulcastEncoderAdapter::Release() {
+  while (!streaminfos_.empty()) {
+    VideoEncoder* encoder = streaminfos_.back().encoder;
+    factory_->Destroy(encoder);
+    streaminfos_.pop_back();
+  }
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int SimulcastEncoderAdapter::InitEncode(const VideoCodec* inst,
+                                        int number_of_cores,
+                                        size_t max_payload_size) {
+  if (number_of_cores < 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  int ret = VerifyCodec(inst);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = Release();
+  if (ret < 0) {
+    return ret;
+  }
+
+  int number_of_streams = NumberOfStreams(*inst);
+  bool doing_simulcast = (number_of_streams > 1);
+
+  if (doing_simulcast && !ValidSimulcastResolutions(*inst, number_of_streams)) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  codec_ = *inst;
+
+  // Special mode when screensharing on a single stream.
+  if (number_of_streams == 1 && inst->mode == kScreensharing) {
+    screensharing_extra_options_.reset(new Config());
+    screensharing_extra_options_->Set<TemporalLayers::Factory>(
+        new ScreenshareTemporalLayersFactory());
+    codec_.extra_options = screensharing_extra_options_.get();
+  }
+
+  // Create |number_of_streams| of encoder instances and init them.
+  for (int i = 0; i < number_of_streams; ++i) {
+    VideoCodec stream_codec;
+    bool send_stream = true;
+    if (!doing_simulcast) {
+      stream_codec = codec_;
+      stream_codec.numberOfSimulcastStreams = 1;
+    } else {
+      bool highest_resolution_stream = (i == (number_of_streams - 1));
+      PopulateStreamCodec(&codec_, i, highest_resolution_stream,
+                          &stream_codec, &send_stream);
+    }
+
+    // TODO(ronghuawu): Remove once this is handled in VP8EncoderImpl.
+    if (stream_codec.qpMax < kDefaultMinQp) {
+      stream_codec.qpMax = kDefaultMaxQp;
+    }
+
+    VideoEncoder* encoder = factory_->Create();
+    ret = encoder->InitEncode(&stream_codec,
+                              number_of_cores,
+                              max_payload_size);
+    if (ret < 0) {
+      Release();
+      return ret;
+    }
+    encoder->RegisterEncodeCompleteCallback(this);
+    streaminfos_.push_back(StreamInfo(encoder,
+                                      stream_codec.width,
+                                      stream_codec.height,
+                                      send_stream));
+  }
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int SimulcastEncoderAdapter::Encode(
+    const I420VideoFrame& input_image,
+    const CodecSpecificInfo* codec_specific_info,
+    const std::vector<VideoFrameType>* frame_types) {
+  if (!Initialized()) {
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  if (encoded_complete_callback_ == NULL) {
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+
+  // All active streams should generate a key frame if
+  // a key frame is requested by any stream.
+  bool send_key_frame = false;
+  if (frame_types) {
+    for (size_t i = 0; i < frame_types->size(); ++i) {
+      if (frame_types->at(i) == kKeyFrame) {
+        send_key_frame = true;
+        break;
+      }
+    }
+  }
+  for (size_t stream_idx = 0; stream_idx < streaminfos_.size(); ++stream_idx) {
+    if (streaminfos_[stream_idx].key_frame_request &&
+        streaminfos_[stream_idx].send_stream) {
+      send_key_frame = true;
+      break;
+    }
+  }
+
+  int src_width = input_image.width();
+  int src_height = input_image.height();
+  for (size_t stream_idx = 0; stream_idx < streaminfos_.size(); ++stream_idx) {
+    std::vector<VideoFrameType> stream_frame_types;
+    if (send_key_frame) {
+      stream_frame_types.push_back(kKeyFrame);
+      streaminfos_[stream_idx].key_frame_request = false;
+    } else {
+      stream_frame_types.push_back(kDeltaFrame);
+    }
+
+    int dst_width = streaminfos_[stream_idx].width;
+    int dst_height = streaminfos_[stream_idx].height;
+    // If scaling isn't required, because the input resolution
+    // matches the destination or the input image is empty (e.g.
+    // a keyframe request for encoders with internal camera
+    // sources), pass the image on directly. Otherwise, we'll
+    // scale it to match what the encoder expects (below).
+    if ((dst_width == src_width && dst_height == src_height) ||
+        input_image.IsZeroSize()) {
+      streaminfos_[stream_idx].encoder->Encode(input_image,
+                                               codec_specific_info,
+                                               &stream_frame_types);
+    } else {
+      I420VideoFrame dst_frame;
+      // Making sure that destination frame is of sufficient size.
+      // Aligning stride values based on width.
+      dst_frame.CreateEmptyFrame(dst_width, dst_height,
+                                 dst_width, (dst_width + 1) / 2,
+                                 (dst_width + 1) / 2);
+      libyuv::I420Scale(input_image.buffer(kYPlane),
+                        input_image.stride(kYPlane),
+                        input_image.buffer(kUPlane),
+                        input_image.stride(kUPlane),
+                        input_image.buffer(kVPlane),
+                        input_image.stride(kVPlane),
+                        src_width, src_height,
+                        dst_frame.buffer(kYPlane),
+                        dst_frame.stride(kYPlane),
+                        dst_frame.buffer(kUPlane),
+                        dst_frame.stride(kUPlane),
+                        dst_frame.buffer(kVPlane),
+                        dst_frame.stride(kVPlane),
+                        dst_width, dst_height,
+                        libyuv::kFilterBilinear);
+      dst_frame.set_timestamp(input_image.timestamp());
+      dst_frame.set_render_time_ms(input_image.render_time_ms());
+      streaminfos_[stream_idx].encoder->Encode(dst_frame,
+                                               codec_specific_info,
+                                               &stream_frame_types);
+    }
+  }
+
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int SimulcastEncoderAdapter::RegisterEncodeCompleteCallback(
+    EncodedImageCallback* callback) {
+  encoded_complete_callback_ = callback;
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int SimulcastEncoderAdapter::SetChannelParameters(uint32_t packet_loss,
+                                                  int rtt) {
+  for (size_t stream_idx = 0; stream_idx < streaminfos_.size(); ++stream_idx) {
+    streaminfos_[stream_idx].encoder->SetChannelParameters(packet_loss, rtt);
+  }
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int SimulcastEncoderAdapter::SetRates(uint32_t new_bitrate_kbit,
+                                      uint32_t new_framerate) {
+  if (!Initialized()) {
+    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
+  }
+  if (new_framerate < 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  if (codec_.maxBitrate > 0 && new_bitrate_kbit > codec_.maxBitrate) {
+    new_bitrate_kbit = codec_.maxBitrate;
+  }
+  if (new_bitrate_kbit < codec_.minBitrate) {
+    new_bitrate_kbit = codec_.minBitrate;
+  }
+  if (codec_.numberOfSimulcastStreams > 0 &&
+      new_bitrate_kbit < codec_.simulcastStream[0].minBitrate) {
+    new_bitrate_kbit = codec_.simulcastStream[0].minBitrate;
+  }
+  codec_.maxFramerate = new_framerate;
+
+  bool send_stream = true;
+  uint32_t stream_bitrate = 0;
+  for (size_t stream_idx = 0; stream_idx < streaminfos_.size(); ++stream_idx) {
+    stream_bitrate = GetStreamBitrate(stream_idx,
+                                      new_bitrate_kbit,
+                                      &send_stream);
+    // Need a key frame if we have not sent this stream before.
+    if (send_stream && !streaminfos_[stream_idx].send_stream) {
+      streaminfos_[stream_idx].key_frame_request = true;
+    }
+    streaminfos_[stream_idx].send_stream = send_stream;
+
+    // TODO(holmer): This is a temporary hack for screensharing, where we
+    // interpret the startBitrate as the encoder target bitrate. This is
+    // to allow for a different max bitrate, so if the codec can't meet
+    // the target we still allow it to overshoot up to the max before dropping
+    // frames. This hack should be improved.
+    if (codec_.targetBitrate > 0 &&
+        (codec_.codecSpecific.VP8.numberOfTemporalLayers == 2 ||
+         codec_.simulcastStream[0].numberOfTemporalLayers == 2)) {
+      stream_bitrate = std::min(codec_.maxBitrate, stream_bitrate);
+      // TODO(ronghuawu): Can't change max bitrate via the VideoEncoder
+      // interface. And VP8EncoderImpl doesn't take negative framerate.
+      // max_bitrate = std::min(codec_.maxBitrate, stream_bitrate);
+      // new_framerate = -1;
+    }
+
+    streaminfos_[stream_idx].encoder->SetRates(stream_bitrate, new_framerate);
+  }
+
+  return WEBRTC_VIDEO_CODEC_OK;
+}
+
+int32_t SimulcastEncoderAdapter::Encoded(
+    const EncodedImage& encodedImage,
+    const CodecSpecificInfo* codecSpecificInfo,
+    const RTPFragmentationHeader* fragmentation) {
+  size_t stream_idx = GetStreamIndex(encodedImage);
+
+  CodecSpecificInfo stream_codec_specific = *codecSpecificInfo;
+  CodecSpecificInfoVP8* vp8Info = &(stream_codec_specific.codecSpecific.VP8);
+  vp8Info->simulcastIdx = stream_idx;
+
+  if (streaminfos_[stream_idx].send_stream) {
+    return encoded_complete_callback_->Encoded(encodedImage,
+                                               &stream_codec_specific,
+                                               fragmentation);
+  } else {
+    EncodedImage dummy_image;
+    // Required in case padding is applied to dropped frames.
+    dummy_image._timeStamp = encodedImage._timeStamp;
+    dummy_image.capture_time_ms_ = encodedImage.capture_time_ms_;
+    dummy_image._encodedWidth = encodedImage._encodedWidth;
+    dummy_image._encodedHeight = encodedImage._encodedHeight;
+    dummy_image._length = 0;
+    dummy_image._frameType = kSkipFrame;
+    vp8Info->keyIdx = kNoKeyIdx;
+    return encoded_complete_callback_->Encoded(dummy_image,
+                                               &stream_codec_specific, NULL);
+  }
+}
+
+uint32_t SimulcastEncoderAdapter::GetStreamBitrate(int stream_idx,
+                                                   uint32_t new_bitrate_kbit,
+                                                   bool* send_stream) const {
+  if (streaminfos_.size() == 1) {
+    *send_stream = true;
+    return new_bitrate_kbit;
+  }
+
+  // The bitrate needed to start sending this stream is given by the
+  // minimum bitrate allowed for encoding this stream, plus the sum target
+  // rates of all lower streams.
+  uint32_t sum_target_lower_streams =
+      SumStreamTargetBitrate(stream_idx, codec_);
+  uint32_t bitrate_to_send_this_layer =
+      codec_.simulcastStream[stream_idx].minBitrate + sum_target_lower_streams;
+  if (new_bitrate_kbit >= bitrate_to_send_this_layer) {
+    // We have enough bandwidth to send this stream.
+    *send_stream = true;
+    // Bitrate for this stream is the new bitrate (|new_bitrate_kbit|) minus the
+    // sum target rates of the lower streams, and capped to a maximum bitrate.
+    // The maximum cap depends on whether we send the next higher stream.
+    // If we will be sending the next higher stream, |max_rate| is given by
+    // current stream's |targetBitrate|, otherwise it's capped by |maxBitrate|.
+    if (stream_idx < codec_.numberOfSimulcastStreams - 1) {
+      unsigned int max_rate = codec_.simulcastStream[stream_idx].maxBitrate;
+      if (new_bitrate_kbit >= SumStreamTargetBitrate(stream_idx + 1, codec_) +
+          codec_.simulcastStream[stream_idx + 1].minBitrate) {
+        max_rate = codec_.simulcastStream[stream_idx].targetBitrate;
+      }
+      return std::min(new_bitrate_kbit - sum_target_lower_streams, max_rate);
+    } else {
+      // For the highest stream (highest resolution), the |targetBitRate| and
+      // |maxBitrate| are not used. Any excess bitrate (above the targets of
+      // all lower streams) is given to this (highest resolution) stream.
+      return new_bitrate_kbit - sum_target_lower_streams;
+    }
+  } else {
+    // Not enough bitrate for this stream.
+    // Return our max bitrate of |stream_idx| - 1, but we don't send it. We need
+    // to keep this resolution coding in order for the multi-encoder to work.
+    *send_stream = false;
+    return codec_.simulcastStream[stream_idx - 1].maxBitrate;
+  }
+}
+
+void SimulcastEncoderAdapter::PopulateStreamCodec(
+    const webrtc::VideoCodec* inst,
+    int stream_index,
+    bool highest_resolution_stream,
+    webrtc::VideoCodec* stream_codec,
+    bool* send_stream) {
+  *stream_codec = *inst;
+
+  // Stream specific settings.
+  stream_codec->codecSpecific.VP8.numberOfTemporalLayers =
+      inst->simulcastStream[stream_index].numberOfTemporalLayers;
+  stream_codec->numberOfSimulcastStreams = 0;
+  stream_codec->width = inst->simulcastStream[stream_index].width;
+  stream_codec->height = inst->simulcastStream[stream_index].height;
+  stream_codec->maxBitrate = inst->simulcastStream[stream_index].maxBitrate;
+  stream_codec->minBitrate = inst->simulcastStream[stream_index].minBitrate;
+  stream_codec->qpMax = inst->simulcastStream[stream_index].qpMax;
+  // Settings that are based on stream/resolution.
+  if (stream_index == 0) {
+    // Settings for lowest spatial resolutions.
+    stream_codec->qpMax = kLowestResMaxQp;
+  }
+  if (!highest_resolution_stream) {
+    // For resolutions below CIF, set the codec |complexity| parameter to
+    // kComplexityHigher, which maps to cpu_used = -4.
+    int pixels_per_frame = stream_codec->width * stream_codec->height;
+    if (pixels_per_frame < 352 * 288) {
+      stream_codec->codecSpecific.VP8.complexity = webrtc::kComplexityHigher;
+    }
+    // Turn off denoising for all streams but the highest resolution.
+    stream_codec->codecSpecific.VP8.denoisingOn = false;
+  }
+  // TODO(ronghuawu): what to do with targetBitrate.
+
+  int stream_bitrate = GetStreamBitrate(stream_index,
+                                        inst->startBitrate,
+                                        send_stream);
+  stream_codec->startBitrate = stream_bitrate;
+}
+
+size_t SimulcastEncoderAdapter::GetStreamIndex(
+    const EncodedImage& encodedImage) {
+  uint32_t width = encodedImage._encodedWidth;
+  uint32_t height = encodedImage._encodedHeight;
+  for (size_t stream_idx = 0; stream_idx < streaminfos_.size(); ++stream_idx) {
+    if (streaminfos_[stream_idx].width == width &&
+        streaminfos_[stream_idx].height == height) {
+      return stream_idx;
+    }
+  }
+  // should not be here
+  assert(false);
+  return 0;
+}
+
+bool SimulcastEncoderAdapter::Initialized() const {
+  return !streaminfos_.empty();
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.h b/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.h
new file mode 100644
index 0000000000..8b27bed750
--- /dev/null
+++ b/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.h
@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SIMULCAST_ENCODER_ADAPTER_H_
+#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SIMULCAST_ENCODER_ADAPTER_H_
+
+#include <vector>
+
+#include "webrtc/modules/video_coding/codecs/vp8/include/vp8.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+class VideoEncoderFactory {
+ public:
+  virtual VideoEncoder* Create() = 0;
+  virtual void Destroy(VideoEncoder* encoder) = 0;
+  virtual ~VideoEncoderFactory() {}
+};
+
+// SimulcastEncoderAdapter implements simulcast support by creating multiple
+// webrtc::VideoEncoder instances with the given VideoEncoderFactory.
+// All the public interfaces are expected to be called from the same thread,
+// e.g the encoder thread.
+class SimulcastEncoderAdapter : public VP8Encoder,
+                                public EncodedImageCallback {
+ public:
+  explicit SimulcastEncoderAdapter(scoped_ptr<VideoEncoderFactory> factory);
+
+  virtual ~SimulcastEncoderAdapter();
+
+  // Implements VideoEncoder
+  virtual int Release() OVERRIDE;
+  virtual int InitEncode(const VideoCodec* inst,
+                         int number_of_cores,
+                         size_t max_payload_size) OVERRIDE;
+  virtual int Encode(const I420VideoFrame& input_image,
+                     const CodecSpecificInfo* codec_specific_info,
+                     const std::vector<VideoFrameType>* frame_types) OVERRIDE;
+  virtual int RegisterEncodeCompleteCallback(
+      EncodedImageCallback* callback) OVERRIDE;
+  virtual int SetChannelParameters(uint32_t packet_loss, int rtt) OVERRIDE;
+  virtual int SetRates(uint32_t new_bitrate_kbit,
+                       uint32_t new_framerate) OVERRIDE;
+
+  // Implements EncodedImageCallback
+  virtual int32_t Encoded(
+      const EncodedImage& encodedImage,
+      const CodecSpecificInfo* codecSpecificInfo = NULL,
+      const RTPFragmentationHeader* fragmentation = NULL) OVERRIDE;
+
+ private:
+  struct StreamInfo {
+    StreamInfo()
+        : encoder(NULL), width(0), height(0),
+          key_frame_request(false), send_stream(true) {}
+    StreamInfo(VideoEncoder* encoder,
+               unsigned short width,
+               unsigned short height,
+               bool send_stream)
+        : encoder(encoder),
+ width(width), + height(height), + key_frame_request(false), + send_stream(send_stream) {} + // Deleted by SimulcastEncoderAdapter::Release(). + VideoEncoder* encoder; + unsigned short width; + unsigned short height; + bool key_frame_request; + bool send_stream; + }; + + // Get the stream bitrate, for the stream |stream_idx|, given the bitrate + // |new_bitrate_kbit|. The function also returns whether there's enough + // bandwidth to send this stream via |send_stream|. + uint32_t GetStreamBitrate(int stream_idx, + uint32_t new_bitrate_kbit, + bool* send_stream) const; + + // Populate the codec settings for each stream. + void PopulateStreamCodec(const webrtc::VideoCodec* inst, + int stream_index, + bool highest_resolution_stream, + webrtc::VideoCodec* stream_codec, + bool* send_stream); + + // Get the stream index according to |encodedImage|. + size_t GetStreamIndex(const EncodedImage& encodedImage); + + bool Initialized() const; + + scoped_ptr factory_; + scoped_ptr screensharing_extra_options_; + VideoCodec codec_; + std::vector streaminfos_; + EncodedImageCallback* encoded_complete_callback_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SIMULCAST_ENCODER_ADAPTER_H_ + diff --git a/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter_unittest.cc b/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter_unittest.cc new file mode 100644 index 0000000000..8f2eb7a0b3 --- /dev/null +++ b/webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter_unittest.cc @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <vector>
+
+#include "testing/gmock/include/gmock/gmock.h"
+#include "webrtc/modules/video_coding/codecs/interface/video_codec_interface.h"
+#include "webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.h"
+#include "webrtc/modules/video_coding/codecs/vp8/simulcast_unittest.h"
+#include "webrtc/modules/video_coding/codecs/vp8/vp8_factory.h"
+
+namespace webrtc {
+namespace testing {
+
+static VP8Encoder* CreateTestEncoderAdapter() {
+  VP8EncoderFactoryConfig::set_use_simulcast_adapter(true);
+  return VP8Encoder::Create();
+}
+
+class TestSimulcastEncoderAdapter : public TestVp8Simulcast {
+ public:
+  TestSimulcastEncoderAdapter()
+      : TestVp8Simulcast(CreateTestEncoderAdapter(),
+                         VP8Decoder::Create()) {}
+ protected:
+  virtual void SetUp() {
+    TestVp8Simulcast::SetUp();
+  }
+  virtual void TearDown() {
+    TestVp8Simulcast::TearDown();
+    VP8EncoderFactoryConfig::set_use_simulcast_adapter(false);
+  }
+};
+
+TEST_F(TestSimulcastEncoderAdapter, TestKeyFrameRequestsOnAllStreams) {
+  TestVp8Simulcast::TestKeyFrameRequestsOnAllStreams();
+}
+
+TEST_F(TestSimulcastEncoderAdapter, TestPaddingAllStreams) {
+  TestVp8Simulcast::TestPaddingAllStreams();
+}
+
+TEST_F(TestSimulcastEncoderAdapter, TestPaddingTwoStreams) {
+  TestVp8Simulcast::TestPaddingTwoStreams();
+}
+
+TEST_F(TestSimulcastEncoderAdapter, TestPaddingTwoStreamsOneMaxedOut) {
+  TestVp8Simulcast::TestPaddingTwoStreamsOneMaxedOut();
+}
+
+TEST_F(TestSimulcastEncoderAdapter, TestPaddingOneStream) {
+  TestVp8Simulcast::TestPaddingOneStream();
+}
+
+TEST_F(TestSimulcastEncoderAdapter, TestPaddingOneStreamTwoMaxedOut) {
+  TestVp8Simulcast::TestPaddingOneStreamTwoMaxedOut();
+}
+
+TEST_F(TestSimulcastEncoderAdapter, TestSendAllStreams) {
+  TestVp8Simulcast::TestSendAllStreams();
+}
+
+TEST_F(TestSimulcastEncoderAdapter, TestDisablingStreams) {
+  TestVp8Simulcast::TestDisablingStreams();
+}
+
+TEST_F(TestSimulcastEncoderAdapter, TestSwitchingToOneStream) {
+  TestVp8Simulcast::TestSwitchingToOneStream();
+} + +TEST_F(TestSimulcastEncoderAdapter, TestSwitchingToOneOddStream) { + TestVp8Simulcast::TestSwitchingToOneOddStream(); +} + +TEST_F(TestSimulcastEncoderAdapter, TestRPSIEncodeDecode) { + TestVp8Simulcast::TestRPSIEncodeDecode(); +} + +TEST_F(TestSimulcastEncoderAdapter, TestStrideEncodeDecode) { + TestVp8Simulcast::TestStrideEncodeDecode(); +} + +TEST_F(TestSimulcastEncoderAdapter, TestSaptioTemporalLayers333PatternEncoder) { + TestVp8Simulcast::TestSaptioTemporalLayers333PatternEncoder(); +} + +TEST_F(TestSimulcastEncoderAdapter, TestSpatioTemporalLayers321PatternEncoder) { + TestVp8Simulcast::TestSpatioTemporalLayers321PatternEncoder(); +} + +// TODO(ronghuawu): Enable this test when SkipEncodingUnusedStreams option is +// implemented for SimulcastEncoderAdapter. +TEST_F(TestSimulcastEncoderAdapter, + DISABLED_TestSkipEncodingUnusedStreams) { + TestVp8Simulcast::TestSkipEncodingUnusedStreams(); +} + +TEST_F(TestSimulcastEncoderAdapter, DISABLED_TestRPSIEncoder) { + TestVp8Simulcast::TestRPSIEncoder(); +} + +class MockVideoEncoder : public VideoEncoder { + public: + int32_t InitEncode(const VideoCodec* codecSettings, + int32_t numberOfCores, + size_t maxPayloadSize) { + codec_ = *codecSettings; + return 0; + } + + int32_t Encode(const I420VideoFrame& inputImage, + const CodecSpecificInfo* codecSpecificInfo, + const std::vector* frame_types) { return 0; } + + int32_t RegisterEncodeCompleteCallback(EncodedImageCallback* callback) { + return 0; + } + + int32_t Release() { + return 0; + } + + int32_t SetRates(uint32_t newBitRate, uint32_t frameRate) { + return 0; + } + + MOCK_METHOD2(SetChannelParameters, + int32_t(uint32_t packetLoss, int rtt)); + + virtual ~MockVideoEncoder() { + } + + const VideoCodec& codec() const { return codec_; } + + private: + VideoCodec codec_; +}; + +class MockVideoEncoderFactory : public VideoEncoderFactory { + public: + virtual VideoEncoder* Create() OVERRIDE { + MockVideoEncoder* encoder = new MockVideoEncoder(); + 
encoders_.push_back(encoder);
+    return encoder;
+  }
+
+  virtual void Destroy(VideoEncoder* encoder) OVERRIDE {
+    delete encoder;
+  }
+
+  virtual ~MockVideoEncoderFactory() {}
+
+  const std::vector<MockVideoEncoder*>& encoders() const { return encoders_; }
+
+ private:
+  std::vector<MockVideoEncoder*> encoders_;
+};
+
+class TestSimulcastEncoderAdapterFakeHelper {
+ public:
+  TestSimulcastEncoderAdapterFakeHelper()
+      : factory_(new MockVideoEncoderFactory()) {}
+
+  // Can only be called once as the SimulcastEncoderAdapter will take the
+  // ownership of |factory_|.
+  VP8Encoder* CreateMockEncoderAdapter() {
+    scoped_ptr<VideoEncoderFactory> scoped_factory(factory_);
+    return new SimulcastEncoderAdapter(scoped_factory.Pass());
+  }
+
+  void ExpectCallSetChannelParameters(uint32_t packetLoss, int rtt) {
+    EXPECT_TRUE(!factory_->encoders().empty());
+    for (size_t i = 0; i < factory_->encoders().size(); ++i) {
+      EXPECT_CALL(*factory_->encoders()[i],
+                  SetChannelParameters(packetLoss, rtt)).Times(1);
+    }
+  }
+
+  MockVideoEncoderFactory* factory() { return factory_; }
+
+ private:
+  MockVideoEncoderFactory* factory_;
+};
+
+static const int kTestTemporalLayerProfile[3] = {3, 2, 1};
+
+class TestSimulcastEncoderAdapterFake : public ::testing::Test {
+ public:
+  TestSimulcastEncoderAdapterFake()
+      : helper_(new TestSimulcastEncoderAdapterFakeHelper()),
+        adapter_(helper_->CreateMockEncoderAdapter()) {}
+  virtual ~TestSimulcastEncoderAdapterFake() {}
+
+  void SetupCodec() {
+    TestVp8Simulcast::DefaultSettings(
+        &codec_,
+        static_cast<const int*>(kTestTemporalLayerProfile));
+    EXPECT_EQ(0, adapter_->InitEncode(&codec_, 1, 1200));
+  }
+
+  void VerifyCodec(const VideoCodec& ref, int stream_index) {
+    const VideoCodec& target =
+        helper_->factory()->encoders()[stream_index]->codec();
+    EXPECT_EQ(ref.codecType, target.codecType);
+    EXPECT_EQ(0, strcmp(ref.plName, target.plName));
+    EXPECT_EQ(ref.plType, target.plType);
+    EXPECT_EQ(ref.width, target.width);
+    EXPECT_EQ(ref.height, target.height);
+    EXPECT_EQ(ref.startBitrate, target.startBitrate);
+
EXPECT_EQ(ref.maxBitrate, target.maxBitrate); + EXPECT_EQ(ref.minBitrate, target.minBitrate); + EXPECT_EQ(ref.maxFramerate, target.maxFramerate); + EXPECT_EQ(ref.codecSpecific.VP8.pictureLossIndicationOn, + target.codecSpecific.VP8.pictureLossIndicationOn); + EXPECT_EQ(ref.codecSpecific.VP8.feedbackModeOn, + target.codecSpecific.VP8.feedbackModeOn); + EXPECT_EQ(ref.codecSpecific.VP8.complexity, + target.codecSpecific.VP8.complexity); + EXPECT_EQ(ref.codecSpecific.VP8.resilience, + target.codecSpecific.VP8.resilience); + EXPECT_EQ(ref.codecSpecific.VP8.numberOfTemporalLayers, + target.codecSpecific.VP8.numberOfTemporalLayers); + EXPECT_EQ(ref.codecSpecific.VP8.denoisingOn, + target.codecSpecific.VP8.denoisingOn); + EXPECT_EQ(ref.codecSpecific.VP8.errorConcealmentOn, + target.codecSpecific.VP8.errorConcealmentOn); + EXPECT_EQ(ref.codecSpecific.VP8.automaticResizeOn, + target.codecSpecific.VP8.automaticResizeOn); + EXPECT_EQ(ref.codecSpecific.VP8.frameDroppingOn, + target.codecSpecific.VP8.frameDroppingOn); + EXPECT_EQ(ref.codecSpecific.VP8.keyFrameInterval, + target.codecSpecific.VP8.keyFrameInterval); + EXPECT_EQ(ref.qpMax, target.qpMax); + EXPECT_EQ(0, target.numberOfSimulcastStreams); + EXPECT_EQ(ref.mode, target.mode); + EXPECT_EQ(ref.extra_options, target.extra_options); + + // No need to compare simulcastStream as numberOfSimulcastStreams should + // always be 0. 
+ } + + void InitRefCodec(int stream_index, VideoCodec* ref_codec) { + *ref_codec = codec_; + ref_codec->codecSpecific.VP8.numberOfTemporalLayers = + kTestTemporalLayerProfile[stream_index]; + ref_codec->width = codec_.simulcastStream[stream_index].width; + ref_codec->height = codec_.simulcastStream[stream_index].height; + ref_codec->maxBitrate = codec_.simulcastStream[stream_index].maxBitrate; + ref_codec->minBitrate = codec_.simulcastStream[stream_index].minBitrate; + ref_codec->qpMax = codec_.simulcastStream[stream_index].qpMax; + } + + void VerifyCodecSettings() { + EXPECT_EQ(3u, helper_->factory()->encoders().size()); + VideoCodec ref_codec; + + // stream 0, the lowest resolution stream. + InitRefCodec(0, &ref_codec); + ref_codec.qpMax = 45; + ref_codec.codecSpecific.VP8.complexity = webrtc::kComplexityHigher; + ref_codec.codecSpecific.VP8.denoisingOn = false; + ref_codec.startBitrate = 100; // Should equal to the target bitrate. + VerifyCodec(ref_codec, 0); + + // stream 1 + InitRefCodec(1, &ref_codec); + ref_codec.codecSpecific.VP8.denoisingOn = false; + ref_codec.startBitrate = 300; + VerifyCodec(ref_codec, 1); + + // stream 2, the biggest resolution stream. 
+    InitRefCodec(2, &ref_codec);
+    ref_codec.startBitrate = 600;
+    VerifyCodec(ref_codec, 2);
+  }
+
+ protected:
+  scoped_ptr<TestSimulcastEncoderAdapterFakeHelper> helper_;
+  scoped_ptr<VP8Encoder> adapter_;
+  VideoCodec codec_;
+};
+
+TEST_F(TestSimulcastEncoderAdapterFake, InitEncode) {
+  SetupCodec();
+  VerifyCodecSettings();
+}
+
+TEST_F(TestSimulcastEncoderAdapterFake, SetChannelParameters) {
+  SetupCodec();
+  const uint32_t packetLoss = 5;
+  const int rtt = 30;
+  helper_->ExpectCallSetChannelParameters(packetLoss, rtt);
+  adapter_->SetChannelParameters(packetLoss, rtt);
+}
+
+}  // namespace testing
+}  // namespace webrtc
diff --git a/webrtc/modules/video_coding/codecs/vp8/simulcast_unittest.cc b/webrtc/modules/video_coding/codecs/vp8/simulcast_unittest.cc
new file mode 100644
index 0000000000..373a55237f
--- /dev/null
+++ b/webrtc/modules/video_coding/codecs/vp8/simulcast_unittest.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "webrtc/modules/video_coding/codecs/vp8/simulcast_unittest.h" + +namespace webrtc { +namespace testing { + +class TestVp8Impl + : public TestVp8Simulcast { + public: + TestVp8Impl() + : TestVp8Simulcast(VP8Encoder::Create(), VP8Decoder::Create()) {} + protected: + virtual void SetUp() { + TestVp8Simulcast::SetUp(); + } + virtual void TearDown() { + TestVp8Simulcast::TearDown(); + } +}; + +TEST_F(TestVp8Impl, TestKeyFrameRequestsOnAllStreams) { + TestVp8Simulcast::TestKeyFrameRequestsOnAllStreams(); +} + +TEST_F(TestVp8Impl, TestPaddingAllStreams) { + TestVp8Simulcast::TestPaddingAllStreams(); +} + +TEST_F(TestVp8Impl, TestPaddingTwoStreams) { + TestVp8Simulcast::TestPaddingTwoStreams(); +} + +TEST_F(TestVp8Impl, TestPaddingTwoStreamsOneMaxedOut) { + TestVp8Simulcast::TestPaddingTwoStreamsOneMaxedOut(); +} + +TEST_F(TestVp8Impl, TestPaddingOneStream) { + TestVp8Simulcast::TestPaddingOneStream(); +} + +TEST_F(TestVp8Impl, TestPaddingOneStreamTwoMaxedOut) { + TestVp8Simulcast::TestPaddingOneStreamTwoMaxedOut(); +} + +TEST_F(TestVp8Impl, TestSendAllStreams) { + TestVp8Simulcast::TestSendAllStreams(); +} + +TEST_F(TestVp8Impl, TestDisablingStreams) { + TestVp8Simulcast::TestDisablingStreams(); +} + +TEST_F(TestVp8Impl, TestSwitchingToOneStream) { + TestVp8Simulcast::TestSwitchingToOneStream(); +} + +TEST_F(TestVp8Impl, TestSwitchingToOneOddStream) { + TestVp8Simulcast::TestSwitchingToOneOddStream(); +} + +TEST_F(TestVp8Impl, TestRPSIEncoder) { + TestVp8Simulcast::TestRPSIEncoder(); +} + +TEST_F(TestVp8Impl, TestRPSIEncodeDecode) { + TestVp8Simulcast::TestRPSIEncodeDecode(); +} + +TEST_F(TestVp8Impl, TestSaptioTemporalLayers333PatternEncoder) { + TestVp8Simulcast::TestSaptioTemporalLayers333PatternEncoder(); +} + +TEST_F(TestVp8Impl, TestSpatioTemporalLayers321PatternEncoder) { + TestVp8Simulcast::TestSpatioTemporalLayers321PatternEncoder(); +} + +TEST_F(TestVp8Impl, TestStrideEncodeDecode) { + TestVp8Simulcast::TestStrideEncodeDecode(); +} + 
+TEST_F(TestVp8Impl, TestSkipEncodingUnusedStreams) { + TestVp8Simulcast::TestSkipEncodingUnusedStreams(); +} + +} // namespace testing +} // namespace webrtc diff --git a/webrtc/modules/video_coding/codecs/vp8/simulcast_unittest.h b/webrtc/modules/video_coding/codecs/vp8/simulcast_unittest.h new file mode 100644 index 0000000000..a4b982d562 --- /dev/null +++ b/webrtc/modules/video_coding/codecs/vp8/simulcast_unittest.h @@ -0,0 +1,1001 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SIMULCAST_UNITTEST_H_ +#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SIMULCAST_UNITTEST_H_ + +#include +#include + +#include "webrtc/common.h" +#include "webrtc/experiments.h" +#include "webrtc/common_video/interface/i420_video_frame.h" +#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h" +#include "webrtc/modules/video_coding/codecs/interface/mock/mock_video_codec_interface.h" +#include "webrtc/modules/video_coding/codecs/vp8/include/vp8.h" +#include "webrtc/modules/video_coding/codecs/vp8/temporal_layers.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +#include "gtest/gtest.h" + +using ::testing::_; +using ::testing::AllOf; +using ::testing::Field; +using ::testing::Return; + +namespace webrtc { +namespace testing { + +const int kDefaultWidth = 1280; +const int kDefaultHeight = 720; +const int kNumberOfSimulcastStreams = 3; +const int kColorY = 66; +const int kColorU = 22; +const int kColorV = 33; +const int kMaxBitrates[kNumberOfSimulcastStreams] = {150, 600, 1200}; +const int kMinBitrates[kNumberOfSimulcastStreams] = {50, 150, 600}; +const int 
kTargetBitrates[kNumberOfSimulcastStreams] = {100, 450, 1000}; +const int kDefaultTemporalLayerProfile[3] = {3, 3, 3}; + +template void SetExpectedValues3(T value0, + T value1, + T value2, + T* expected_values) { + expected_values[0] = value0; + expected_values[1] = value1; + expected_values[2] = value2; +} + +class Vp8TestEncodedImageCallback : public EncodedImageCallback { + public: + Vp8TestEncodedImageCallback() + : picture_id_(-1) { + memset(temporal_layer_, -1, sizeof(temporal_layer_)); + memset(layer_sync_, false, sizeof(layer_sync_)); + } + + ~Vp8TestEncodedImageCallback() { + delete [] encoded_key_frame_._buffer; + delete [] encoded_frame_._buffer; + } + + virtual int32_t Encoded(const EncodedImage& encoded_image, + const CodecSpecificInfo* codec_specific_info, + const RTPFragmentationHeader* fragmentation) { + // Only store the base layer. + if (codec_specific_info->codecSpecific.VP8.simulcastIdx == 0) { + if (encoded_image._frameType == kKeyFrame) { + delete [] encoded_key_frame_._buffer; + encoded_key_frame_._buffer = new uint8_t[encoded_image._size]; + encoded_key_frame_._size = encoded_image._size; + encoded_key_frame_._length = encoded_image._length; + encoded_key_frame_._frameType = kKeyFrame; + encoded_key_frame_._completeFrame = encoded_image._completeFrame; + memcpy(encoded_key_frame_._buffer, + encoded_image._buffer, + encoded_image._length); + } else { + delete [] encoded_frame_._buffer; + encoded_frame_._buffer = new uint8_t[encoded_image._size]; + encoded_frame_._size = encoded_image._size; + encoded_frame_._length = encoded_image._length; + memcpy(encoded_frame_._buffer, + encoded_image._buffer, + encoded_image._length); + } + } + picture_id_ = codec_specific_info->codecSpecific.VP8.pictureId; + layer_sync_[codec_specific_info->codecSpecific.VP8.simulcastIdx] = + codec_specific_info->codecSpecific.VP8.layerSync; + temporal_layer_[codec_specific_info->codecSpecific.VP8.simulcastIdx] = + codec_specific_info->codecSpecific.VP8.temporalIdx; + 
return 0; + } + void GetLastEncodedFrameInfo(int* picture_id, int* temporal_layer, + bool* layer_sync, int stream) { + *picture_id = picture_id_; + *temporal_layer = temporal_layer_[stream]; + *layer_sync = layer_sync_[stream]; + } + void GetLastEncodedKeyFrame(EncodedImage* encoded_key_frame) { + *encoded_key_frame = encoded_key_frame_; + } + void GetLastEncodedFrame(EncodedImage* encoded_frame) { + *encoded_frame = encoded_frame_; + } + + private: + EncodedImage encoded_key_frame_; + EncodedImage encoded_frame_; + int picture_id_; + int temporal_layer_[kNumberOfSimulcastStreams]; + bool layer_sync_[kNumberOfSimulcastStreams]; +}; + +class Vp8TestDecodedImageCallback : public DecodedImageCallback { + public: + Vp8TestDecodedImageCallback() + : decoded_frames_(0) { + } + virtual int32_t Decoded(I420VideoFrame& decoded_image) { + last_decoded_frame_.CopyFrame(decoded_image); + for (int i = 0; i < decoded_image.width(); ++i) { + EXPECT_NEAR(kColorY, decoded_image.buffer(kYPlane)[i], 1); + } + + // TODO(mikhal): Verify the difference between U,V and the original. 
+ for (int i = 0; i < ((decoded_image.width() + 1) / 2); ++i) { + EXPECT_NEAR(kColorU, decoded_image.buffer(kUPlane)[i], 4); + EXPECT_NEAR(kColorV, decoded_image.buffer(kVPlane)[i], 4); + } + decoded_frames_++; + return 0; + } + int DecodedFrames() { + return decoded_frames_; + } + void GetLastDecodedFrame(I420VideoFrame* decoded_frame) { + decoded_frame->SwapFrame(&last_decoded_frame_); + } + + private: + int decoded_frames_; + I420VideoFrame last_decoded_frame_; +}; + +class SkipEncodingUnusedStreamsTest { + public: + std::vector RunTest(VP8Encoder* encoder, + VideoCodec* settings, + uint32_t target_bitrate) { + Config options; + SpyingTemporalLayersFactory* spy_factory = + new SpyingTemporalLayersFactory(); + options.Set(spy_factory); + settings->extra_options = &options; + EXPECT_EQ(0, encoder->InitEncode(settings, 1, 1200)); + + encoder->SetRates(target_bitrate, 30); + + std::vector configured_bitrates; + for (std::vector::const_iterator it = + spy_factory->spying_layers_.begin(); + it != spy_factory->spying_layers_.end(); + ++it) { + configured_bitrates.push_back( + static_cast(*it)->configured_bitrate_); + } + return configured_bitrates; + } + + class SpyingTemporalLayers : public TemporalLayers { + public: + explicit SpyingTemporalLayers(TemporalLayers* layers) + : configured_bitrate_(0), layers_(layers) {} + + virtual ~SpyingTemporalLayers() { delete layers_; } + + virtual int EncodeFlags(uint32_t timestamp) { + return layers_->EncodeFlags(timestamp); + } + + virtual bool ConfigureBitrates(int bitrate_kbit, + int max_bitrate_kbit, + int framerate, + vpx_codec_enc_cfg_t* cfg) OVERRIDE { + configured_bitrate_ = bitrate_kbit; + return layers_->ConfigureBitrates( + bitrate_kbit, max_bitrate_kbit, framerate, cfg); + } + + virtual void PopulateCodecSpecific(bool base_layer_sync, + CodecSpecificInfoVP8* vp8_info, + uint32_t timestamp) OVERRIDE { + layers_->PopulateCodecSpecific(base_layer_sync, vp8_info, timestamp); + } + + virtual void FrameEncoded(unsigned int 
size, uint32_t timestamp) OVERRIDE { + layers_->FrameEncoded(size, timestamp); + } + + virtual int CurrentLayerId() const OVERRIDE { + return layers_->CurrentLayerId(); + } + + int configured_bitrate_; + TemporalLayers* layers_; + }; + + class SpyingTemporalLayersFactory : public TemporalLayers::Factory { + public: + virtual ~SpyingTemporalLayersFactory() {} + virtual TemporalLayers* Create(int temporal_layers, + uint8_t initial_tl0_pic_idx) const OVERRIDE { + SpyingTemporalLayers* layers = + new SpyingTemporalLayers(TemporalLayers::Factory::Create( + temporal_layers, initial_tl0_pic_idx)); + spying_layers_.push_back(layers); + return layers; + } + + mutable std::vector spying_layers_; + }; +}; + +class TestVp8Simulcast : public ::testing::Test { + public: + TestVp8Simulcast(VP8Encoder* encoder, VP8Decoder* decoder) + : encoder_(encoder), + decoder_(decoder) {} + + // Creates an I420VideoFrame from |plane_colors|. + static void CreateImage(I420VideoFrame* frame, + int plane_colors[kNumOfPlanes]) { + for (int plane_num = 0; plane_num < kNumOfPlanes; ++plane_num) { + int width = (plane_num != kYPlane ? (frame->width() + 1) / 2 : + frame->width()); + int height = (plane_num != kYPlane ? (frame->height() + 1) / 2 : + frame->height()); + PlaneType plane_type = static_cast(plane_num); + uint8_t* data = frame->buffer(plane_type); + // Setting allocated area to zero - setting only image size to + // requested values - will make it easier to distinguish between image + // size and frame size (accounting for stride). 
+ memset(frame->buffer(plane_type), 0, frame->allocated_size(plane_type)); + for (int i = 0; i < height; i++) { + memset(data, plane_colors[plane_num], width); + data += frame->stride(plane_type); + } + } + } + + static void DefaultSettings(VideoCodec* settings, + const int* temporal_layer_profile) { + assert(settings); + memset(settings, 0, sizeof(VideoCodec)); + strncpy(settings->plName, "VP8", 4); + settings->codecType = kVideoCodecVP8; + // 96 to 127 dynamic payload types for video codecs + settings->plType = 120; + settings->startBitrate = 300; + settings->minBitrate = 30; + settings->maxBitrate = 0; + settings->maxFramerate = 30; + settings->width = kDefaultWidth; + settings->height = kDefaultHeight; + settings->numberOfSimulcastStreams = kNumberOfSimulcastStreams; + ASSERT_EQ(3, kNumberOfSimulcastStreams); + ConfigureStream(kDefaultWidth / 4, kDefaultHeight / 4, + kMaxBitrates[0], + kMinBitrates[0], + kTargetBitrates[0], + &settings->simulcastStream[0], + temporal_layer_profile[0]); + ConfigureStream(kDefaultWidth / 2, kDefaultHeight / 2, + kMaxBitrates[1], + kMinBitrates[1], + kTargetBitrates[1], + &settings->simulcastStream[1], + temporal_layer_profile[1]); + ConfigureStream(kDefaultWidth, kDefaultHeight, + kMaxBitrates[2], + kMinBitrates[2], + kTargetBitrates[2], + &settings->simulcastStream[2], + temporal_layer_profile[2]); + settings->codecSpecific.VP8.resilience = kResilientStream; + settings->codecSpecific.VP8.denoisingOn = true; + settings->codecSpecific.VP8.errorConcealmentOn = false; + settings->codecSpecific.VP8.automaticResizeOn = false; + settings->codecSpecific.VP8.feedbackModeOn = false; + settings->codecSpecific.VP8.frameDroppingOn = true; + settings->codecSpecific.VP8.keyFrameInterval = 3000; + } + + static void ConfigureStream(int width, + int height, + int max_bitrate, + int min_bitrate, + int target_bitrate, + SimulcastStream* stream, + int num_temporal_layers) { + assert(stream); + stream->width = width; + stream->height = height; + 
stream->maxBitrate = max_bitrate; + stream->minBitrate = min_bitrate; + stream->targetBitrate = target_bitrate; + stream->numberOfTemporalLayers = num_temporal_layers; + stream->qpMax = 45; + } + + protected: + virtual void SetUp() { + SetUpCodec(kDefaultTemporalLayerProfile); + } + + virtual void SetUpCodec(const int* temporal_layer_profile) { + encoder_->RegisterEncodeCompleteCallback(&encoder_callback_); + decoder_->RegisterDecodeCompleteCallback(&decoder_callback_); + DefaultSettings(&settings_, temporal_layer_profile); + EXPECT_EQ(0, encoder_->InitEncode(&settings_, 1, 1200)); + EXPECT_EQ(0, decoder_->InitDecode(&settings_, 1)); + int half_width = (kDefaultWidth + 1) / 2; + input_frame_.CreateEmptyFrame(kDefaultWidth, kDefaultHeight, + kDefaultWidth, half_width, half_width); + memset(input_frame_.buffer(kYPlane), 0, + input_frame_.allocated_size(kYPlane)); + memset(input_frame_.buffer(kUPlane), 0, + input_frame_.allocated_size(kUPlane)); + memset(input_frame_.buffer(kVPlane), 0, + input_frame_.allocated_size(kVPlane)); + } + + virtual void TearDown() { + encoder_->Release(); + decoder_->Release(); + } + + void ExpectStreams(VideoFrameType frame_type, int expected_video_streams) { + ASSERT_GE(expected_video_streams, 0); + ASSERT_LE(expected_video_streams, kNumberOfSimulcastStreams); + if (expected_video_streams >= 1) { + EXPECT_CALL(encoder_callback_, Encoded( + AllOf(Field(&EncodedImage::_frameType, frame_type), + Field(&EncodedImage::_encodedWidth, kDefaultWidth / 4), + Field(&EncodedImage::_encodedHeight, kDefaultHeight / 4)), _, _) + ) + .Times(1) + .WillRepeatedly(Return(0)); + } + if (expected_video_streams >= 2) { + EXPECT_CALL(encoder_callback_, Encoded( + AllOf(Field(&EncodedImage::_frameType, frame_type), + Field(&EncodedImage::_encodedWidth, kDefaultWidth / 2), + Field(&EncodedImage::_encodedHeight, kDefaultHeight / 2)), _, _) + ) + .Times(1) + .WillRepeatedly(Return(0)); + } + if (expected_video_streams >= 3) { + EXPECT_CALL(encoder_callback_, 
Encoded( + AllOf(Field(&EncodedImage::_frameType, frame_type), + Field(&EncodedImage::_encodedWidth, kDefaultWidth), + Field(&EncodedImage::_encodedHeight, kDefaultHeight)), _, _)) + .Times(1) + .WillRepeatedly(Return(0)); + } + if (expected_video_streams < kNumberOfSimulcastStreams) { + EXPECT_CALL(encoder_callback_, Encoded( + AllOf(Field(&EncodedImage::_frameType, kSkipFrame), + Field(&EncodedImage::_length, 0)), _, _)) + .Times(kNumberOfSimulcastStreams - expected_video_streams) + .WillRepeatedly(Return(0)); + } + } + + void VerifyTemporalIdxAndSyncForAllSpatialLayers( + Vp8TestEncodedImageCallback* encoder_callback, + const int* expected_temporal_idx, + const bool* expected_layer_sync, + int num_spatial_layers) { + int picture_id = -1; + int temporal_layer = -1; + bool layer_sync = false; + for (int i = 0; i < num_spatial_layers; i++) { + encoder_callback->GetLastEncodedFrameInfo(&picture_id, &temporal_layer, + &layer_sync, i); + EXPECT_EQ(expected_temporal_idx[i], temporal_layer); + EXPECT_EQ(expected_layer_sync[i], layer_sync); + } + } + + // We currently expect all active streams to generate a key frame even though + // a key frame was only requested for some of them. + void TestKeyFrameRequestsOnAllStreams() { + encoder_->SetRates(kMaxBitrates[2], 30); // To get all three streams. 
+ std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + ExpectStreams(kKeyFrame, kNumberOfSimulcastStreams); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + ExpectStreams(kDeltaFrame, kNumberOfSimulcastStreams); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + frame_types[0] = kKeyFrame; + ExpectStreams(kKeyFrame, kNumberOfSimulcastStreams); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + std::fill(frame_types.begin(), frame_types.end(), kDeltaFrame); + frame_types[1] = kKeyFrame; + ExpectStreams(kKeyFrame, kNumberOfSimulcastStreams); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + std::fill(frame_types.begin(), frame_types.end(), kDeltaFrame); + frame_types[2] = kKeyFrame; + ExpectStreams(kKeyFrame, kNumberOfSimulcastStreams); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + std::fill(frame_types.begin(), frame_types.end(), kDeltaFrame); + ExpectStreams(kDeltaFrame, kNumberOfSimulcastStreams); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void TestPaddingAllStreams() { + // We should always encode the base layer. 
+ encoder_->SetRates(kMinBitrates[0] - 1, 30); + std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + ExpectStreams(kKeyFrame, 1); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + ExpectStreams(kDeltaFrame, 1); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void TestPaddingTwoStreams() { + // We have just enough to get only the first stream and padding for two. + encoder_->SetRates(kMinBitrates[0], 30); + std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + ExpectStreams(kKeyFrame, 1); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + ExpectStreams(kDeltaFrame, 1); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void TestPaddingTwoStreamsOneMaxedOut() { + // We are just below limit of sending second stream, so we should get + // the first stream maxed out (at |maxBitrate|), and padding for two. + encoder_->SetRates(kTargetBitrates[0] + kMinBitrates[1] - 1, 30); + std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + ExpectStreams(kKeyFrame, 1); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + ExpectStreams(kDeltaFrame, 1); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void TestPaddingOneStream() { + // We have just enough to send two streams, so padding for one stream. 
+ encoder_->SetRates(kTargetBitrates[0] + kMinBitrates[1], 30); + std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + ExpectStreams(kKeyFrame, 2); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + ExpectStreams(kDeltaFrame, 2); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void TestPaddingOneStreamTwoMaxedOut() { + // We are just below limit of sending third stream, so we should get + // first stream's rate maxed out at |targetBitrate|, second at |maxBitrate|. + encoder_->SetRates(kTargetBitrates[0] + kTargetBitrates[1] + + kMinBitrates[2] - 1, 30); + std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + ExpectStreams(kKeyFrame, 2); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + ExpectStreams(kDeltaFrame, 2); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void TestSendAllStreams() { + // We have just enough to send all streams. + encoder_->SetRates(kTargetBitrates[0] + kTargetBitrates[1] + + kMinBitrates[2], 30); + std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + ExpectStreams(kKeyFrame, 3); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + ExpectStreams(kDeltaFrame, 3); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void TestDisablingStreams() { + // We should get three media streams. 
+ encoder_->SetRates(kMaxBitrates[0] + kMaxBitrates[1] + + kMaxBitrates[2], 30); + std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + ExpectStreams(kKeyFrame, 3); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + ExpectStreams(kDeltaFrame, 3); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + // We should only get two streams and padding for one. + encoder_->SetRates(kTargetBitrates[0] + kTargetBitrates[1] + + kMinBitrates[2] / 2, 30); + ExpectStreams(kDeltaFrame, 2); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + // We should only get the first stream and padding for two. + encoder_->SetRates(kTargetBitrates[0] + kMinBitrates[1] / 2, 30); + ExpectStreams(kDeltaFrame, 1); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + // We don't have enough bitrate for the thumbnail stream, but we should get + // it anyway with current configuration. + encoder_->SetRates(kTargetBitrates[0] - 1, 30); + ExpectStreams(kDeltaFrame, 1); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + // We should only get two streams and padding for one. + encoder_->SetRates(kTargetBitrates[0] + kTargetBitrates[1] + + kMinBitrates[2] / 2, 30); + // We get a key frame because a new stream is being enabled. + ExpectStreams(kKeyFrame, 2); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + // We should get all three streams. + encoder_->SetRates(kTargetBitrates[0] + kTargetBitrates[1] + + kTargetBitrates[2], 30); + // We get a key frame because a new stream is being enabled. 
+ ExpectStreams(kKeyFrame, 3); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void SwitchingToOneStream(int width, int height) { + // Disable all streams except the last and set the bitrate of the last to + // 100 kbps. This verifies the way GTP switches to screenshare mode. + settings_.codecSpecific.VP8.numberOfTemporalLayers = 1; + settings_.maxBitrate = 100; + settings_.startBitrate = 100; + settings_.width = width; + settings_.height = height; + for (int i = 0; i < settings_.numberOfSimulcastStreams - 1; ++i) { + settings_.simulcastStream[i].maxBitrate = 0; + settings_.simulcastStream[i].width = settings_.width; + settings_.simulcastStream[i].height = settings_.height; + } + // Setting input image to new resolution. + int half_width = (settings_.width + 1) / 2; + input_frame_.CreateEmptyFrame(settings_.width, settings_.height, + settings_.width, half_width, half_width); + memset(input_frame_.buffer(kYPlane), 0, + input_frame_.allocated_size(kYPlane)); + memset(input_frame_.buffer(kUPlane), 0, + input_frame_.allocated_size(kUPlane)); + memset(input_frame_.buffer(kVPlane), 0, + input_frame_.allocated_size(kVPlane)); + + // The for loop above did not set the bitrate of the highest layer. + settings_.simulcastStream[settings_.numberOfSimulcastStreams - 1]. + maxBitrate = 0; + // The highest layer has to correspond to the non-simulcast resolution. + settings_.simulcastStream[settings_.numberOfSimulcastStreams - 1]. + width = settings_.width; + settings_.simulcastStream[settings_.numberOfSimulcastStreams - 1]. + height = settings_.height; + EXPECT_EQ(0, encoder_->InitEncode(&settings_, 1, 1200)); + + // Encode one frame and verify. 
+ encoder_->SetRates(kMaxBitrates[0] + kMaxBitrates[1], 30); + std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams, + kDeltaFrame); + EXPECT_CALL(encoder_callback_, Encoded( + AllOf(Field(&EncodedImage::_frameType, kKeyFrame), + Field(&EncodedImage::_encodedWidth, width), + Field(&EncodedImage::_encodedHeight, height)), _, _)) + .Times(1) + .WillRepeatedly(Return(0)); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + + // Switch back. + DefaultSettings(&settings_, kDefaultTemporalLayerProfile); + // Start at the lowest bitrate for enabling base stream. + settings_.startBitrate = kMinBitrates[0]; + EXPECT_EQ(0, encoder_->InitEncode(&settings_, 1, 1200)); + encoder_->SetRates(settings_.startBitrate, 30); + ExpectStreams(kKeyFrame, 1); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, &frame_types)); + } + + void TestSwitchingToOneStream() { + SwitchingToOneStream(1024, 768); + } + + void TestSwitchingToOneOddStream() { + SwitchingToOneStream(1023, 769); + } + + void TestRPSIEncoder() { + Vp8TestEncodedImageCallback encoder_callback; + encoder_->RegisterEncodeCompleteCallback(&encoder_callback); + + encoder_->SetRates(kMaxBitrates[2], 30); // To get all three streams. 
+ + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + int picture_id = -1; + int temporal_layer = -1; + bool layer_sync = false; + encoder_callback.GetLastEncodedFrameInfo(&picture_id, &temporal_layer, + &layer_sync, 0); + EXPECT_EQ(0, temporal_layer); + EXPECT_TRUE(layer_sync); + int key_frame_picture_id = picture_id; + + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + encoder_callback.GetLastEncodedFrameInfo(&picture_id, &temporal_layer, + &layer_sync, 0); + EXPECT_EQ(2, temporal_layer); + EXPECT_TRUE(layer_sync); + + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + encoder_callback.GetLastEncodedFrameInfo(&picture_id, &temporal_layer, + &layer_sync, 0); + EXPECT_EQ(1, temporal_layer); + EXPECT_TRUE(layer_sync); + + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + encoder_callback.GetLastEncodedFrameInfo(&picture_id, &temporal_layer, + &layer_sync, 0); + EXPECT_EQ(2, temporal_layer); + EXPECT_FALSE(layer_sync); + + CodecSpecificInfo codec_specific; + codec_specific.codecType = kVideoCodecVP8; + codec_specific.codecSpecific.VP8.hasReceivedRPSI = true; + + // Must match last key frame to trigger. + codec_specific.codecSpecific.VP8.pictureIdRPSI = key_frame_picture_id; + + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, &codec_specific, NULL)); + encoder_callback.GetLastEncodedFrameInfo(&picture_id, &temporal_layer, + &layer_sync, 0); + + EXPECT_EQ(0, temporal_layer); + EXPECT_TRUE(layer_sync); + + // Must match last key frame to trigger, test bad id. 
+ codec_specific.codecSpecific.VP8.pictureIdRPSI = key_frame_picture_id + 17; + + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, &codec_specific, NULL)); + encoder_callback.GetLastEncodedFrameInfo(&picture_id, &temporal_layer, + &layer_sync, 0); + + EXPECT_EQ(2, temporal_layer); + // The previous frame was a base layer sync (since it was a frame that + // only predicts from key frame and hence resets the temporal pattern), + // so this frame (the next one) must have |layer_sync| set to true. + EXPECT_TRUE(layer_sync); + } + + void TestRPSIEncodeDecode() { + Vp8TestEncodedImageCallback encoder_callback; + Vp8TestDecodedImageCallback decoder_callback; + encoder_->RegisterEncodeCompleteCallback(&encoder_callback); + decoder_->RegisterDecodeCompleteCallback(&decoder_callback); + + encoder_->SetRates(kMaxBitrates[2], 30); // To get all three streams. + + // Set color. + int plane_offset[kNumOfPlanes]; + plane_offset[kYPlane] = kColorY; + plane_offset[kUPlane] = kColorU; + plane_offset[kVPlane] = kColorV; + CreateImage(&input_frame_, plane_offset); + + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + int picture_id = -1; + int temporal_layer = -1; + bool layer_sync = false; + encoder_callback.GetLastEncodedFrameInfo(&picture_id, &temporal_layer, + &layer_sync, 0); + EXPECT_EQ(0, temporal_layer); + EXPECT_TRUE(layer_sync); + int key_frame_picture_id = picture_id; + + // Change color. + plane_offset[kYPlane] += 1; + plane_offset[kUPlane] += 1; + plane_offset[kVPlane] += 1; + CreateImage(&input_frame_, plane_offset); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + + // Change color. 
+ plane_offset[kYPlane] += 1; + plane_offset[kUPlane] += 1; + plane_offset[kVPlane] += 1; + CreateImage(&input_frame_, plane_offset); + + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + + // Change color. + plane_offset[kYPlane] += 1; + plane_offset[kUPlane] += 1; + plane_offset[kVPlane] += 1; + CreateImage(&input_frame_, plane_offset); + + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + + CodecSpecificInfo codec_specific; + codec_specific.codecType = kVideoCodecVP8; + codec_specific.codecSpecific.VP8.hasReceivedRPSI = true; + // Must match last key frame to trigger. + codec_specific.codecSpecific.VP8.pictureIdRPSI = key_frame_picture_id; + + // Change color back to original. + plane_offset[kYPlane] = kColorY; + plane_offset[kUPlane] = kColorU; + plane_offset[kVPlane] = kColorV; + CreateImage(&input_frame_, plane_offset); + + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, &codec_specific, NULL)); + + EncodedImage encoded_frame; + encoder_callback.GetLastEncodedKeyFrame(&encoded_frame); + decoder_->Decode(encoded_frame, false, NULL); + encoder_callback.GetLastEncodedFrame(&encoded_frame); + decoder_->Decode(encoded_frame, false, NULL); + EXPECT_EQ(2, decoder_callback.DecodedFrames()); + } + + // Test the layer pattern and sync flag for various spatial-temporal patterns. + // 3-3-3 pattern: 3 temporal layers for all spatial streams, so same + // temporal_layer id and layer_sync is expected for all streams. + void TestSaptioTemporalLayers333PatternEncoder() { + Vp8TestEncodedImageCallback encoder_callback; + encoder_->RegisterEncodeCompleteCallback(&encoder_callback); + encoder_->SetRates(kMaxBitrates[2], 30); // To get all three streams. 
+ + int expected_temporal_idx[3] = { -1, -1, -1}; + bool expected_layer_sync[3] = {false, false, false}; + + // First frame: #0. + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(0, 0, 0, expected_temporal_idx); + SetExpectedValues3(true, true, true, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #1. + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(2, 2, 2, expected_temporal_idx); + SetExpectedValues3(true, true, true, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #2. + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(1, 1, 1, expected_temporal_idx); + SetExpectedValues3(true, true, true, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #3. + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(2, 2, 2, expected_temporal_idx); + SetExpectedValues3(false, false, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #4. + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(0, 0, 0, expected_temporal_idx); + SetExpectedValues3(false, false, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #5. 
+ input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(2, 2, 2, expected_temporal_idx); + SetExpectedValues3(false, false, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + } + + // Test the layer pattern and sync flag for various spatial-temporal patterns. + // 3-2-1 pattern: 3 temporal layers for lowest resolution, 2 for middle, and + // 1 temporal layer for highest resolution. + // For this profile, we expect the temporal index pattern to be: + // 1st stream: 0, 2, 1, 2, .... + // 2nd stream: 0, 1, 0, 1, ... + // 3rd stream: -1, -1, -1, -1, .... + // Regarding the 3rd stream, note that a stream/encoder with 1 temporal layer + // should always have temporal layer idx set to kNoTemporalIdx = -1. + // Since CodecSpecificInfoVP8.temporalIdx is uint8, this will wrap to 255. + // TODO(marpan): Although this seems safe for now, we should fix this. + void TestSpatioTemporalLayers321PatternEncoder() { + int temporal_layer_profile[3] = {3, 2, 1}; + SetUpCodec(temporal_layer_profile); + Vp8TestEncodedImageCallback encoder_callback; + encoder_->RegisterEncodeCompleteCallback(&encoder_callback); + encoder_->SetRates(kMaxBitrates[2], 30); // To get all three streams. + + int expected_temporal_idx[3] = { -1, -1, -1}; + bool expected_layer_sync[3] = {false, false, false}; + + // First frame: #0. + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(0, 0, 255, expected_temporal_idx); + SetExpectedValues3(true, true, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #1. 
+ input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(2, 1, 255, expected_temporal_idx); + SetExpectedValues3(true, true, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #2. + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(1, 0, 255, expected_temporal_idx); + SetExpectedValues3(true, false, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #3. + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(2, 1, 255, expected_temporal_idx); + SetExpectedValues3(false, false, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #4. + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(0, 0, 255, expected_temporal_idx); + SetExpectedValues3(false, false, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + + // Next frame: #5. 
+ input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + SetExpectedValues3(2, 1, 255, expected_temporal_idx); + SetExpectedValues3(false, false, false, expected_layer_sync); + VerifyTemporalIdxAndSyncForAllSpatialLayers(&encoder_callback, + expected_temporal_idx, + expected_layer_sync, + 3); + } + + void TestStrideEncodeDecode() { + Vp8TestEncodedImageCallback encoder_callback; + Vp8TestDecodedImageCallback decoder_callback; + encoder_->RegisterEncodeCompleteCallback(&encoder_callback); + decoder_->RegisterDecodeCompleteCallback(&decoder_callback); + + encoder_->SetRates(kMaxBitrates[2], 30); // To get all three streams. + // Setting two (possibly) problematic use cases for stride: + // 1. stride > width 2. stride_y != stride_uv/2 + int stride_y = kDefaultWidth + 20; + int stride_uv = ((kDefaultWidth + 1) / 2) + 5; + input_frame_.CreateEmptyFrame(kDefaultWidth, kDefaultHeight, + stride_y, stride_uv, stride_uv); + // Set color. + int plane_offset[kNumOfPlanes]; + plane_offset[kYPlane] = kColorY; + plane_offset[kUPlane] = kColorU; + plane_offset[kVPlane] = kColorV; + CreateImage(&input_frame_, plane_offset); + + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + + // Change color. + plane_offset[kYPlane] += 1; + plane_offset[kUPlane] += 1; + plane_offset[kVPlane] += 1; + CreateImage(&input_frame_, plane_offset); + input_frame_.set_timestamp(input_frame_.timestamp() + 3000); + EXPECT_EQ(0, encoder_->Encode(input_frame_, NULL, NULL)); + + EncodedImage encoded_frame; + // Only encoding one frame - so will be a key frame. 
+ encoder_callback.GetLastEncodedKeyFrame(&encoded_frame); + EXPECT_EQ(0, decoder_->Decode(encoded_frame, false, NULL)); + encoder_callback.GetLastEncodedFrame(&encoded_frame); + decoder_->Decode(encoded_frame, false, NULL); + EXPECT_EQ(2, decoder_callback.DecodedFrames()); + } + + void TestSkipEncodingUnusedStreams() { + SkipEncodingUnusedStreamsTest test; + std::vector<unsigned int> configured_bitrate = + test.RunTest(encoder_.get(), + &settings_, + 1); // Target bit rate 1, to force all streams but the + // base one to be exceeding bandwidth constraints. + EXPECT_EQ(static_cast<size_t>(kNumberOfSimulcastStreams), + configured_bitrate.size()); + + unsigned int min_bitrate = + std::max(settings_.simulcastStream[0].minBitrate, settings_.minBitrate); + int stream = 0; + for (std::vector<unsigned int>::const_iterator it = + configured_bitrate.begin(); + it != configured_bitrate.end(); + ++it) { + if (stream == 0) { + EXPECT_EQ(min_bitrate, *it); + } else { + EXPECT_EQ(0u, *it); + } + ++stream; + } + } + + scoped_ptr<VP8Encoder> encoder_; + MockEncodedImageCallback encoder_callback_; + scoped_ptr<VP8Decoder> decoder_; + MockDecodedImageCallback decoder_callback_; + VideoCodec settings_; + I420VideoFrame input_frame_; +}; + +} // namespace testing +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_SIMULCAST_UNITTEST_H_ diff --git a/webrtc/modules/video_coding/codecs/vp8/vp8.gyp b/webrtc/modules/video_coding/codecs/vp8/vp8.gyp index 621c244cdf..a86b401042 100644 --- a/webrtc/modules/video_coding/codecs/vp8/vp8.gyp +++ b/webrtc/modules/video_coding/codecs/vp8/vp8.gyp @@ -27,21 +27,30 @@ }], ], 'sources': [ - 'reference_picture_selection.h', - 'reference_picture_selection.cc', - 'include/vp8.h', - 'include/vp8_common_types.h', - 'vp8_factory.cc', - 'vp8_impl.cc', 'default_temporal_layers.cc', 'default_temporal_layers.h', + 'include/vp8.h', + 'include/vp8_common_types.h', 'realtime_temporal_layers.cc', + 'reference_picture_selection.cc', + 'reference_picture_selection.h', + 'screenshare_layers.cc', + 
'screenshare_layers.h', + 'simulcast_encoder_adapter.cc', + 'simulcast_encoder_adapter.h', 'temporal_layers.h', + 'vp8_factory.cc', + 'vp8_factory.h', + 'vp8_impl.cc', + 'vp8_impl.h', ], # Disable warnings to enable Win64 build, issue 1323. 'msvs_disabled_warnings': [ 4267, # size_t to int truncation. ], + 'include_dirs': [ + '<(libyuv_dir)/include', + ], }, ], # targets 'conditions': [ diff --git a/webrtc/modules/video_coding/codecs/vp8/vp8_factory.cc b/webrtc/modules/video_coding/codecs/vp8/vp8_factory.cc index 995191e3c0..eb274c6a90 100644 --- a/webrtc/modules/video_coding/codecs/vp8/vp8_factory.cc +++ b/webrtc/modules/video_coding/codecs/vp8/vp8_factory.cc @@ -6,15 +6,37 @@ * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. - * */ +#include "webrtc/modules/video_coding/codecs/vp8/vp8_factory.h" + +#include "webrtc/modules/video_coding/codecs/vp8/simulcast_encoder_adapter.h" #include "webrtc/modules/video_coding/codecs/vp8/vp8_impl.h" namespace webrtc { +bool VP8EncoderFactoryConfig::use_simulcast_adapter_ = false; + +class VP8EncoderImplFactory : public VideoEncoderFactory { + public: + virtual VideoEncoder* Create() OVERRIDE { + return new VP8EncoderImpl(); + } + + virtual void Destroy(VideoEncoder* encoder) OVERRIDE { + delete encoder; + } + + virtual ~VP8EncoderImplFactory() {} +}; + VP8Encoder* VP8Encoder::Create() { - return new VP8EncoderImpl(); + if (VP8EncoderFactoryConfig::use_simulcast_adapter()) { + scoped_ptr factory(new VP8EncoderImplFactory()); + return new SimulcastEncoderAdapter(factory.Pass()); + } else { + return new VP8EncoderImpl(); + } } VP8Decoder* VP8Decoder::Create() { diff --git a/webrtc/modules/video_coding/codecs/vp8/vp8_factory.h b/webrtc/modules/video_coding/codecs/vp8/vp8_factory.h new file mode 100644 index 0000000000..84745ea5a1 --- /dev/null +++ 
b/webrtc/modules/video_coding/codecs/vp8/vp8_factory.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_VP8_FACTORY_H_ +#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_VP8_FACTORY_H_ + +namespace webrtc { + +// VP8EncoderFactoryConfig is the interface to control the VP8Encoder::Create +// to create VP8EncoderImpl or SimulcastEncoderAdapter+VP8EncoderImpl. +// TODO(ronghuawu): Remove when SimulcastEncoderAdapter+VP8EncoderImpl is ready +// to replace VP8EncoderImpl. +class VP8EncoderFactoryConfig { + public: + static void set_use_simulcast_adapter(bool use_simulcast_adapter) { + use_simulcast_adapter_ = use_simulcast_adapter; + } + static bool use_simulcast_adapter() { return use_simulcast_adapter_; } + + private: + static bool use_simulcast_adapter_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_VP8_FACTORY_H_ + diff --git a/webrtc/modules/video_coding/codecs/vp8/vp8_impl.cc b/webrtc/modules/video_coding/codecs/vp8/vp8_impl.cc index 5345c805e0..21b07bc45a 100644 --- a/webrtc/modules/video_coding/codecs/vp8/vp8_impl.cc +++ b/webrtc/modules/video_coding/codecs/vp8/vp8_impl.cc @@ -6,9 +6,6 @@ * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
- * - * This file contains the WEBRTC VP8 wrapper implementation - * */ #include "webrtc/modules/video_coding/codecs/vp8/vp8_impl.h" @@ -16,107 +13,336 @@ #include #include #include -#include +#include -#include "vpx/vpx_encoder.h" -#include "vpx/vpx_decoder.h" -#include "vpx/vp8cx.h" -#include "vpx/vp8dx.h" +// NOTE(ajm): Path provided by gyp. +#include "libyuv/scale.h" // NOLINT #include "webrtc/common.h" +#include "webrtc/common_types.h" #include "webrtc/common_video/libyuv/include/webrtc_libyuv.h" +#include "webrtc/experiments.h" #include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/video_coding/codecs/interface/video_codec_interface.h" +#include "webrtc/modules/video_coding/codecs/vp8/include/vp8_common_types.h" +#include "webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h" #include "webrtc/modules/video_coding/codecs/vp8/temporal_layers.h" -#include "webrtc/modules/video_coding/codecs/vp8/reference_picture_selection.h" #include "webrtc/system_wrappers/interface/tick_util.h" #include "webrtc/system_wrappers/interface/trace_event.h" -enum { kVp8ErrorPropagationTh = 30 }; - namespace webrtc { +namespace { + +enum { kVp8ErrorPropagationTh = 30 }; +enum { kVp832ByteAlign = 32 }; + +// VP8 denoiser states. +enum denoiserState { + kDenoiserOff, + kDenoiserOnYOnly, + kDenoiserOnYUV, + kDenoiserOnYUVAggressive, + // Adaptive mode defaults to kDenoiserOnYUV on key frame, but may switch + // to kDenoiserOnYUVAggressive based on a computed noise metric. 
+ kDenoiserOnAdaptive +}; + +// Greatest common divisior +int GCD(int a, int b) { + int c = a % b; + while (c != 0) { + a = b; + b = c; + c = a % b; + } + return b; +} + +uint32_t SumStreamTargetBitrate(int streams, const VideoCodec& codec) { + uint32_t bitrate_sum = 0; + for (int i = 0; i < streams; ++i) { + bitrate_sum += codec.simulcastStream[i].targetBitrate; + } + return bitrate_sum; +} + +uint32_t SumStreamMaxBitrate(int streams, const VideoCodec& codec) { + uint32_t bitrate_sum = 0; + for (int i = 0; i < streams; ++i) { + bitrate_sum += codec.simulcastStream[i].maxBitrate; + } + return bitrate_sum; +} + +int NumberOfStreams(const VideoCodec& codec) { + int streams = + codec.numberOfSimulcastStreams < 1 ? 1 : codec.numberOfSimulcastStreams; + uint32_t simulcast_max_bitrate = SumStreamMaxBitrate(streams, codec); + if (simulcast_max_bitrate == 0) { + streams = 1; + } + return streams; +} + +bool ValidSimulcastResolutions(const VideoCodec& codec, int num_streams) { + if (codec.width != codec.simulcastStream[num_streams - 1].width || + codec.height != codec.simulcastStream[num_streams - 1].height) { + return false; + } + for (int i = 0; i < num_streams; ++i) { + if (codec.width * codec.simulcastStream[i].height != + codec.height * codec.simulcastStream[i].width) { + return false; + } + } + return true; +} +} // namespace + +const float kTl1MaxTimeToDropFrames = 20.0f; VP8EncoderImpl::VP8EncoderImpl() - : encoded_image_(), - encoded_complete_callback_(NULL), + : encoded_complete_callback_(NULL), inited_(false), timestamp_(0), - picture_id_(0), feedback_mode_(false), - cpu_speed_(-6), // default value + qp_max_(56), // Setting for max quantizer. 
rc_max_intra_target_(0), token_partitions_(VP8_ONE_TOKENPARTITION), - rps_(new ReferencePictureSelection), - temporal_layers_(NULL), - encoder_(NULL), - config_(NULL), - raw_(NULL) { - memset(&codec_, 0, sizeof(codec_)); + down_scale_requested_(false), + down_scale_bitrate_(0), + tl0_frame_dropper_(), + tl1_frame_dropper_(kTl1MaxTimeToDropFrames), + key_frame_request_(kMaxSimulcastStreams, false) { uint32_t seed = static_cast(TickTime::MillisecondTimestamp()); srand(seed); + + picture_id_.reserve(kMaxSimulcastStreams); + last_key_frame_picture_id_.reserve(kMaxSimulcastStreams); + temporal_layers_.reserve(kMaxSimulcastStreams); + raw_images_.reserve(kMaxSimulcastStreams); + encoded_images_.reserve(kMaxSimulcastStreams); + send_stream_.reserve(kMaxSimulcastStreams); + cpu_speed_.assign(kMaxSimulcastStreams, -6); // Set default to -6. + encoders_.reserve(kMaxSimulcastStreams); + configurations_.reserve(kMaxSimulcastStreams); + downsampling_factors_.reserve(kMaxSimulcastStreams); } VP8EncoderImpl::~VP8EncoderImpl() { Release(); - delete rps_; } int VP8EncoderImpl::Release() { - if (encoded_image_._buffer != NULL) { - delete [] encoded_image_._buffer; - encoded_image_._buffer = NULL; + int ret_val = WEBRTC_VIDEO_CODEC_OK; + + while (!encoded_images_.empty()) { + EncodedImage& image = encoded_images_.back(); + delete [] image._buffer; + encoded_images_.pop_back(); } - if (encoder_ != NULL) { - if (vpx_codec_destroy(encoder_)) { - return WEBRTC_VIDEO_CODEC_MEMORY; + while (!encoders_.empty()) { + vpx_codec_ctx_t& encoder = encoders_.back(); + if (vpx_codec_destroy(&encoder)) { + ret_val = WEBRTC_VIDEO_CODEC_MEMORY; } - delete encoder_; - encoder_ = NULL; + encoders_.pop_back(); } - if (config_ != NULL) { - delete config_; - config_ = NULL; + configurations_.clear(); + send_stream_.clear(); + cpu_speed_.clear(); + while (!raw_images_.empty()) { + vpx_img_free(&raw_images_.back()); + raw_images_.pop_back(); } - if (raw_ != NULL) { - vpx_img_free(raw_); - raw_ = NULL; + 
while (!temporal_layers_.empty()) { + delete temporal_layers_.back(); + temporal_layers_.pop_back(); } - delete temporal_layers_; - temporal_layers_ = NULL; inited_ = false; - return WEBRTC_VIDEO_CODEC_OK; + return ret_val; } int VP8EncoderImpl::SetRates(uint32_t new_bitrate_kbit, - uint32_t new_framerate) { + uint32_t new_framerate) { if (!inited_) { return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } - if (encoder_->err) { + if (encoders_[0].err) { return WEBRTC_VIDEO_CODEC_ERROR; } if (new_framerate < 1) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } - // update bit rate if (codec_.maxBitrate > 0 && new_bitrate_kbit > codec_.maxBitrate) { new_bitrate_kbit = codec_.maxBitrate; } - config_->rc_target_bitrate = new_bitrate_kbit; // in kbit/s - temporal_layers_->ConfigureBitrates(new_bitrate_kbit, codec_.maxBitrate, - new_framerate, config_); - codec_.maxFramerate = new_framerate; - quality_scaler_.ReportFramerate(new_framerate); - - // update encoder context - if (vpx_codec_enc_config_set(encoder_, config_)) { - return WEBRTC_VIDEO_CODEC_ERROR; + if (new_bitrate_kbit < codec_.minBitrate) { + new_bitrate_kbit = codec_.minBitrate; } + if (codec_.numberOfSimulcastStreams > 0 && + new_bitrate_kbit < codec_.simulcastStream[0].minBitrate) { + new_bitrate_kbit = codec_.simulcastStream[0].minBitrate; + } + codec_.maxFramerate = new_framerate; + + if (encoders_.size() == 1) { + // 1:1. + // Calculate a rough limit for when to trigger a potental down scale. + uint32_t k_pixels_per_frame = codec_.width * codec_.height / 1000; + // TODO(pwestin): we currently lack CAMA, this is a temporary fix to work + // around the current limitations. + // Only trigger keyframes if we are allowed to scale down. 
+ if (configurations_[0].rc_resize_allowed) { + if (!down_scale_requested_) { + if (k_pixels_per_frame > new_bitrate_kbit) { + down_scale_requested_ = true; + down_scale_bitrate_ = new_bitrate_kbit; + key_frame_request_[0] = true; + } + } else { + if (new_bitrate_kbit > (2 * down_scale_bitrate_) || + new_bitrate_kbit < (down_scale_bitrate_ / 2)) { + down_scale_requested_ = false; + } + } + } + } else { + // If we have more than 1 stream, reduce the qp_max for the low resolution + // stream if frame rate is not too low. The trade-off with lower qp_max is + // possibly more dropped frames, so we only do this if the frame rate is + // above some threshold (base temporal layer is down to 1/4 for 3 layers). + // We may want to condition this on bitrate later. + if (new_framerate > 20) { + configurations_[encoders_.size() - 1].rc_max_quantizer = 45; + } else { + // Go back to default value set in InitEncode. + configurations_[encoders_.size() - 1].rc_max_quantizer = qp_max_; + } + } + + bool send_stream = true; + int stream_bitrate = 0; + size_t stream_idx = encoders_.size() - 1; + for (size_t i = 0; i < encoders_.size(); ++i, --stream_idx) { + if (encoders_.size() == 1) { + stream_bitrate = new_bitrate_kbit; + } else { + stream_bitrate = GetStreamBitrate(stream_idx, + new_bitrate_kbit, + &send_stream); + SetStreamState(send_stream, stream_idx); + } + + unsigned int target_bitrate = stream_bitrate; + unsigned int max_bitrate = codec_.maxBitrate; + int framerate = new_framerate; + // TODO(holmer): This is a temporary hack for screensharing, where we + // interpret the startBitrate as the encoder target bitrate. This is + // to allow for a different max bitrate, so if the codec can't meet + // the target we still allow it to overshoot up to the max before dropping + // frames. This hack should be improved. 
+ if (codec_.targetBitrate > 0 && + (codec_.codecSpecific.VP8.numberOfTemporalLayers == 2 || + codec_.simulcastStream[0].numberOfTemporalLayers == 2)) { + int tl0_bitrate = std::min(codec_.targetBitrate, target_bitrate); + max_bitrate = std::min(codec_.maxBitrate, target_bitrate); + target_bitrate = tl0_bitrate; + framerate = -1; + } + configurations_[i].rc_target_bitrate = target_bitrate; + temporal_layers_[stream_idx]->ConfigureBitrates(target_bitrate, + max_bitrate, + framerate, + &configurations_[i]); + if (vpx_codec_enc_config_set(&encoders_[i], &configurations_[i])) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + quality_scaler_.ReportFramerate(new_framerate); return WEBRTC_VIDEO_CODEC_OK; } +int VP8EncoderImpl::GetStreamBitrate(int stream_idx, + uint32_t new_bitrate_kbit, + bool* send_stream) const { + // The bitrate needed to start sending this stream is given by the + // minimum bitrate allowed for encoding this stream, plus the sum target + // rates of all lower streams. + uint32_t sum_target_lower_streams = (stream_idx == 0) ? 0 : + SumStreamTargetBitrate(stream_idx, codec_); + uint32_t bitrate_to_send_this_layer = + codec_.simulcastStream[stream_idx].minBitrate + sum_target_lower_streams; + if (new_bitrate_kbit >= bitrate_to_send_this_layer) { + // We have enough bandwidth to send this stream. + *send_stream = true; + // Bitrate for this stream is the new bitrate (|new_bitrate_kbit|) minus the + // sum target rates of the lower streams, and capped to a maximum bitrate. + // The maximum cap depends on whether we send the next higher stream. + // If we will be sending the next higher stream, |max_rate| is given by + // current stream's |targetBitrate|, otherwise it's capped by |maxBitrate|. 
+ if (stream_idx < codec_.numberOfSimulcastStreams - 1) { + uint32_t max_rate = codec_.simulcastStream[stream_idx].maxBitrate; + if (new_bitrate_kbit >= SumStreamTargetBitrate(stream_idx + 1, codec_) + + codec_.simulcastStream[stream_idx + 1].minBitrate) { + max_rate = codec_.simulcastStream[stream_idx].targetBitrate; + } + return std::min(new_bitrate_kbit - sum_target_lower_streams, max_rate); + } else { + // For the highest stream (highest resolution), the |targetBitRate| and + // |maxBitrate| are not used. Any excess bitrate (above the targets of + // all lower streams) is given to this (highest resolution) stream. + return new_bitrate_kbit - sum_target_lower_streams; + } + } else { + // Not enough bitrate for this stream. + // Return our max bitrate of |stream_idx| - 1, but we don't send it. We need + // to keep this resolution coding in order for the multi-encoder to work. + *send_stream = false; + return 0; + } +} + +void VP8EncoderImpl::SetStreamState(bool send_stream, + int stream_idx) { + if (send_stream && !send_stream_[stream_idx]) { + // Need a key frame if we have not sent this stream before. + key_frame_request_[stream_idx] = true; + } + send_stream_[stream_idx] = send_stream; +} + +void VP8EncoderImpl::SetupTemporalLayers(int num_streams, + int num_temporal_layers, + const VideoCodec& codec) { + const Config default_options; + const TemporalLayers::Factory& tl_factory = + (codec.extra_options ? codec.extra_options : &default_options) + ->Get(); + if (num_streams == 1) { + if (codec.mode == kScreensharing) { + // Special mode when screensharing on a single stream. + temporal_layers_.push_back(new ScreenshareLayers(num_temporal_layers, + rand(), + &tl0_frame_dropper_, + &tl1_frame_dropper_)); + } else { + temporal_layers_.push_back( + tl_factory.Create(num_temporal_layers, rand())); + } + } else { + for (int i = 0; i < num_streams; ++i) { + // TODO(andresp): crash if layers is invalid. 
+ int layers = codec.simulcastStream[i].numberOfTemporalLayers; + if (layers < 1) layers = 1; + temporal_layers_.push_back(tl_factory.Create(layers, rand())); + } + } +} + int VP8EncoderImpl::InitEncode(const VideoCodec* inst, - int number_of_cores, - size_t /*max_payload_size*/) { + int number_of_cores, + size_t /*maxPayloadSize */) { if (inst == NULL) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } @@ -127,179 +353,339 @@ int VP8EncoderImpl::InitEncode(const VideoCodec* inst, if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } - if (inst->width < 1 || inst->height < 1) { + if (inst->width <= 1 || inst->height <= 1) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } if (number_of_cores < 1) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } - feedback_mode_ = inst->codecSpecific.VP8.feedbackModeOn; - + if (inst->codecSpecific.VP8.feedbackModeOn && + inst->numberOfSimulcastStreams > 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->codecSpecific.VP8.automaticResizeOn && + inst->numberOfSimulcastStreams > 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } int retVal = Release(); if (retVal < 0) { return retVal; } - if (encoder_ == NULL) { - encoder_ = new vpx_codec_ctx_t; - } - if (config_ == NULL) { - config_ = new vpx_codec_enc_cfg_t; + + int number_of_streams = NumberOfStreams(*inst); + bool doing_simulcast = (number_of_streams > 1); + + if (doing_simulcast && !ValidSimulcastResolutions(*inst, number_of_streams)) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } + + int num_temporal_layers = doing_simulcast ? + inst->simulcastStream[0].numberOfTemporalLayers : + inst->codecSpecific.VP8.numberOfTemporalLayers; + + // TODO(andresp): crash if num temporal layers is bananas. 
+ if (num_temporal_layers < 1) num_temporal_layers = 1; + SetupTemporalLayers(number_of_streams, num_temporal_layers, *inst); + + feedback_mode_ = inst->codecSpecific.VP8.feedbackModeOn; + timestamp_ = 0; + codec_ = *inst; - if (&codec_ != inst) { - codec_ = *inst; + // Code expects simulcastStream resolutions to be correct, make sure they are + // filled even when there are no simulcast layers. + if (codec_.numberOfSimulcastStreams == 0) { + codec_.simulcastStream[0].width = codec_.width; + codec_.simulcastStream[0].height = codec_.height; } - // TODO(andresp): assert(inst->extra_options) and cleanup. - Config default_options; - const Config& options = - inst->extra_options ? *inst->extra_options : default_options; + picture_id_.resize(number_of_streams); + last_key_frame_picture_id_.resize(number_of_streams); + encoded_images_.resize(number_of_streams); + encoders_.resize(number_of_streams); + configurations_.resize(number_of_streams); + downsampling_factors_.resize(number_of_streams); + raw_images_.resize(number_of_streams); + send_stream_.resize(number_of_streams); + send_stream_[0] = true; // For non-simulcast case. + cpu_speed_.resize(number_of_streams); + std::fill(key_frame_request_.begin(), key_frame_request_.end(), false); - int num_temporal_layers = inst->codecSpecific.VP8.numberOfTemporalLayers > 1 ? - inst->codecSpecific.VP8.numberOfTemporalLayers : 1; - assert(temporal_layers_ == NULL); - temporal_layers_ = options.Get() - .Create(num_temporal_layers, rand()); - // random start 16 bits is enough. 
- picture_id_ = static_cast(rand()) & 0x7FFF; - - // allocate memory for encoded image - if (encoded_image_._buffer != NULL) { - delete [] encoded_image_._buffer; + int idx = number_of_streams - 1; + for (int i = 0; i < (number_of_streams - 1); ++i, --idx) { + int gcd = GCD(inst->simulcastStream[idx].width, + inst->simulcastStream[idx-1].width); + downsampling_factors_[i].num = inst->simulcastStream[idx].width / gcd; + downsampling_factors_[i].den = inst->simulcastStream[idx - 1].width / gcd; + send_stream_[i] = false; + } + if (number_of_streams > 1) { + send_stream_[number_of_streams - 1] = false; + downsampling_factors_[number_of_streams - 1].num = 1; + downsampling_factors_[number_of_streams - 1].den = 1; + } + for (int i = 0; i < number_of_streams; ++i) { + // Random start, 16 bits is enough. + picture_id_[i] = static_cast(rand()) & 0x7FFF; + last_key_frame_picture_id_[i] = -1; + // allocate memory for encoded image + if (encoded_images_[i]._buffer != NULL) { + delete [] encoded_images_[i]._buffer; + } + encoded_images_[i]._size = CalcBufferSize(kI420, + codec_.width, codec_.height); + encoded_images_[i]._buffer = new uint8_t[encoded_images_[i]._size]; + encoded_images_[i]._completeFrame = true; } - encoded_image_._size = CalcBufferSize(kI420, codec_.width, codec_.height); - encoded_image_._buffer = new uint8_t[encoded_image_._size]; - encoded_image_._completeFrame = true; - - // Creating a wrapper to the image - setting image data to NULL. Actual - // pointer will be set in encode. Setting align to 1, as it is meaningless - // (actual memory is not allocated). 
- raw_ = vpx_img_wrap(NULL, VPX_IMG_FMT_I420, codec_.width, codec_.height, - 1, NULL); // populate encoder configuration with default values - if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), config_, 0)) { + if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), + &configurations_[0], 0)) { return WEBRTC_VIDEO_CODEC_ERROR; } - config_->g_w = codec_.width; - config_->g_h = codec_.height; - config_->rc_target_bitrate = inst->startBitrate; // in kbit/s - temporal_layers_->ConfigureBitrates(inst->startBitrate, inst->maxBitrate, - inst->maxFramerate, config_); // setting the time base of the codec - config_->g_timebase.num = 1; - config_->g_timebase.den = 90000; + configurations_[0].g_timebase.num = 1; + configurations_[0].g_timebase.den = 90000; + configurations_[0].g_lag_in_frames = 0; // 0- no frame lagging // Set the error resilience mode according to user settings. switch (inst->codecSpecific.VP8.resilience) { case kResilienceOff: - config_->g_error_resilient = 0; - if (num_temporal_layers > 1) { - // Must be on for temporal layers (i.e., |num_temporal_layers| > 1). - config_->g_error_resilient = 1; - } + // TODO(marpan): We should set keep error resilience off for this mode, + // independent of temporal layer settings, and make sure we set + // |codecSpecific.VP8.resilience| = |kResilientStream| at higher level + // code if we want to get error resilience on. + configurations_[0].g_error_resilient = 1; break; case kResilientStream: - config_->g_error_resilient = 1; // TODO(holmer): Replace with + configurations_[0].g_error_resilient = 1; // TODO(holmer): Replace with // VPX_ERROR_RESILIENT_DEFAULT when we // drop support for libvpx 9.6.0. 
break; case kResilientFrames: #ifdef INDEPENDENT_PARTITIONS - config_->g_error_resilient = VPX_ERROR_RESILIENT_DEFAULT | + configurations_[0].g_error_resilient = VPX_ERROR_RESILIENT_DEFAULT | VPX_ERROR_RESILIENT_PARTITIONS; break; #else return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; // Not supported #endif } - config_->g_lag_in_frames = 0; // 0- no frame lagging - - if (codec_.width * codec_.height >= 1920 * 1080 && number_of_cores > 8) { - config_->g_threads = 8; // 8 threads for 1080p on high perf machines. - } else if (codec_.width * codec_.height > 1280 * 960 && - number_of_cores >= 6) { - config_->g_threads = 3; // 3 threads for 1080p. - } else if (codec_.width * codec_.height > 640 * 480 && number_of_cores >= 3) { - config_->g_threads = 2; // 2 threads for qHD/HD. - } else { - config_->g_threads = 1; // 1 thread for VGA or less - } // rate control settings - config_->rc_dropframe_thresh = inst->codecSpecific.VP8.frameDroppingOn ? - 30 : 0; - config_->rc_end_usage = VPX_CBR; - config_->g_pass = VPX_RC_ONE_PASS; - // Handle resizing outside of libvpx. - config_->rc_resize_allowed = 0; - config_->rc_min_quantizer = 2; - config_->rc_max_quantizer = inst->qpMax; - config_->rc_undershoot_pct = 100; - config_->rc_overshoot_pct = 15; - config_->rc_buf_initial_sz = 500; - config_->rc_buf_optimal_sz = 600; - config_->rc_buf_sz = 1000; - // set the maximum target size of any key-frame. - rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz); + configurations_[0].rc_dropframe_thresh = + inst->codecSpecific.VP8.frameDroppingOn ? 30 : 0; + configurations_[0].rc_end_usage = VPX_CBR; + configurations_[0].g_pass = VPX_RC_ONE_PASS; + // TODO(hellner): investigate why the following two lines produce + // automaticResizeOn value of 3 when running + // WebRtcVideoMediaChannelTest.GetStatsMultipleSendStreams inside the talk + // framework. + // configurations_[0].rc_resize_allowed = + // inst->codecSpecific.VP8.automaticResizeOn ? 
1 : 0; + configurations_[0].rc_resize_allowed = 0; + // Handle resizing outside of libvpx when doing single-stream. + if (inst->codecSpecific.VP8.automaticResizeOn && number_of_streams > 1) { + configurations_[0].rc_resize_allowed = 1; + } + configurations_[0].rc_min_quantizer = 2; + if (inst->qpMax >= configurations_[0].rc_min_quantizer) { + qp_max_ = inst->qpMax; + } + configurations_[0].rc_max_quantizer = qp_max_; + configurations_[0].rc_undershoot_pct = 100; + configurations_[0].rc_overshoot_pct = 15; + configurations_[0].rc_buf_initial_sz = 500; + configurations_[0].rc_buf_optimal_sz = 600; + configurations_[0].rc_buf_sz = 1000; + + // Set the maximum target size of any key-frame. + rc_max_intra_target_ = MaxIntraTarget(configurations_[0].rc_buf_optimal_sz); if (feedback_mode_) { // Disable periodic key frames if we get feedback from the decoder // through SLI and RPSI. - config_->kf_mode = VPX_KF_DISABLED; - } else if (inst->codecSpecific.VP8.keyFrameInterval > 0) { - config_->kf_mode = VPX_KF_AUTO; - config_->kf_max_dist = inst->codecSpecific.VP8.keyFrameInterval; + configurations_[0].kf_mode = VPX_KF_DISABLED; + } else if (inst->codecSpecific.VP8.keyFrameInterval > 0) { + configurations_[0].kf_mode = VPX_KF_AUTO; + configurations_[0].kf_max_dist = inst->codecSpecific.VP8.keyFrameInterval; } else { - config_->kf_mode = VPX_KF_DISABLED; + configurations_[0].kf_mode = VPX_KF_DISABLED; } + + // Allow the user to set the complexity for the base stream. switch (inst->codecSpecific.VP8.complexity) { case kComplexityHigh: - cpu_speed_ = -5; + cpu_speed_[0] = -5; break; case kComplexityHigher: - cpu_speed_ = -4; + cpu_speed_[0] = -4; break; case kComplexityMax: - cpu_speed_ = -3; + cpu_speed_[0] = -3; break; default: - cpu_speed_ = -6; + cpu_speed_[0] = -6; break; } + // Setting complexity for non-base streams based on resolution. + // Base stream (layer 0) is of highest resolution. 
+ for (int i = 1; i < number_of_streams; ++i) { + int pixels_per_frame = + inst->simulcastStream[number_of_streams - 1 - i].width * + inst->simulcastStream[number_of_streams - 1 - i].height; + cpu_speed_[i] = cpu_speed_[0]; + // Increase complexity if below a CIF (default -6) + if (pixels_per_frame < 352 * 288) { + cpu_speed_[i] = -4; + } + } #if defined(WEBRTC_ARCH_ARM) // On mobile platform, always set to -12 to leverage between cpu usage // and video quality - cpu_speed_ = -12; + for (int i = 0; i < number_of_streams; ++i) { + cpu_speed_[i] = -12; + } #endif - rps_->Init(); + configurations_[0].g_w = inst->width; + configurations_[0].g_h = inst->height; + + // Determine number of threads based on the image size and #cores. + // TODO(fbarchard): Consider number of Simulcast layers. + configurations_[0].g_threads = NumberOfThreads(configurations_[0].g_w, + configurations_[0].g_h, + number_of_cores); + + // Creating a wrapper to the image - setting image data to NULL. + // Actual pointer will be set in encode. Setting align to 1, as it + // is meaningless (no memory allocation is done here). + vpx_img_wrap(&raw_images_[0], VPX_IMG_FMT_I420, inst->width, inst->height, + 1, NULL); + + if (encoders_.size() == 1) { + configurations_[0].rc_target_bitrate = inst->startBitrate; + temporal_layers_[0]->ConfigureBitrates(inst->startBitrate, + inst->maxBitrate, + inst->maxFramerate, + &configurations_[0]); + } else { + // Note the order we use is different from webm, we have lowest resolution + // at position 0 and they have highest resolution at position 0. 
+ int stream_idx = encoders_.size() - 1; + bool send_stream = true; + int stream_bitrate = GetStreamBitrate(stream_idx, + inst->startBitrate, + &send_stream); + SetStreamState(send_stream, stream_idx); + configurations_[0].rc_target_bitrate = stream_bitrate; + temporal_layers_[stream_idx]->ConfigureBitrates(stream_bitrate, + inst->maxBitrate, + inst->maxFramerate, + &configurations_[0]); + --stream_idx; + for (size_t i = 1; i < encoders_.size(); ++i, --stream_idx) { + memcpy(&configurations_[i], &configurations_[0], + sizeof(configurations_[0])); + + configurations_[i].g_w = inst->simulcastStream[stream_idx].width; + configurations_[i].g_h = inst->simulcastStream[stream_idx].height; + + // Use 1 thread for lower resolutions. + configurations_[i].g_threads = 1; + + // Setting alignment to 32 - as that ensures at least 16 for all + // planes (32 for Y, 16 for U,V). Libvpx sets the requested stride for + // the y plane, but only half of it to the u and v planes. + vpx_img_alloc(&raw_images_[i], VPX_IMG_FMT_I420, + inst->simulcastStream[stream_idx].width, + inst->simulcastStream[stream_idx].height, kVp832ByteAlign); + int stream_bitrate = GetStreamBitrate(stream_idx, + inst->startBitrate, + &send_stream); + SetStreamState(send_stream, stream_idx); + configurations_[i].rc_target_bitrate = stream_bitrate; + temporal_layers_[stream_idx]->ConfigureBitrates(stream_bitrate, + inst->maxBitrate, + inst->maxFramerate, + &configurations_[i]); + } + } + + rps_.Init(); quality_scaler_.Init(codec_.qpMax); quality_scaler_.ReportFramerate(codec_.maxFramerate); - return InitAndSetControlSettings(inst); + + return InitAndSetControlSettings(); } -int VP8EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) { - vpx_codec_flags_t flags = 0; - // TODO(holmer): We should make a smarter decision on the number of - // partitions. Eight is probably not the optimal number for low resolution - // video. 
- flags |= VPX_CODEC_USE_OUTPUT_PARTITION; - if (vpx_codec_enc_init(encoder_, vpx_codec_vp8_cx(), config_, flags)) { - return WEBRTC_VIDEO_CODEC_UNINITIALIZED; +int VP8EncoderImpl::NumberOfThreads(int width, int height, int cpus) { + if (width * height >= 1920 * 1080 && cpus > 8) { + return 8; // 8 threads for 1080p on high perf machines. + } else if (width * height > 1280 * 960 && cpus >= 6) { + // 3 threads for 1080p. + return 3; + } else if (width * height > 640 * 480 && cpus >= 3) { + // 2 threads for qHD/HD. + return 2; + } else { + // 1 thread for VGA or less. + return 1; } - vpx_codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1); - vpx_codec_control(encoder_, VP8E_SET_CPUUSED, cpu_speed_); - vpx_codec_control(encoder_, VP8E_SET_TOKEN_PARTITIONS, - static_cast(token_partitions_)); -#if !defined(WEBRTC_ARCH_ARM) - // TODO(fbarchard): Enable Noise reduction for ARM once optimized. - vpx_codec_control(encoder_, VP8E_SET_NOISE_SENSITIVITY, - inst->codecSpecific.VP8.denoisingOn ? 1 : 0); -#endif - vpx_codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT, - rc_max_intra_target_); - inited_ = true; +} +int VP8EncoderImpl::InitAndSetControlSettings() { + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_OUTPUT_PARTITION; + + if (encoders_.size() > 1) { + int error = vpx_codec_enc_init_multi(&encoders_[0], + vpx_codec_vp8_cx(), + &configurations_[0], + encoders_.size(), + flags, + &downsampling_factors_[0]); + if (error) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + } else { + if (vpx_codec_enc_init(&encoders_[0], + vpx_codec_vp8_cx(), + &configurations_[0], + flags)) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + } + // Enable denoising for the highest resolution stream, and for + // the second highest resolution if we are doing more than 2 + // spatial layers/streams. + // TODO(holmer): Investigate possibility of adding a libvpx API + // for getting the denoised frame from the encoder and using that + // when encoding lower resolution streams. 
Would it work with the + // multi-res encoding feature? + denoiserState denoiser_state = kDenoiserOnYOnly; +#ifdef WEBRTC_ARCH_ARM + denoiser_state = kDenoiserOnYOnly; +#else + denoiser_state = kDenoiserOnAdaptive; +#endif + vpx_codec_control(&encoders_[0], VP8E_SET_NOISE_SENSITIVITY, + codec_.codecSpecific.VP8.denoisingOn ? + denoiser_state : kDenoiserOff); + if (encoders_.size() > 2) { + vpx_codec_control(&encoders_[1], VP8E_SET_NOISE_SENSITIVITY, + codec_.codecSpecific.VP8.denoisingOn ? + denoiser_state : kDenoiserOff); + } + for (size_t i = 0; i < encoders_.size(); ++i) { + vpx_codec_control(&(encoders_[i]), VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(&(encoders_[i]), VP8E_SET_CPUUSED, cpu_speed_[i]); + vpx_codec_control(&(encoders_[i]), VP8E_SET_TOKEN_PARTITIONS, + static_cast(token_partitions_)); + vpx_codec_control(&(encoders_[i]), VP8E_SET_MAX_INTRA_BITRATE_PCT, + rc_max_intra_target_); + vpx_codec_control(&(encoders_[i]), VP8E_SET_SCREEN_CONTENT_MODE, + codec_.mode == kScreensharing); + } + inited_ = true; return WEBRTC_VIDEO_CODEC_OK; } @@ -319,74 +705,163 @@ uint32_t VP8EncoderImpl::MaxIntraTarget(uint32_t optimalBuffersize) { return (targetPct < minIntraTh) ? 
minIntraTh: targetPct; } -int VP8EncoderImpl::Encode(const I420VideoFrame& input_frame, - const CodecSpecificInfo* codec_specific_info, - const std::vector* frame_types) { - TRACE_EVENT1("webrtc", "VP8::Encode", "timestamp", input_frame.timestamp()); +int VP8EncoderImpl::Encode( + const I420VideoFrame& frame, + const CodecSpecificInfo* codec_specific_info, + const std::vector* frame_types) { + TRACE_EVENT1("webrtc", "VP8::Encode", "timestamp", frame.timestamp()); if (!inited_) { return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } - if (input_frame.IsZeroSize()) { + if (frame.IsZeroSize()) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } if (encoded_complete_callback_ == NULL) { return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } - VideoFrameType frame_type = kDeltaFrame; - // We only support one stream at the moment. - if (frame_types && frame_types->size() > 0) { - frame_type = (*frame_types)[0]; + // Only apply scaling to improve for single-layer streams. The scaling metrics + // use framedrops as a signal and is only applicable when we drop frames. + const bool use_quality_scaler = encoders_.size() == 1 && + configurations_[0].rc_dropframe_thresh > 0 && + codec_.codecSpecific.VP8.automaticResizeOn; + const I420VideoFrame& input_image = + use_quality_scaler ? quality_scaler_.GetScaledFrame(frame) : frame; + + if (use_quality_scaler && (input_image.width() != codec_.width || + input_image.height() != codec_.height)) { + int ret = UpdateCodecFrameSize(input_image); + if (ret < 0) + return ret; } - const I420VideoFrame& frame = - config_->rc_dropframe_thresh > 0 && - codec_.codecSpecific.VP8.automaticResizeOn - ? quality_scaler_.GetScaledFrame(input_frame) - : input_frame; + // Image in vpx_image_t format. + // Input image is const. VP8's raw image is not defined as const. 
+ raw_images_[0].planes[VPX_PLANE_Y] = + const_cast(input_image.buffer(kYPlane)); + raw_images_[0].planes[VPX_PLANE_U] = + const_cast(input_image.buffer(kUPlane)); + raw_images_[0].planes[VPX_PLANE_V] = + const_cast(input_image.buffer(kVPlane)); - // Check for change in frame size. - if (frame.width() != codec_.width || - frame.height() != codec_.height) { - int ret = UpdateCodecFrameSize(frame); + raw_images_[0].stride[VPX_PLANE_Y] = input_image.stride(kYPlane); + raw_images_[0].stride[VPX_PLANE_U] = input_image.stride(kUPlane); + raw_images_[0].stride[VPX_PLANE_V] = input_image.stride(kVPlane); + + for (size_t i = 1; i < encoders_.size(); ++i) { + // Scale the image down a number of times by downsampling factor + libyuv::I420Scale( + raw_images_[i-1].planes[VPX_PLANE_Y], + raw_images_[i-1].stride[VPX_PLANE_Y], + raw_images_[i-1].planes[VPX_PLANE_U], + raw_images_[i-1].stride[VPX_PLANE_U], + raw_images_[i-1].planes[VPX_PLANE_V], + raw_images_[i-1].stride[VPX_PLANE_V], + raw_images_[i-1].d_w, raw_images_[i-1].d_h, + raw_images_[i].planes[VPX_PLANE_Y], raw_images_[i].stride[VPX_PLANE_Y], + raw_images_[i].planes[VPX_PLANE_U], raw_images_[i].stride[VPX_PLANE_U], + raw_images_[i].planes[VPX_PLANE_V], raw_images_[i].stride[VPX_PLANE_V], + raw_images_[i].d_w, raw_images_[i].d_h, libyuv::kFilterBilinear); + } + vpx_enc_frame_flags_t flags[kMaxSimulcastStreams]; + for (size_t i = 0; i < encoders_.size(); ++i) { + int ret = temporal_layers_[i]->EncodeFlags(input_image.timestamp()); if (ret < 0) { - return ret; + // Drop this frame. + return WEBRTC_VIDEO_CODEC_OK; + } + flags[i] = ret; + } + bool send_key_frame = false; + for (size_t i = 0; i < key_frame_request_.size() && i < send_stream_.size(); + ++i) { + if (key_frame_request_[i] && send_stream_[i]) { + send_key_frame = true; + break; } } - // Image in vpx_image_t format. - // Input frame is const. VP8's raw frame is not defined as const. 
- raw_->planes[VPX_PLANE_Y] = const_cast(frame.buffer(kYPlane)); - raw_->planes[VPX_PLANE_U] = const_cast(frame.buffer(kUPlane)); - raw_->planes[VPX_PLANE_V] = const_cast(frame.buffer(kVPlane)); - // TODO(mikhal): Stride should be set in initialization. - raw_->stride[VPX_PLANE_Y] = frame.stride(kYPlane); - raw_->stride[VPX_PLANE_U] = frame.stride(kUPlane); - raw_->stride[VPX_PLANE_V] = frame.stride(kVPlane); - - int flags = temporal_layers_->EncodeFlags(frame.timestamp()); - - bool send_keyframe = (frame_type == kKeyFrame); - if (send_keyframe) { + if (!send_key_frame && frame_types) { + for (size_t i = 0; i < frame_types->size() && i < send_stream_.size(); + ++i) { + if ((*frame_types)[i] == kKeyFrame && send_stream_[i]) { + send_key_frame = true; + break; + } + } + } + // The flag modification below (due to forced key frame, RPS, etc.,) for now + // will be the same for all encoders/spatial layers. + // TODO(marpan/holmer): Allow for key frame request to be set per encoder. + bool only_predict_from_key_frame = false; + if (send_key_frame) { + // Adapt the size of the key frame when in screenshare with 1 temporal + // layer. + if (encoders_.size() == 1 && codec_.mode == kScreensharing + && codec_.codecSpecific.VP8.numberOfTemporalLayers <= 1) { + const uint32_t forceKeyFrameIntraTh = 100; + vpx_codec_control(&(encoders_[0]), VP8E_SET_MAX_INTRA_BITRATE_PCT, + forceKeyFrameIntraTh); + } // Key frame request from caller. // Will update both golden and alt-ref. - flags = VPX_EFLAG_FORCE_KF; - } else if (feedback_mode_ && codec_specific_info) { - // Handle RPSI and SLI messages and set up the appropriate encode flags. 
- bool sendRefresh = false; - if (codec_specific_info->codecType == kVideoCodecVP8) { + for (size_t i = 0; i < encoders_.size(); ++i) { + flags[i] = VPX_EFLAG_FORCE_KF; + } + std::fill(key_frame_request_.begin(), key_frame_request_.end(), false); + } else if (codec_specific_info && + codec_specific_info->codecType == kVideoCodecVP8) { + if (feedback_mode_) { + // Handle RPSI and SLI messages and set up the appropriate encode flags. + bool sendRefresh = false; if (codec_specific_info->codecSpecific.VP8.hasReceivedRPSI) { - rps_->ReceivedRPSI( + rps_.ReceivedRPSI( codec_specific_info->codecSpecific.VP8.pictureIdRPSI); } if (codec_specific_info->codecSpecific.VP8.hasReceivedSLI) { - sendRefresh = rps_->ReceivedSLI(frame.timestamp()); + sendRefresh = rps_.ReceivedSLI(input_image.timestamp()); + } + for (size_t i = 0; i < encoders_.size(); ++i) { + flags[i] = rps_.EncodeFlags(picture_id_[i], sendRefresh, + input_image.timestamp()); + } + } else { + if (codec_specific_info->codecSpecific.VP8.hasReceivedRPSI) { + // Is this our last key frame? If not ignore. + // |picture_id_| is defined per spatial stream/layer, so check that + // |RPSI| matches the last key frame from any of the spatial streams. + // If so, then all spatial streams for this encoding will predict from + // its long-term reference (last key frame). + int RPSI = codec_specific_info->codecSpecific.VP8.pictureIdRPSI; + for (size_t i = 0; i < encoders_.size(); ++i) { + if (last_key_frame_picture_id_[i] == RPSI) { + // Request for a long term reference frame. + // Note 1: overwrites any temporal settings. + // Note 2: VP8_EFLAG_NO_UPD_ENTROPY is not needed as that flag is + // set by error_resilient mode. 
+ for (size_t j = 0; j < encoders_.size(); ++j) { + flags[j] = VP8_EFLAG_NO_UPD_ARF; + flags[j] |= VP8_EFLAG_NO_REF_GF; + flags[j] |= VP8_EFLAG_NO_REF_LAST; + } + only_predict_from_key_frame = true; + break; + } + } } } - flags = rps_->EncodeFlags(picture_id_, sendRefresh, - frame.timestamp()); } - + // Set the encoder frame flags and temporal layer_id for each spatial stream. + // Note that |temporal_layers_| are defined starting from lowest resolution at + // position 0 to highest resolution at position |encoders_.size() - 1|, + // whereas |encoder_| is from highest to lowest resolution. + size_t stream_idx = encoders_.size() - 1; + for (size_t i = 0; i < encoders_.size(); ++i, --stream_idx) { + vpx_codec_control(&encoders_[i], VP8E_SET_FRAME_FLAGS, flags[stream_idx]); + vpx_codec_control(&encoders_[i], + VP8E_SET_TEMPORAL_LAYER_ID, + temporal_layers_[stream_idx]->CurrentLayerId()); + } // TODO(holmer): Ideally the duration should be the timestamp diff of this // frame and the next frame to be encoded, which we don't have. Instead we // would like to use the duration of the previous frame. Unfortunately the @@ -394,113 +869,163 @@ int VP8EncoderImpl::Encode(const I420VideoFrame& input_frame, // frame rate to calculate an average duration for now. assert(codec_.maxFramerate > 0); uint32_t duration = 90000 / codec_.maxFramerate; - if (vpx_codec_encode(encoder_, raw_, timestamp_, duration, flags, - VPX_DL_REALTIME)) { + + // Note we must pass 0 for |flags| field in encode call below since they are + // set above in |vpx_codec_control| function for each encoder/spatial layer. + int error = vpx_codec_encode(&encoders_[0], &raw_images_[0], timestamp_, + duration, 0, VPX_DL_REALTIME); + // Reset specific intra frame thresholds, following the key frame. 
+ if (send_key_frame) { + vpx_codec_control(&(encoders_[0]), VP8E_SET_MAX_INTRA_BITRATE_PCT, + rc_max_intra_target_); + } + if (error) { return WEBRTC_VIDEO_CODEC_ERROR; } timestamp_ += duration; - - return GetEncodedPartitions(frame); + return GetEncodedPartitions(input_image, only_predict_from_key_frame); } -int VP8EncoderImpl::UpdateCodecFrameSize(const I420VideoFrame& input_image) { +// TODO(pbos): Make sure this works for properly for >1 encoders. +int VP8EncoderImpl::UpdateCodecFrameSize( + const I420VideoFrame& input_image) { codec_.width = input_image.width(); codec_.height = input_image.height(); - raw_->w = codec_.width; - raw_->h = codec_.height; - raw_->d_w = codec_.width; - raw_->d_h = codec_.height; - - raw_->stride[VPX_PLANE_Y] = input_image.stride(kYPlane); - raw_->stride[VPX_PLANE_U] = input_image.stride(kUPlane); - raw_->stride[VPX_PLANE_V] = input_image.stride(kVPlane); - vpx_img_set_rect(raw_, 0, 0, codec_.width, codec_.height); + raw_images_[0].w = codec_.width; + raw_images_[0].h = codec_.height; + raw_images_[0].d_w = codec_.width; + raw_images_[0].d_h = codec_.height; + vpx_img_set_rect(&raw_images_[0], 0, 0, codec_.width, codec_.height); // Update encoder context for new frame size. // Change of frame size will automatically trigger a key frame. 
- config_->g_w = codec_.width; - config_->g_h = codec_.height; - if (vpx_codec_enc_config_set(encoder_, config_)) { + configurations_[0].g_w = codec_.width; + configurations_[0].g_h = codec_.height; + if (vpx_codec_enc_config_set(&encoders_[0], &configurations_[0])) { return WEBRTC_VIDEO_CODEC_ERROR; } return WEBRTC_VIDEO_CODEC_OK; } -void VP8EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, - const vpx_codec_cx_pkt& pkt, - uint32_t timestamp) { +void VP8EncoderImpl::PopulateCodecSpecific( + CodecSpecificInfo* codec_specific, + const vpx_codec_cx_pkt_t& pkt, + int stream_idx, + uint32_t timestamp, + bool only_predicting_from_key_frame) { assert(codec_specific != NULL); codec_specific->codecType = kVideoCodecVP8; - CodecSpecificInfoVP8 *vp8Info = &(codec_specific->codecSpecific.VP8); - vp8Info->pictureId = picture_id_; - vp8Info->simulcastIdx = 0; + CodecSpecificInfoVP8* vp8Info = &(codec_specific->codecSpecific.VP8); + vp8Info->pictureId = picture_id_[stream_idx]; + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { + last_key_frame_picture_id_[stream_idx] = picture_id_[stream_idx]; + } + vp8Info->simulcastIdx = stream_idx; vp8Info->keyIdx = kNoKeyIdx; // TODO(hlundin) populate this - vp8Info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0; - temporal_layers_->PopulateCodecSpecific( - (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false, vp8Info, - timestamp); - picture_id_ = (picture_id_ + 1) & 0x7FFF; // prepare next + vp8Info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) ? + true : false; + bool base_layer_sync_point = (pkt.data.frame.flags & VPX_FRAME_IS_KEY) || + only_predicting_from_key_frame; + temporal_layers_[stream_idx]->PopulateCodecSpecific(base_layer_sync_point, + vp8Info, + timestamp); + // Prepare next. 
+ picture_id_[stream_idx] = (picture_id_[stream_idx] + 1) & 0x7FFF; } -int VP8EncoderImpl::GetEncodedPartitions(const I420VideoFrame& input_image) { - vpx_codec_iter_t iter = NULL; - int part_idx = 0; - encoded_image_._length = 0; - encoded_image_._frameType = kDeltaFrame; - RTPFragmentationHeader frag_info; - frag_info.VerifyAndAllocateFragmentationHeader((1 << token_partitions_) + 1); - CodecSpecificInfo codec_specific; - - const vpx_codec_cx_pkt_t *pkt = NULL; - while ((pkt = vpx_codec_get_cx_data(encoder_, &iter)) != NULL) { - switch (pkt->kind) { - case VPX_CODEC_CX_FRAME_PKT: { - memcpy(&encoded_image_._buffer[encoded_image_._length], - pkt->data.frame.buf, - pkt->data.frame.sz); - frag_info.fragmentationOffset[part_idx] = encoded_image_._length; - frag_info.fragmentationLength[part_idx] = pkt->data.frame.sz; - frag_info.fragmentationPlType[part_idx] = 0; // not known here - frag_info.fragmentationTimeDiff[part_idx] = 0; - encoded_image_._length += pkt->data.frame.sz; - assert(encoded_image_._length <= encoded_image_._size); - ++part_idx; - break; +int VP8EncoderImpl::GetEncodedPartitions( + const I420VideoFrame& input_image, + bool only_predicting_from_key_frame) { + int stream_idx = static_cast(encoders_.size()) - 1; + for (size_t encoder_idx = 0; encoder_idx < encoders_.size(); + ++encoder_idx, --stream_idx) { + vpx_codec_iter_t iter = NULL; + int part_idx = 0; + encoded_images_[encoder_idx]._length = 0; + encoded_images_[encoder_idx]._frameType = kDeltaFrame; + RTPFragmentationHeader frag_info; + // token_partitions_ is number of bits used. 
+ frag_info.VerifyAndAllocateFragmentationHeader((1 << token_partitions_) + + 1); + CodecSpecificInfo codec_specific; + const vpx_codec_cx_pkt_t *pkt = NULL; + while ((pkt = vpx_codec_get_cx_data(&encoders_[encoder_idx], + &iter)) != NULL) { + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: { + uint32_t length = encoded_images_[encoder_idx]._length; + memcpy(&encoded_images_[encoder_idx]._buffer[length], + pkt->data.frame.buf, + pkt->data.frame.sz); + frag_info.fragmentationOffset[part_idx] = length; + frag_info.fragmentationLength[part_idx] = pkt->data.frame.sz; + frag_info.fragmentationPlType[part_idx] = 0; // not known here + frag_info.fragmentationTimeDiff[part_idx] = 0; + encoded_images_[encoder_idx]._length += pkt->data.frame.sz; + assert(length <= encoded_images_[encoder_idx]._size); + ++part_idx; + break; + } + default: + break; } - default: { + // End of frame + if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) { + // check if encoded frame is a key frame + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + encoded_images_[encoder_idx]._frameType = kKeyFrame; + rps_.EncodedKeyFrame(picture_id_[stream_idx]); + } + PopulateCodecSpecific(&codec_specific, *pkt, stream_idx, + input_image.timestamp(), + only_predicting_from_key_frame); break; } } - // End of frame - if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) { - // check if encoded frame is a key frame - if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { - encoded_image_._frameType = kKeyFrame; - rps_->EncodedKeyFrame(picture_id_); + encoded_images_[encoder_idx]._timeStamp = input_image.timestamp(); + encoded_images_[encoder_idx].capture_time_ms_ = + input_image.render_time_ms(); + temporal_layers_[stream_idx]->FrameEncoded( + encoded_images_[encoder_idx]._length, + encoded_images_[encoder_idx]._timeStamp); + if (send_stream_[stream_idx]) { + if (encoded_images_[encoder_idx]._length > 0) { + TRACE_COUNTER_ID1("webrtc", "EncodedFrameSize", encoder_idx, + 
encoded_images_[encoder_idx]._length); + encoded_images_[encoder_idx]._encodedHeight = + codec_.simulcastStream[stream_idx].height; + encoded_images_[encoder_idx]._encodedWidth = + codec_.simulcastStream[stream_idx].width; + encoded_complete_callback_->Encoded(encoded_images_[encoder_idx], + &codec_specific, &frag_info); } - PopulateCodecSpecific(&codec_specific, *pkt, input_image.timestamp()); - break; + } else { + // Required in case padding is applied to dropped frames. + encoded_images_[encoder_idx]._length = 0; + encoded_images_[encoder_idx]._frameType = kSkipFrame; + codec_specific.codecType = kVideoCodecVP8; + CodecSpecificInfoVP8* vp8Info = &(codec_specific.codecSpecific.VP8); + vp8Info->pictureId = picture_id_[stream_idx]; + vp8Info->simulcastIdx = stream_idx; + vp8Info->keyIdx = kNoKeyIdx; + encoded_complete_callback_->Encoded(encoded_images_[encoder_idx], + &codec_specific, NULL); } } - if (encoded_image_._length > 0) { - TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length); - encoded_image_._timeStamp = input_image.timestamp(); - encoded_image_.capture_time_ms_ = input_image.render_time_ms(); - encoded_image_._encodedHeight = codec_.height; - encoded_image_._encodedWidth = codec_.width; - encoded_complete_callback_->Encoded(encoded_image_, &codec_specific, - &frag_info); - int qp; - vpx_codec_control(encoder_, VP8E_GET_LAST_QUANTIZER_64, &qp); - quality_scaler_.ReportEncodedFrame(qp); - } else { - quality_scaler_.ReportDroppedFrame(); + if (encoders_.size() == 1 && send_stream_[0]) { + if (encoded_images_[0]._length > 0) { + int qp; + vpx_codec_control(&encoders_[0], VP8E_GET_LAST_QUANTIZER_64, &qp); + quality_scaler_.ReportEncodedFrame(qp); + } else { + quality_scaler_.ReportDroppedFrame(); + } } return WEBRTC_VIDEO_CODEC_OK; } -int VP8EncoderImpl::SetChannelParameters(uint32_t /*packet_loss*/, int rtt) { - rps_->SetRtt(rtt); +int VP8EncoderImpl::SetChannelParameters(uint32_t packetLoss, int rtt) { + rps_.SetRtt(rtt); return 
WEBRTC_VIDEO_CODEC_OK; } @@ -510,6 +1035,7 @@ int VP8EncoderImpl::RegisterEncodeCompleteCallback( return WEBRTC_VIDEO_CODEC_OK; } + VP8DecoderImpl::VP8DecoderImpl() : decode_complete_callback_(NULL), inited_(false), @@ -519,9 +1045,9 @@ VP8DecoderImpl::VP8DecoderImpl() image_format_(VPX_IMG_FMT_NONE), ref_frame_(NULL), propagation_cnt_(-1), - mfqe_enabled_(false), + last_frame_width_(0), + last_frame_height_(0), key_frame_required_(true) { - memset(&codec_, 0, sizeof(codec_)); } VP8DecoderImpl::~VP8DecoderImpl() { @@ -535,22 +1061,19 @@ int VP8DecoderImpl::Reset() { } InitDecode(&codec_, 1); propagation_cnt_ = -1; - mfqe_enabled_ = false; return WEBRTC_VIDEO_CODEC_OK; } -int VP8DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) { - if (inst == NULL) { - return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; - } +int VP8DecoderImpl::InitDecode(const VideoCodec* inst, + int number_of_cores) { int ret_val = Release(); if (ret_val < 0) { return ret_val; } if (decoder_ == NULL) { - decoder_ = new vpx_dec_ctx_t; + decoder_ = new vpx_codec_ctx_t; } - if (inst->codecType == kVideoCodecVP8) { + if (inst && inst->codecType == kVideoCodecVP8) { feedback_mode_ = inst->codecSpecific.VP8.feedbackModeOn; } vpx_codec_dec_cfg_t cfg; @@ -558,12 +1081,9 @@ int VP8DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) { cfg.threads = 1; cfg.h = cfg.w = 0; // set after decode - vpx_codec_flags_t flags = 0; +vpx_codec_flags_t flags = 0; #ifndef WEBRTC_ARCH_ARM flags = VPX_CODEC_USE_POSTPROC; - if (inst->codecSpecific.VP8.errorConcealmentOn) { - flags |= VPX_CODEC_USE_ERROR_CONCEALMENT; - } #ifdef INDEPENDENT_PARTITIONS flags |= VPX_CODEC_USE_INPUT_PARTITION; #endif @@ -573,34 +1093,23 @@ int VP8DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) { return WEBRTC_VIDEO_CODEC_MEMORY; } -#ifndef WEBRTC_ARCH_ARM - vp8_postproc_cfg_t ppcfg; - ppcfg.post_proc_flag = VP8_DEMACROBLOCK | VP8_DEBLOCK; - // Strength of deblocking filter. 
Valid range:[0,16] - ppcfg.deblocking_level = 3; - vpx_codec_control(decoder_, VP8_SET_POSTPROC, &ppcfg); -#endif - - if (&codec_ != inst) { - // Save VideoCodec instance for later; mainly for duplicating the decoder. + // Save VideoCodec instance for later; mainly for duplicating the decoder. + if (&codec_ != inst) codec_ = *inst; - } - propagation_cnt_ = -1; inited_ = true; // Always start with a complete key frame. key_frame_required_ = true; - return WEBRTC_VIDEO_CODEC_OK; } int VP8DecoderImpl::Decode(const EncodedImage& input_image, - bool missing_frames, - const RTPFragmentationHeader* fragmentation, - const CodecSpecificInfo* codec_specific_info, - int64_t /*render_time_ms*/) { + bool missing_frames, + const RTPFragmentationHeader* fragmentation, + const CodecSpecificInfo* codec_specific_info, + int64_t /*render_time_ms*/) { if (!inited_) { return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } @@ -621,20 +1130,18 @@ int VP8DecoderImpl::Decode(const EncodedImage& input_image, #endif #ifndef WEBRTC_ARCH_ARM - if (!mfqe_enabled_ && codec_specific_info && - codec_specific_info->codecSpecific.VP8.temporalIdx > 0) { - // Enable MFQE if we are receiving layers. - // temporalIdx is set in the jitter buffer according to what the RTP - // header says. - mfqe_enabled_ = true; - vp8_postproc_cfg_t ppcfg; - ppcfg.post_proc_flag = VP8_MFQE | VP8_DEMACROBLOCK | VP8_DEBLOCK; - ppcfg.deblocking_level = 3; - vpx_codec_control(decoder_, VP8_SET_POSTPROC, &ppcfg); + vp8_postproc_cfg_t ppcfg; + // MFQE enabled to reduce key frame popping. + ppcfg.post_proc_flag = VP8_MFQE | VP8_DEBLOCK; + // For VGA resolutions and lower, enable the demacroblocker postproc. + if (last_frame_width_ * last_frame_height_ <= 640 * 360) { + ppcfg.post_proc_flag |= VP8_DEMACROBLOCK; } + // Strength of deblocking filter. Valid range:[0,16] + ppcfg.deblocking_level = 3; + vpx_codec_control(decoder_, VP8_SET_POSTPROC, &ppcfg); #endif - // Always start with a complete key frame. 
if (key_frame_required_) { if (input_image._frameType != kKeyFrame) @@ -650,14 +1157,16 @@ int VP8DecoderImpl::Decode(const EncodedImage& input_image, // the feedback mode is enabled (RPS). // Reset on a key frame refresh. if (!feedback_mode_) { - if (input_image._frameType == kKeyFrame && input_image._completeFrame) + if (input_image._frameType == kKeyFrame && input_image._completeFrame) { propagation_cnt_ = -1; // Start count on first loss. - else if ((!input_image._completeFrame || missing_frames) && - propagation_cnt_ == -1) + } else if ((!input_image._completeFrame || missing_frames) && + propagation_cnt_ == -1) { propagation_cnt_ = 0; - if (propagation_cnt_ >= 0) + } + if (propagation_cnt_ >= 0) { propagation_cnt_++; + } } vpx_codec_iter_t iter = NULL; @@ -673,8 +1182,7 @@ int VP8DecoderImpl::Decode(const EncodedImage& input_image, propagation_cnt_ = 0; return WEBRTC_VIDEO_CODEC_ERROR; } - // We don't render this frame. - vpx_codec_get_frame(decoder_, &iter); + img = vpx_codec_get_frame(decoder_, &iter); iter = NULL; } @@ -691,14 +1199,12 @@ int VP8DecoderImpl::Decode(const EncodedImage& input_image, if (input_image._length == 0) { buffer = NULL; // Triggers full frame concealment. } - if (vpx_codec_decode(decoder_, - buffer, - input_image._length, - 0, + if (vpx_codec_decode(decoder_, buffer, input_image._length, 0, VPX_DL_REALTIME)) { // Reset to avoid requesting key frames too often. - if (propagation_cnt_ > 0) + if (propagation_cnt_ > 0) { propagation_cnt_ = 0; + } return WEBRTC_VIDEO_CODEC_ERROR; } #endif @@ -711,7 +1217,6 @@ int VP8DecoderImpl::Decode(const EncodedImage& input_image, last_keyframe_._buffer = NULL; last_keyframe_._size = 0; } - uint8_t* temp_buffer = last_keyframe_._buffer; // Save buffer ptr. uint32_t temp_size = last_keyframe_._size; // Save size. last_keyframe_ = input_image; // Shallow copy. 
@@ -738,10 +1243,9 @@ int VP8DecoderImpl::Decode(const EncodedImage& input_image, if (feedback_mode_) { // Whenever we receive an incomplete key frame all reference buffers will // be corrupt. If that happens we must request new key frames until we - // decode a complete. + // decode a complete key frame. if (input_image._frameType == kKeyFrame && !input_image._completeFrame) return WEBRTC_VIDEO_CODEC_ERROR; - // Check for reference updates and last reference buffer corruption and // signal successful reference propagation or frame corruption to the // encoder. @@ -749,8 +1253,9 @@ int VP8DecoderImpl::Decode(const EncodedImage& input_image, if (vpx_codec_control(decoder_, VP8D_GET_LAST_REF_UPDATES, &reference_updates)) { // Reset to avoid requesting key frames too often. - if (propagation_cnt_ > 0) + if (propagation_cnt_ > 0) { propagation_cnt_ = 0; + } return WEBRTC_VIDEO_CODEC_ERROR; } int corrupted = 0; @@ -791,7 +1296,7 @@ int VP8DecoderImpl::DecodePartitions( for (int i = 0; i < fragmentation->fragmentationVectorSize; ++i) { const uint8_t* partition = input_image._buffer + fragmentation->fragmentationOffset[i]; - const size_t partition_length = + const uint32_t partition_length = fragmentation->fragmentationLength[i]; if (vpx_codec_decode(decoder_, partition, @@ -809,16 +1314,18 @@ int VP8DecoderImpl::DecodePartitions( } int VP8DecoderImpl::ReturnFrame(const vpx_image_t* img, - uint32_t timestamp, - int64_t ntp_time_ms) { + uint32_t timestamp, + int64_t ntp_time_ms) { if (img == NULL) { // Decoder OK and NULL image => No show frame return WEBRTC_VIDEO_CODEC_NO_OUTPUT; } - int half_height = (img->d_h + 1) / 2; + last_frame_width_ = img->d_w; + last_frame_height_ = img->d_h; + // Allocate memory for decoded image. 
int size_y = img->stride[VPX_PLANE_Y] * img->d_h; - int size_u = img->stride[VPX_PLANE_U] * half_height; - int size_v = img->stride[VPX_PLANE_V] * half_height; + int size_u = img->stride[VPX_PLANE_U] * (img->d_h + 1) / 2; + int size_v = img->stride[VPX_PLANE_V] * (img->d_h + 1) / 2; // TODO(mikhal): This does a copy - need to SwapBuffers. decoded_image_.CreateFrame(size_y, img->planes[VPX_PLANE_Y], size_u, img->planes[VPX_PLANE_U], @@ -881,7 +1388,7 @@ VideoDecoder* VP8DecoderImpl::Copy() { return NULL; } // Create a new VideoDecoder object - VP8DecoderImpl *copy = new VP8DecoderImpl; + VP8DecoderImpl* copy = new VP8DecoderImpl; // Initialize the new decoder if (copy->InitDecode(&codec_, 1) != WEBRTC_VIDEO_CODEC_OK) { @@ -911,12 +1418,13 @@ VideoDecoder* VP8DecoderImpl::Copy() { if (!ref_frame_) { ref_frame_ = new vpx_ref_frame_t; - - unsigned int align = 16; + // Setting alignment to 32 - as that ensures at least 16 for all + // planes (32 for Y, 16 for U,V) - libvpx sets the requested stride + // for the y plane, but only half of it to the u and v planes. if (!vpx_img_alloc(&ref_frame_->img, static_cast(image_format_), decoded_image_.width(), decoded_image_.height(), - align)) { + kVp832ByteAlign)) { assert(false); delete copy; return NULL; @@ -944,15 +1452,15 @@ VideoDecoder* VP8DecoderImpl::Copy() { return static_cast(copy); } -int VP8DecoderImpl::CopyReference(VP8Decoder* copyTo) { +int VP8DecoderImpl::CopyReference(VP8DecoderImpl* copy) { // The type of frame to copy should be set in ref_frame_->frame_type // before the call to this function. 
if (vpx_codec_control(decoder_, VP8_COPY_REFERENCE, ref_frame_) != VPX_CODEC_OK) { return -1; } - if (vpx_codec_control(static_cast(copyTo)->decoder_, - VP8_SET_REFERENCE, ref_frame_) != VPX_CODEC_OK) { + if (vpx_codec_control(copy->decoder_, VP8_SET_REFERENCE, ref_frame_) + != VPX_CODEC_OK) { return -1; } return 0; diff --git a/webrtc/modules/video_coding/codecs/vp8/vp8_impl.h b/webrtc/modules/video_coding/codecs/vp8/vp8_impl.h index 06f2a26ce7..82b2f24c3b 100644 --- a/webrtc/modules/video_coding/codecs/vp8/vp8_impl.h +++ b/webrtc/modules/video_coding/codecs/vp8/vp8_impl.h @@ -10,24 +10,28 @@ * WEBRTC VP8 wrapper interface */ -#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_IMPL_H_ -#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_IMPL_H_ +#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_VP8_IMPL_H_ +#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_VP8_IMPL_H_ +#include + +// NOTE: This include order must remain to avoid compile errors, even though +// it breaks the style guide. +#include "vpx/vpx_encoder.h" +#include "vpx/vpx_decoder.h" +#include "vpx/vp8cx.h" +#include "vpx/vp8dx.h" + +#include "webrtc/common_video/interface/i420_video_frame.h" +#include "webrtc/modules/video_coding/codecs/interface/video_codec_interface.h" #include "webrtc/modules/video_coding/codecs/vp8/include/vp8.h" +#include "webrtc/modules/video_coding/codecs/vp8/reference_picture_selection.h" +#include "webrtc/modules/video_coding/utility/include/frame_dropper.h" #include "webrtc/modules/video_coding/utility/quality_scaler.h" -// VPX forward declaration -typedef struct vpx_codec_ctx vpx_codec_ctx_t; -typedef struct vpx_codec_ctx vpx_dec_ctx_t; -typedef struct vpx_codec_enc_cfg vpx_codec_enc_cfg_t; -typedef struct vpx_image vpx_image_t; -typedef struct vpx_ref_frame vpx_ref_frame_t; -struct vpx_codec_cx_pkt; - namespace webrtc { class TemporalLayers; -class ReferencePictureSelection; class VP8EncoderImpl : public VP8Encoder { public: @@ -52,44 +56,64 @@ class VP8EncoderImpl : public VP8Encoder { 
virtual int SetRates(uint32_t new_bitrate_kbit, uint32_t frame_rate); private: + void SetupTemporalLayers(int num_streams, int num_temporal_layers, + const VideoCodec& codec); + + // Determine number of encoder threads to use. + int NumberOfThreads(int width, int height, int number_of_cores); + // Call encoder initialize function and set control settings. - int InitAndSetControlSettings(const VideoCodec* inst); + int InitAndSetControlSettings(); // Update frame size for codec. int UpdateCodecFrameSize(const I420VideoFrame& input_image); void PopulateCodecSpecific(CodecSpecificInfo* codec_specific, const vpx_codec_cx_pkt& pkt, - uint32_t timestamp); + int stream_idx, + uint32_t timestamp, + bool only_predicting_from_key_frame); - int GetEncodedPartitions(const I420VideoFrame& input_image); + int GetEncodedPartitions(const I420VideoFrame& input_image, + bool only_predicting_from_key_frame); + + // Get the stream bitrate, for the stream |stream_idx|, given the bitrate + // |new_bitrate_kbit|. + int GetStreamBitrate(int stream_idx, + uint32_t new_bitrate_kbit, + bool* send_stream) const; + + // Set the stream state for stream |stream_idx|. 
+ void SetStreamState(bool send_stream, int stream_idx); - // Determine maximum target for Intra frames - // - // Input: - // - optimal_buffer_size : Optimal buffer size - // Return Value : Max target size for Intra frames represented as - // percentage of the per frame bandwidth uint32_t MaxIntraTarget(uint32_t optimal_buffer_size); - EncodedImage encoded_image_; EncodedImageCallback* encoded_complete_callback_; VideoCodec codec_; bool inited_; int64_t timestamp_; - uint16_t picture_id_; bool feedback_mode_; - int cpu_speed_; + int qp_max_; uint32_t rc_max_intra_target_; int token_partitions_; - ReferencePictureSelection* rps_; - TemporalLayers* temporal_layers_; - vpx_codec_ctx_t* encoder_; - vpx_codec_enc_cfg_t* config_; - vpx_image_t* raw_; + ReferencePictureSelection rps_; + std::vector temporal_layers_; + bool down_scale_requested_; + uint32_t down_scale_bitrate_; + FrameDropper tl0_frame_dropper_; + FrameDropper tl1_frame_dropper_; + std::vector picture_id_; + std::vector last_key_frame_picture_id_; + std::vector key_frame_request_; + std::vector send_stream_; + std::vector cpu_speed_; + std::vector raw_images_; + std::vector encoded_images_; + std::vector encoders_; + std::vector configurations_; + std::vector downsampling_factors_; QualityScaler quality_scaler_; -}; // end of VP8Encoder class - +}; // end of VP8EncoderImpl class class VP8DecoderImpl : public VP8Decoder { public: @@ -117,7 +141,7 @@ class VP8DecoderImpl : public VP8Decoder { // Copy reference image from this _decoder to the _decoder in copyTo. Set // which frame type to copy in _refFrame->frame_type before the call to // this function. 
- int CopyReference(VP8Decoder* copy); + int CopyReference(VP8DecoderImpl* copy); int DecodePartitions(const EncodedImage& input_image, const RTPFragmentationHeader* fragmentation); @@ -130,15 +154,17 @@ class VP8DecoderImpl : public VP8Decoder { DecodedImageCallback* decode_complete_callback_; bool inited_; bool feedback_mode_; - vpx_dec_ctx_t* decoder_; + vpx_codec_ctx_t* decoder_; VideoCodec codec_; EncodedImage last_keyframe_; int image_format_; vpx_ref_frame_t* ref_frame_; int propagation_cnt_; - bool mfqe_enabled_; + int last_frame_width_; + int last_frame_height_; bool key_frame_required_; -}; // end of VP8Decoder class +}; // end of VP8DecoderImpl class } // namespace webrtc -#endif // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_IMPL_H_ +#endif // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP8_VP8_IMPL_H_ +