Vp9 flexible mode fixes

- Enable vp9 flexible mode in VideoEngine if 3 spatial layers are set.
- Enable flexible mode in loopback tools and quality tests.
- Reset first active spatial layer on keyframe in encoder.
- Ensure duplicate references are not set by the sender in video header.
- Set references manually for flexible mode in vp9 encoder.
- Delay new activated layers until next base layer frame.
- On receive side put each spatial layer as a separate frame to FrameBuffer
  and return several frames combined from FrameBuffer.

Bug: webrtc:10049,webrtc:9794,webrtc:9784
Change-Id: I01e69f134cc145deba666ccc92deb1d37a324ede
Reviewed-on: https://webrtc-review.googlesource.com/c/112289
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25895}
This commit is contained in:
Ilya Nikolaevskiy 2018-12-04 15:54:52 +01:00 committed by Commit Bot
parent 77894ccb5d
commit 5546aef682
19 changed files with 694 additions and 198 deletions

View File

@ -79,6 +79,9 @@ class EncodedFrame : public webrtc::VCMEncodedFrame {
size_t num_references = 0;
int64_t references[kMaxFrameReferences];
bool inter_layer_predicted = false;
// Is this subframe the last one in the superframe (In RTP stream that would
// mean that the last packet has a marker bit set).
bool is_last_spatial_layer = true;
};
} // namespace video_coding

View File

@ -68,6 +68,10 @@ class RTC_EXPORT EncodedImage {
}
size_t size() const { return _length; }
// Sets the number of valid payload bytes, i.e. the value returned by size().
// |new_size| must not exceed the buffer capacity (_size); this is asserted
// with RTC_DCHECK_LE.
void set_size(size_t new_size) {
RTC_DCHECK_LE(new_size, _size);
_length = new_size;
}
size_t capacity() const { return _size; }
void set_buffer(uint8_t* buffer, size_t capacity) {

View File

@ -383,6 +383,9 @@ WebRtcVideoChannel::WebRtcVideoSendStream::ConfigureVideoEncoderSettings(
if (!is_screencast) {
// Limit inter-layer prediction to key pictures.
vp9_settings.interLayerPred = webrtc::InterLayerPredMode::kOnKeyPic;
} else {
// 3 spatial layers vp9 screenshare needs flexible mode.
vp9_settings.flexibleMode = vp9_settings.numberOfSpatialLayers > 2;
}
return new rtc::RefCountedObject<
webrtc::VideoEncoderConfig::Vp9EncoderSpecificSettings>(vp9_settings);

View File

@ -124,9 +124,9 @@ TEST(VideoCodecTestLibvpx, ChangeBitrateVP9) {
{500, 30, kNumFramesLong}};
std::vector<RateControlThresholds> rc_thresholds = {
{5, 1, 0, 1, 0.5, 0.1, 0, 1},
{15, 2, 0, 1, 0.5, 0.1, 0, 0},
{10, 1, 0, 1, 0.5, 0.1, 0, 0}};
{5, 2, 0, 1, 0.5, 0.1, 0, 1},
{15, 3, 0, 1, 0.5, 0.1, 0, 0},
{10, 2, 0, 1, 0.5, 0.1, 0, 0}};
std::vector<QualityThresholds> quality_thresholds = {
{34, 33, 0.90, 0.88}, {38, 35, 0.95, 0.91}, {35, 34, 0.93, 0.90}};

View File

@ -23,9 +23,9 @@ namespace webrtc {
namespace {
const size_t kMinVp9SvcBitrateKbps = 30;
const size_t kMaxNumLayersForScreenSharing = 2;
const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 5.0};
const size_t kMaxScreenSharingLayerBitrateKbps[] = {200, 500};
const size_t kMaxNumLayersForScreenSharing = 3;
const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 5.0, 30.0};
const size_t kMaxScreenSharingLayerBitrateKbps[] = {200, 500, 1250};
} // namespace
std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width,

View File

@ -48,12 +48,13 @@ TEST(SvcConfig, ScreenSharing) {
std::vector<SpatialLayer> spatial_layers =
GetSvcConfig(1920, 1080, 30, 3, 3, true);
EXPECT_EQ(spatial_layers.size(), 2UL);
EXPECT_EQ(spatial_layers.size(), 3UL);
for (const SpatialLayer& layer : spatial_layers) {
for (size_t i = 0; i < 3; ++i) {
const SpatialLayer& layer = spatial_layers[i];
EXPECT_EQ(layer.width, 1920);
EXPECT_EQ(layer.height, 1080);
EXPECT_EQ(layer.maxFramerate, 5);
EXPECT_EQ(layer.maxFramerate, (i < 2) ? 5 : 30);
EXPECT_EQ(layer.numberOfTemporalLayers, 1);
EXPECT_LE(layer.minBitrate, layer.maxBitrate);
EXPECT_LE(layer.minBitrate, layer.targetBitrate);

View File

@ -151,7 +151,7 @@ TEST(SvcRateAllocatorTest, MinBitrateToGetQualityLayer) {
const SpatialLayer* layers = codec.spatialLayers;
EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 2U);
EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 3U);
VideoBitrateAllocation allocation =
allocator.GetAllocation(layers[0].minBitrate * 1000, 30);

View File

@ -91,15 +91,16 @@ class TestVp9Impl : public VideoCodecUnitTest {
}
}
void ConfigureSvc(size_t num_spatial_layers) {
void ConfigureSvc(size_t num_spatial_layers, size_t num_temporal_layers = 1) {
codec_settings_.VP9()->numberOfSpatialLayers =
static_cast<unsigned char>(num_spatial_layers);
codec_settings_.VP9()->numberOfTemporalLayers = 1;
codec_settings_.VP9()->numberOfTemporalLayers = num_temporal_layers;
codec_settings_.VP9()->frameDroppingOn = false;
std::vector<SpatialLayer> layers = GetSvcConfig(
codec_settings_.width, codec_settings_.height,
codec_settings_.maxFramerate, num_spatial_layers, 1, false);
std::vector<SpatialLayer> layers =
GetSvcConfig(codec_settings_.width, codec_settings_.height,
codec_settings_.maxFramerate, num_spatial_layers,
num_temporal_layers, false);
for (size_t i = 0; i < layers.size(); ++i) {
codec_settings_.spatialLayers[i] = layers[i];
}
@ -401,6 +402,8 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
std::vector<EncodedImage> encoded_frame;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
frame_num == 0);
}
}
@ -418,6 +421,8 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
std::vector<EncodedImage> encoded_frame;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
frame_num == 0);
}
}
}
@ -581,6 +586,248 @@ TEST_F(TestVp9Impl,
}
}
// Screenshare, flexible mode, full inter-layer prediction. Starts with only
// the two lower spatial layers enabled, requests the top layer while SL 0 is
// being dropped by its framerate cap, and checks that once all three layers
// are returned the scalability structure (SS) is signalled on the first one.
TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
const size_t num_spatial_layers = 3;
// Chosen by hand, the 2nd frame is dropped with configured per-layer max
// framerate.
const size_t num_frames_to_encode_before_drop = 1;
// Chosen by hand, exactly 5 frames are dropped for input fps=30 and max
// framerate = 5.
const size_t num_dropped_frames = 5;
codec_settings_.maxFramerate = 30;
ConfigureSvc(num_spatial_layers);
codec_settings_.spatialLayers[0].maxFramerate = 5.0;
// Use 30 for SL 1 instead of 5, so even if the SL 0 frame is dropped due to
// framerate capping we would still get back at least a middle layer. It
// simplifies the test.
codec_settings_.spatialLayers[1].maxFramerate = 30.0;
codec_settings_.spatialLayers[2].maxFramerate = 30.0;
codec_settings_.VP9()->frameDroppingOn = false;
codec_settings_.mode = VideoCodecMode::kScreensharing;
codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
codec_settings_.VP9()->flexibleMode = true;
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
0 /* max payload size (unused) */));
// Enable all but the last layer.
VideoBitrateAllocation bitrate_allocation;
for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
bitrate_allocation.SetBitrate(
sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
}
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->SetRateAllocation(bitrate_allocation,
codec_settings_.maxFramerate));
// Encode enough frames to force drop due to framerate capping.
for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop;
++frame_num) {
// Both enabled layers are expected back for each of these pictures.
SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
}
// Enable the last layer.
bitrate_allocation.SetBitrate(
num_spatial_layers - 1, 0,
codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate *
1000);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->SetRateAllocation(bitrate_allocation,
codec_settings_.maxFramerate));
for (size_t frame_num = 0; frame_num < num_dropped_frames; ++frame_num) {
// Only a single frame (the middle layer) is waited for per picture.
SetWaitForEncodedFramesThreshold(1);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
// First layer is dropped due to frame rate cap. The last layer should not
// be enabled yet.
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
}
// NOTE(review): the wait threshold is 2 although 3 frames are expected below
// -- confirm whether this should be num_spatial_layers.
SetWaitForEncodedFramesThreshold(2);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
// Now all 3 layers should be encoded.
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
EXPECT_EQ(encoded_frames.size(), 3u);
// Scalability structure has to be triggered.
EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
}
// Screenshare, flexible mode, full inter-layer prediction. Starts with all
// three spatial layers enabled, disables the top layer while SL 0 is being
// dropped by its framerate cap, and checks that the top layer disappears
// right away while the scalability structure (SS) is only signalled once an
// SL 0 frame is produced again.
TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) {
const size_t num_spatial_layers = 3;
// Chosen by hand, the 2nd frame is dropped with configured per-layer max
// framerate.
const size_t num_frames_to_encode_before_drop = 1;
// Chosen by hand, exactly 5 frames are dropped for input fps=30 and max
// framerate = 5.
const size_t num_dropped_frames = 5;
codec_settings_.maxFramerate = 30;
ConfigureSvc(num_spatial_layers);
codec_settings_.spatialLayers[0].maxFramerate = 5.0;
// Use 30 for SL 1 instead of 5, so even if the SL 0 frame is dropped due to
// framerate capping we would still get back at least a middle layer. It
// simplifies the test.
codec_settings_.spatialLayers[1].maxFramerate = 30.0;
codec_settings_.spatialLayers[2].maxFramerate = 30.0;
codec_settings_.VP9()->frameDroppingOn = false;
codec_settings_.mode = VideoCodecMode::kScreensharing;
codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
codec_settings_.VP9()->flexibleMode = true;
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
0 /* max payload size (unused) */));
// All layers are enabled from the start.
VideoBitrateAllocation bitrate_allocation;
for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
bitrate_allocation.SetBitrate(
sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
}
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->SetRateAllocation(bitrate_allocation,
codec_settings_.maxFramerate));
// Encode enough frames to force drop due to framerate capping.
for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop;
++frame_num) {
SetWaitForEncodedFramesThreshold(num_spatial_layers);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
}
// Now the first layer should not have frames in it.
for (size_t frame_num = 0; frame_num < num_dropped_frames - 2; ++frame_num) {
SetWaitForEncodedFramesThreshold(2);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
// First layer is dropped due to frame rate cap. The last layer is still
// enabled here, so two frames (SL 1 and SL 2) are waited for.
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
// First layer is skipped.
EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1);
}
// Disable the last layer.
bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->SetRateAllocation(bitrate_allocation,
codec_settings_.maxFramerate));
// Still expected to drop first layer. Last layer has to be disabled also.
for (size_t frame_num = num_dropped_frames - 2;
frame_num < num_dropped_frames; ++frame_num) {
// Expect back one frame.
SetWaitForEncodedFramesThreshold(1);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
// First layer is dropped due to frame rate cap and the last layer has just
// been disabled, so only the middle layer is waited for.
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
// First layer is skipped.
EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1);
// No SS data on non-base spatial layer.
EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
}
// The framerate cap no longer drops SL 0, so the two remaining layers are
// waited for.
SetWaitForEncodedFramesThreshold(2);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
// First layer is not skipped now.
EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 0);
// SS data should be present.
EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
}
// Real-time video, non-flexible mode, inter-layer prediction on key pictures
// only. Disables the top spatial layer right after a TL0 frame and checks
// that the following TL1 picture already lacks that layer, while the updated
// scalability structure (SS) is only signalled on the next TL0 frame.
TEST_F(TestVp9Impl, DisableNewLayerInVideoDelaysSsInfoTillTL0) {
const size_t num_spatial_layers = 3;
const size_t num_temporal_layers = 2;
// Two temporal layers are used so that TL0 and TL1 pictures alternate, as the
// temporal_idx expectations below assume.
ConfigureSvc(num_spatial_layers, num_temporal_layers);
codec_settings_.VP9()->frameDroppingOn = false;
codec_settings_.mode = VideoCodecMode::kRealtimeVideo;
codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
codec_settings_.VP9()->flexibleMode = false;
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
0 /* max payload size (unused) */));
// Enable all the layers.
VideoBitrateAllocation bitrate_allocation;
for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
// The spatial layer's target bitrate is split evenly between its temporal
// layers.
bitrate_allocation.SetBitrate(
sl_idx, tl_idx,
codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 /
num_temporal_layers);
}
}
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->SetRateAllocation(bitrate_allocation,
codec_settings_.maxFramerate));
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_specific_info;
// Encode one TL0 frame
SetWaitForEncodedFramesThreshold(num_spatial_layers);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u);
// Disable the last layer.
for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
bitrate_allocation.SetBitrate(num_spatial_layers - 1, tl_idx, 0);
}
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->SetRateAllocation(bitrate_allocation,
codec_settings_.maxFramerate));
// Next is TL1 frame. The last layer is disabled immediately, but SS structure
// is not provided here.
SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1u);
// Next is TL0 frame, which should have delayed SS structure.
SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u);
EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
EXPECT_TRUE(codec_specific_info[0]
.codecSpecific.VP9.spatial_layer_resolution_present);
// The disabled top layer is expected to be reported with zero width.
EXPECT_EQ(
codec_specific_info[0].codecSpecific.VP9.width[num_spatial_layers - 1],
0u);
}
TEST_F(TestVp9Impl,
LowLayerMarkedAsRefIfHighLayerNotEncodedAndInterLayerPredIsEnabled) {
ConfigureSvc(3);
@ -766,6 +1013,7 @@ TEST_F(TestVp9ImplFrameDropping, DifferentFrameratePerSpatialLayer) {
codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
codec_settings_.VP9()->frameDroppingOn = false;
codec_settings_.VP9()->flexibleMode = true;
VideoBitrateAllocation bitrate_allocation;
for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {

View File

@ -49,6 +49,9 @@ uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
int kMaxNumTiles4kVideo = 8;
// Maximum allowed PID difference for variable frame-rate mode.
const int kMaxAllowedPidDIff = 8;
// Only positive speeds, range for real-time coding currently is: 5 - 8.
// Lower means slower/better quality, higher means fastest/lower quality.
int GetCpuSpeed(int width, int height) {
@ -124,6 +127,18 @@ ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t,
}
return ColorSpace(primaries, transfer, matrix, range);
}
// Returns true if there is at least one spatial layer whose summed bitrate is
// non-zero in |first| but zero in |second|, i.e. |first| enables a layer that
// |second| does not. All kMaxSpatialLayers indices are inspected.
bool MoreLayersEnabled(const VideoBitrateAllocation& first,
                       const VideoBitrateAllocation& second) {
  size_t layer = 0;
  while (layer < kMaxSpatialLayers) {
    const bool active_in_first = first.GetSpatialLayerSum(layer) > 0;
    // |second| is only queried for layers that are active in |first|.
    if (active_in_first && second.GetSpatialLayerSum(layer) == 0) {
      return true;
    }
    ++layer;
  }
  return false;
}
} // namespace
void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
@ -154,12 +169,12 @@ VP9EncoderImpl::VP9EncoderImpl(const cricket::VideoCodec& codec)
field_trial::IsEnabled("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation")),
is_svc_(false),
inter_layer_pred_(InterLayerPredMode::kOn),
external_ref_control_(
field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl")),
external_ref_control_(false), // Set in InitEncode because of tests.
trusted_rate_controller_(
field_trial::IsEnabled(kVp9TrustedRateControllerFieldTrial)),
full_superframe_drop_(true),
first_frame_in_picture_(true),
ss_info_needed_(false),
is_flexible_mode_(false) {
memset(&codec_, 0, sizeof(codec_));
memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
@ -314,14 +329,8 @@ int VP9EncoderImpl::SetRateAllocation(
codec_.maxFramerate = frame_rate;
if (!SetSvcRates(bitrate_allocation)) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
requested_bitrate_allocation_ = bitrate_allocation;
// Update encoder context
if (vpx_codec_enc_config_set(encoder_, config_)) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
return WEBRTC_VIDEO_CODEC_OK;
}
@ -461,6 +470,27 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
is_flexible_mode_ = inst->VP9().flexibleMode;
inter_layer_pred_ = inst->VP9().interLayerPred;
different_framerates_used_ = false;
for (size_t sl_idx = 1; sl_idx < num_spatial_layers_; ++sl_idx) {
if (std::abs(codec_.spatialLayers[sl_idx].maxFramerate -
codec_.spatialLayers[0].maxFramerate) > 1e-9) {
different_framerates_used_ = true;
}
}
if (different_framerates_used_ && !is_flexible_mode_) {
RTC_LOG(LS_ERROR) << "Flexible mode required for different framerates on "
"different spatial layers";
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
// External reference control is required for different frame rate on spatial
// layers because libvpx generates rtp incompatible references in this case.
external_ref_control_ = field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl") ||
different_framerates_used_;
if (num_temporal_layers_ == 1) {
gof_.SetGofInfoVP9(kTemporalStructureMode1);
config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
@ -493,8 +523,14 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
inter_layer_pred_ = inst->VP9().interLayerPred;
if (external_ref_control_) {
config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
if (num_temporal_layers_ > 1 && different_framerates_used_) {
// External reference control for several temporal layers with different
// frame rates on spatial layers is not implemented yet.
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
}
ref_buf_.clear();
return InitAndSetControlSettings(inst);
@ -575,9 +611,9 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
}
SvcRateAllocator init_allocator(codec_);
VideoBitrateAllocation allocation = init_allocator.GetAllocation(
current_bitrate_allocation_ = init_allocator.GetAllocation(
inst->startBitrate * 1000, inst->maxFramerate);
if (!SetSvcRates(allocation)) {
if (!SetSvcRates(current_bitrate_allocation_)) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
@ -595,6 +631,7 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
inst->VP9().adaptiveQpMode ? 3 : 0);
vpx_codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
vpx_codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);
if (is_svc_) {
vpx_codec_control(encoder_, VP9E_SET_SVC, 1);
@ -696,21 +733,21 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
}
}
if (VideoCodecMode::kScreensharing == codec_.mode && !force_key_frame_) {
// Skip encoding spatial layer frames if their target frame rate is lower
// than actual input frame rate.
vpx_svc_layer_id_t layer_id = {0};
vpx_svc_layer_id_t layer_id = {0};
if (!force_key_frame_) {
const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];
const uint32_t frame_timestamp_ms =
1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
if (VideoCodecMode::kScreensharing == codec_.mode) {
const uint32_t frame_timestamp_ms =
1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
++layer_id.spatial_layer_id;
} else {
break;
for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
++layer_id.spatial_layer_id;
} else {
break;
}
}
}
@ -719,8 +756,42 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
// Drop entire picture.
return WEBRTC_VIDEO_CODEC_OK;
}
}
vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
for (int sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id;
}
vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
if (requested_bitrate_allocation_) {
bool more_layers_requested = MoreLayersEnabled(
*requested_bitrate_allocation_, current_bitrate_allocation_);
bool less_layers_requested = MoreLayersEnabled(
current_bitrate_allocation_, *requested_bitrate_allocation_);
// In SVC can enable new layers only if all lower layers are encoded and at
// the base temporal layer.
// This will delay rate allocation change until the next frame on the base
// spatial layer.
// In KSVC or simulcast modes KF will be generated for a new layer, so can
// update allocation any time.
bool can_upswitch =
inter_layer_pred_ != InterLayerPredMode::kOn ||
(layer_id.spatial_layer_id == 0 && layer_id.temporal_layer_id == 0);
if (!more_layers_requested || can_upswitch) {
current_bitrate_allocation_ = *requested_bitrate_allocation_;
requested_bitrate_allocation_ = absl::nullopt;
if (!SetSvcRates(current_bitrate_allocation_)) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
if (less_layers_requested || more_layers_requested) {
ss_info_needed_ = true;
}
}
}
if (vpx_codec_enc_config_set(encoder_, config_)) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
RTC_DCHECK_EQ(input_image.width(), raw_->d_w);
@ -780,7 +851,8 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
}
if (external_ref_control_) {
vpx_svc_ref_frame_config_t ref_config = SetReferences(force_key_frame_);
vpx_svc_ref_frame_config_t ref_config =
SetReferences(force_key_frame_, layer_id.spatial_layer_id);
if (VideoCodecMode::kScreensharing == codec_.mode) {
for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
@ -840,9 +912,22 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
vp9_info->ss_data_available =
(pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
pics_since_key_ = 0;
} else if (first_frame_in_picture_) {
++pics_since_key_;
}
vpx_svc_layer_id_t layer_id = {0};
vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
if (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
layer_id.spatial_layer_id == 0) {
// Force SS info after the layers configuration has changed.
vp9_info->ss_data_available = true;
ss_info_needed_ = false;
}
RTC_CHECK_GT(num_temporal_layers_, 0);
RTC_CHECK_GT(num_active_spatial_layers_, 0);
if (num_temporal_layers_ == 1) {
@ -864,12 +949,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
// TODO(asapersson): this info has to be obtained from the encoder.
vp9_info->temporal_up_switch = false;
if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
pics_since_key_ = 0;
} else if (first_frame_in_picture_) {
++pics_since_key_;
}
const bool is_key_pic = (pics_since_key_ == 0);
const bool is_inter_layer_pred_allowed =
(inter_layer_pred_ == InterLayerPredMode::kOn ||
@ -901,8 +980,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
vp9_info->gof_idx = kNoGofIdx;
FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
vp9_info);
// TODO(webrtc:9794): Add fake reference to empty reference list to
// workaround the frame buffer issue on receiver.
} else {
vp9_info->gof_idx =
static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
@ -985,6 +1062,8 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
size_t max_ref_temporal_layer_id = 0;
std::vector<size_t> ref_pid_list;
vp9_info->num_ref_pics = 0;
for (const RefFrameBuffer& ref_buf : ref_buf_list) {
RTC_DCHECK_LE(ref_buf.pic_num, pic_num);
@ -997,6 +1076,16 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
}
RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);
// Encoder may reference several spatial layers on the same previous
// frame in case if some spatial layers are skipped on the current frame.
// We shouldn't put duplicate references as it may break some old
// clients and isn't RTP compatible.
if (std::find(ref_pid_list.begin(), ref_pid_list.end(),
ref_buf.pic_num) != ref_pid_list.end()) {
continue;
}
ref_pid_list.push_back(ref_buf.pic_num);
const size_t p_diff = pic_num - ref_buf.pic_num;
RTC_DCHECK_LE(p_diff, 127UL);
@ -1038,20 +1127,13 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
if (enc_layer_conf.update_last[layer_id.spatial_layer_id]) {
ref_buf_[enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id]] =
frame_buf;
for (size_t i = 0; i < kNumVp9Buffers; ++i) {
if (enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id] &
(1 << i)) {
ref_buf_[i] = frame_buf;
}
}
if (enc_layer_conf.update_alt_ref[layer_id.spatial_layer_id]) {
ref_buf_[enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id]] =
frame_buf;
}
if (enc_layer_conf.update_golden[layer_id.spatial_layer_id]) {
ref_buf_[enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id]] =
frame_buf;
}
} else {
RTC_DCHECK_EQ(num_spatial_layers_, 1);
RTC_DCHECK_EQ(num_temporal_layers_, 1);
@ -1061,7 +1143,9 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
}
}
vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) {
vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(
bool is_key_pic,
size_t first_active_spatial_layer_id) {
// kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs.
RTC_DCHECK_LE(gof_.num_frames_in_gof, 4);
@ -1083,8 +1167,10 @@ vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) {
// for temporal references plus 1 buffer for spatial reference. 7 buffers
// in total.
for (size_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
const size_t gof_idx = pics_since_key_ % gof_.num_frames_in_gof;
for (size_t sl_idx = first_active_spatial_layer_id;
sl_idx < num_active_spatial_layers_; ++sl_idx) {
const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1;
const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof;
if (!is_key_pic) {
// Set up temporal reference.
@ -1096,36 +1182,47 @@ vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) {
// Sanity check that reference picture number is smaller than current
// picture number.
const size_t curr_pic_num = pics_since_key_ + 1;
RTC_DCHECK_LT(ref_buf_[buf_idx].pic_num, curr_pic_num);
const size_t pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
// Incorrect spatial layer may be in the buffer due to a key-frame.
const bool same_spatial_layer =
ref_buf_[buf_idx].spatial_layer_id == sl_idx;
bool correct_pid = false;
if (different_framerates_used_) {
correct_pid = pid_diff < kMaxAllowedPidDIff;
} else {
// The code below assumes a single temporal reference.
RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
correct_pid = pid_diff == gof_.pid_diff[gof_idx][0];
}
// The code below assumes a single temporal reference.
RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
if (pid_diff == gof_.pid_diff[gof_idx][0]) {
if (same_spatial_layer && correct_pid) {
ref_config.lst_fb_idx[sl_idx] = buf_idx;
ref_config.reference_last[sl_idx] = 1;
} else {
// This reference doesn't match with one specified by GOF. This can
// only happen if spatial layer is enabled dynamically without key
// frame. Spatial prediction is supposed to be enabled in this case.
RTC_DCHECK(is_inter_layer_pred_allowed);
RTC_DCHECK(is_inter_layer_pred_allowed &&
sl_idx > first_active_spatial_layer_id);
}
}
if (is_inter_layer_pred_allowed && sl_idx > 0) {
if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) {
// Set up spatial reference.
RTC_DCHECK(last_updated_buf_idx);
ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx;
ref_config.reference_golden[sl_idx] = 1;
} else {
RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 || sl_idx == 0 ||
RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 ||
sl_idx == first_active_spatial_layer_id ||
inter_layer_pred_ == InterLayerPredMode::kOff);
}
last_updated_buf_idx.reset();
if (gof_.temporal_idx[gof_idx] <= num_temporal_layers_ - 1) {
if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 ||
num_temporal_layers_ == 1) {
last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx];
// Ensure last frame buffer is not used for temporal prediction (it is

View File

@ -70,7 +70,9 @@ class VP9EncoderImpl : public VP9Encoder {
CodecSpecificInfoVP9* vp9_info);
void UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
const size_t pic_num);
vpx_svc_ref_frame_config_t SetReferences(bool is_key_pic);
vpx_svc_ref_frame_config_t SetReferences(
bool is_key_pic,
size_t first_active_spatial_layer_id);
bool ExplicitlyConfiguredSpatialLayers() const;
bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation);
@ -110,6 +112,7 @@ class VP9EncoderImpl : public VP9Encoder {
GofInfoVP9 gof_; // Contains each frame's temporal information for
// non-flexible mode.
bool force_key_frame_;
bool different_framerates_used_;
size_t pics_since_key_;
uint8_t num_temporal_layers_;
uint8_t num_spatial_layers_; // Number of configured SLs
@ -121,6 +124,9 @@ class VP9EncoderImpl : public VP9Encoder {
const bool trusted_rate_controller_;
const bool full_superframe_drop_;
bool first_frame_in_picture_;
VideoBitrateAllocation current_bitrate_allocation_;
absl::optional<VideoBitrateAllocation> requested_bitrate_allocation_;
bool ss_info_needed_;
std::vector<FramerateController> framerate_controller_;

View File

@ -67,9 +67,11 @@ class VCMEncodedFrame : protected EncodedImage {
/**
* Frame RTP timestamp (90kHz)
*/
using EncodedImage::Timestamp;
using EncodedImage::set_size;
using EncodedImage::SetTimestamp;
using EncodedImage::size;
using EncodedImage::Timestamp;
/**
* Get render time in milliseconds
*/
@ -90,6 +92,7 @@ class VCMEncodedFrame : protected EncodedImage {
* Get video timing
*/
EncodedImage::Timing video_timing() const { return timing_; }
/**
* Get a pointer to the stored video timing, so that it can be modified in
* place
*/
EncodedImage::Timing* video_timing_mutable() { return &timing_; }
/**
* True if this frame is complete, false otherwise
*/
@ -109,8 +112,10 @@ class VCMEncodedFrame : protected EncodedImage {
* the object.
*/
const CodecSpecificInfo* CodecSpecific() const { return &_codecSpecificInfo; }
void SetCodecSpecific(const CodecSpecificInfo* codec_specific) {
_codecSpecificInfo = *codec_specific;
}
protected:
/**
* Verifies that current allocated buffer size is larger than or equal to the
* input size.
@ -121,6 +126,7 @@ class VCMEncodedFrame : protected EncodedImage {
*/
void VerifyAndAllocate(size_t minimumSize);
protected:
void Reset();
void CopyCodecSpecific(const RTPVideoHeader* header);

View File

@ -87,10 +87,10 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
wait_ms = max_wait_time_ms;
// Need to hold |crit_| in order to use |frames_|, therefore we
// Need to hold |crit_| in order to access |frames_to_decode_|, therefore we
// set it here in the loop instead of outside the loop in order to not
// acquire the lock unnecesserily.
next_frame_it_ = frames_.end();
// acquire the lock unnecessarily.
frames_to_decode_.clear();
// |frame_it| points to the first frame after the
// |last_decoded_frame_it_|.
@ -128,7 +128,53 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
continue;
}
next_frame_it_ = frame_it;
// Only ever return all parts of a superframe. Therefore skip this
// frame if it's not a beginning of a superframe.
if (frame->inter_layer_predicted) {
continue;
}
// Gather all remaining frames for the same superframe.
std::vector<FrameMap::iterator> current_superframe;
current_superframe.push_back(frame_it);
bool last_layer_completed =
frame_it->second.frame->is_last_spatial_layer;
FrameMap::iterator next_frame_it = frame_it;
while (true) {
++next_frame_it;
if (next_frame_it == frames_.end() ||
next_frame_it->first.picture_id != frame->id.picture_id ||
!next_frame_it->second.continuous) {
break;
}
// Check if the next frame has some undecoded references other than
// the previous frame in the same superframe.
size_t num_allowed_undecoded_refs =
(next_frame_it->second.frame->inter_layer_predicted) ? 1 : 0;
if (next_frame_it->second.num_missing_decodable >
num_allowed_undecoded_refs) {
break;
}
// All frames in the superframe should have the same timestamp.
if (frame->Timestamp() != next_frame_it->second.frame->Timestamp()) {
RTC_LOG(LS_WARNING)
<< "Frames in a single superframe have different"
" timestamps. Skipping undecodable superframe.";
break;
}
current_superframe.push_back(next_frame_it);
last_layer_completed =
next_frame_it->second.frame->is_last_spatial_layer;
}
// Check if the current superframe is complete.
// TODO(bugs.webrtc.org/10064): consider returning all available to
// decode frames even if the superframe is not complete yet.
if (!last_layer_completed) {
continue;
}
frames_to_decode_ = std::move(current_superframe);
if (frame->RenderTime() == -1) {
frame->SetRenderTime(
timing_->RenderTimeMs(frame->Timestamp(), now_ms));
@ -154,9 +200,10 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
{
rtc::CritScope lock(&crit_);
now_ms = clock_->TimeInMilliseconds();
if (next_frame_it_ != frames_.end()) {
std::unique_ptr<EncodedFrame> frame =
std::move(next_frame_it_->second.frame);
std::vector<EncodedFrame*> frames_out;
for (const FrameMap::iterator& frame_it : frames_to_decode_) {
RTC_DCHECK(frame_it != frames_.end());
EncodedFrame* frame = frame_it->second.frame.release();
if (!frame->delayed_by_retransmission()) {
int64_t frame_delay;
@ -187,14 +234,22 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
UpdateJitterDelay();
UpdateTimingFrameInfo();
PropagateDecodability(next_frame_it_->second);
PropagateDecodability(frame_it->second);
AdvanceLastDecodedFrame(next_frame_it_);
AdvanceLastDecodedFrame(frame_it);
last_decoded_frame_timestamp_ = frame->Timestamp();
*frame_out = std::move(frame);
frames_out.push_back(frame);
}
if (!frames_out.empty()) {
if (frames_out.size() == 1) {
frame_out->reset(frames_out[0]);
} else {
frame_out->reset(CombineAndDeleteFrames(frames_out));
}
return kFrameFound;
}
}
} // rtc::Critscope lock(&crit_)
if (latest_return_time_ms - now_ms > 0) {
// If |frames_to_decode_| is empty and there is still time left, it
@ -203,7 +258,6 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
// remaining time and then return.
return NextFrame(latest_return_time_ms - now_ms, frame_out);
}
return kTimeout;
}
@ -606,11 +660,38 @@ void FrameBuffer::ClearFramesAndHistory() {
frames_.clear();
last_decoded_frame_it_ = frames_.end();
last_continuous_frame_it_ = frames_.end();
next_frame_it_ = frames_.end();
frames_to_decode_.clear();
num_frames_history_ = 0;
num_frames_buffered_ = 0;
}
// Concatenates the payloads of all |frames| into |frames[0]| and returns it.
//
// |frames| must be non-empty. The first frame is grown to hold every payload
// back to back, in the order given. Each subsequent frame is copied in, its
// network2/receive-finish timing is propagated to the combined frame (so the
// result ends up with the timing of the last frame), and it is then deleted.
// The caller owns the returned pointer; all other pointers in |frames| are
// dangling after this call.
EncodedFrame* FrameBuffer::CombineAndDeleteFrames(
    const std::vector<EncodedFrame*>& frames) const {
  RTC_DCHECK(!frames.empty());
  EncodedFrame* const combined = frames.front();

  // Size of the merged payload: every layer, including the first one.
  size_t total_length = 0;
  for (const EncodedFrame* layer : frames) {
    total_length += layer->size();
  }
  combined->VerifyAndAllocate(total_length);

  // Query the buffer only after VerifyAndAllocate(), and start writing right
  // behind the payload the first frame already holds.
  uint8_t* write_pos = combined->MutableBuffer() + combined->size();
  auto* const combined_timing = combined->video_timing_mutable();

  // Append all remaining frames to the first one.
  for (auto it = frames.begin() + 1; it != frames.end(); ++it) {
    EncodedFrame* const layer = *it;
    const size_t layer_size = layer->size();
    memcpy(write_pos, layer->Buffer(), layer_size);
    write_pos += layer_size;

    combined_timing->network2_timestamp_ms =
        layer->video_timing().network2_timestamp_ms;
    combined_timing->receive_finish_ms =
        layer->video_timing().receive_finish_ms;

    // The appended frame is no longer needed once its data has been copied.
    delete layer;
  }

  combined->set_size(total_length);
  return combined;
}
FrameBuffer::FrameInfo::FrameInfo() = default;
FrameBuffer::FrameInfo::FrameInfo(FrameInfo&&) = default;
FrameBuffer::FrameInfo::~FrameInfo() = default;

View File

@ -15,6 +15,7 @@
#include <map>
#include <memory>
#include <utility>
#include <vector>
#include "api/video/encoded_frame.h"
#include "modules/video_coding/include/video_coding_defines.h"
@ -156,6 +157,13 @@ class FrameBuffer {
bool HasBadRenderTiming(const EncodedFrame& frame, int64_t now_ms)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
// The cleaner solution would be to have the NextFrame function return a
// vector of frames, but until the decoding pipeline can support decoding
// multiple frames at the same time we combine all frames to one frame and
// return it. See bugs.webrtc.org/10064
EncodedFrame* CombineAndDeleteFrames(
const std::vector<EncodedFrame*>& frames) const;
FrameMap frames_ RTC_GUARDED_BY(crit_);
rtc::CriticalSection crit_;
@ -167,7 +175,7 @@ class FrameBuffer {
absl::optional<uint32_t> last_decoded_frame_timestamp_ RTC_GUARDED_BY(crit_);
FrameMap::iterator last_decoded_frame_it_ RTC_GUARDED_BY(crit_);
FrameMap::iterator last_continuous_frame_it_ RTC_GUARDED_BY(crit_);
FrameMap::iterator next_frame_it_ RTC_GUARDED_BY(crit_);
std::vector<FrameMap::iterator> frames_to_decode_ RTC_GUARDED_BY(crit_);
int num_frames_history_ RTC_GUARDED_BY(crit_);
int num_frames_buffered_ RTC_GUARDED_BY(crit_);
bool stopped_ RTC_GUARDED_BY(crit_);

View File

@ -124,6 +124,7 @@ class TestFrameBuffer2 : public ::testing::Test {
static constexpr int kFps1 = 1000;
static constexpr int kFps10 = kFps1 / 10;
static constexpr int kFps20 = kFps1 / 20;
static constexpr size_t kFrameSize = 10;
TestFrameBuffer2()
: clock_(0),
@ -150,6 +151,7 @@ class TestFrameBuffer2 : public ::testing::Test {
uint8_t spatial_layer,
int64_t ts_ms,
bool inter_layer_predicted,
bool last_spatial_layer,
T... refs) {
static_assert(sizeof...(refs) <= kMaxReferences,
"To many references specified for EncodedFrame.");
@ -162,6 +164,10 @@ class TestFrameBuffer2 : public ::testing::Test {
frame->SetTimestamp(ts_ms * 90);
frame->num_references = references.size();
frame->inter_layer_predicted = inter_layer_predicted;
frame->is_last_spatial_layer = last_spatial_layer;
// Add some data to buffer.
frame->VerifyAndAllocate(kFrameSize);
frame->SetSize(kFrameSize);
for (size_t r = 0; r < references.size(); ++r)
frame->references[r] = references[r];
@ -194,6 +200,13 @@ class TestFrameBuffer2 : public ::testing::Test {
ASSERT_EQ(spatial_layer, frames_[index]->id.spatial_layer);
}
// Asserts that the frame recorded at position |index| of |frames_| exists and
// that its payload size equals |size|.
void CheckFrameSize(size_t index, size_t size) {
// |frames_| is only touched while holding |crit_|.
rtc::CritScope lock(&crit_);
// Guard the indexing below: |index| must be in range and the slot non-null.
ASSERT_LT(index, frames_.size());
ASSERT_TRUE(frames_[index]);
ASSERT_EQ(frames_[index]->size(), size);
}
void CheckNoFrame(size_t index) {
rtc::CritScope lock(&crit_);
ASSERT_LT(index, frames_.size());
@ -246,7 +259,7 @@ TEST_F(TestFrameBuffer2, WaitForFrame) {
uint32_t ts = Rand();
ExtractFrame(50);
InsertFrame(pid, 0, ts, false);
InsertFrame(pid, 0, ts, false, true);
CheckFrame(0, pid, 0);
}
@ -254,13 +267,11 @@ TEST_F(TestFrameBuffer2, OneSuperFrame) {
uint16_t pid = Rand();
uint32_t ts = Rand();
InsertFrame(pid, 0, ts, false);
ExtractFrame();
InsertFrame(pid, 1, ts, true);
InsertFrame(pid, 0, ts, false, false);
InsertFrame(pid, 1, ts, true, true);
ExtractFrame();
CheckFrame(0, pid, 0);
CheckFrame(1, pid, 1);
}
TEST_F(TestFrameBuffer2, SetPlayoutDelay) {
@ -293,8 +304,8 @@ TEST_F(TestFrameBuffer2, DISABLED_OneUnorderedSuperFrame) {
uint32_t ts = Rand();
ExtractFrame(50);
InsertFrame(pid, 1, ts, true);
InsertFrame(pid, 0, ts, false);
InsertFrame(pid, 1, ts, true, true);
InsertFrame(pid, 0, ts, false, false);
ExtractFrame();
CheckFrame(0, pid, 0);
@ -305,14 +316,14 @@ TEST_F(TestFrameBuffer2, DISABLED_OneLayerStreamReordered) {
uint16_t pid = Rand();
uint32_t ts = Rand();
InsertFrame(pid, 0, ts, false);
InsertFrame(pid, 0, ts, false, true);
ExtractFrame();
CheckFrame(0, pid, 0);
for (int i = 1; i < 10; i += 2) {
ExtractFrame(50);
InsertFrame(pid + i + 1, 0, ts + (i + 1) * kFps10, false, pid + i);
InsertFrame(pid + i + 1, 0, ts + (i + 1) * kFps10, false, true, pid + i);
clock_.AdvanceTimeMilliseconds(kFps10);
InsertFrame(pid + i, 0, ts + i * kFps10, false, pid + i - 1);
InsertFrame(pid + i, 0, ts + i * kFps10, false, true, pid + i - 1);
clock_.AdvanceTimeMilliseconds(kFps10);
ExtractFrame();
CheckFrame(i, pid + i, 0);
@ -330,9 +341,9 @@ TEST_F(TestFrameBuffer2, MissingFrame) {
uint16_t pid = Rand();
uint32_t ts = Rand();
InsertFrame(pid, 0, ts, false);
InsertFrame(pid + 2, 0, ts, false, pid);
InsertFrame(pid + 3, 0, ts, false, pid + 1, pid + 2);
InsertFrame(pid, 0, ts, false, true);
InsertFrame(pid + 2, 0, ts, false, true, pid);
InsertFrame(pid + 3, 0, ts, false, true, pid + 1, pid + 2);
ExtractFrame();
ExtractFrame();
ExtractFrame();
@ -346,11 +357,11 @@ TEST_F(TestFrameBuffer2, OneLayerStream) {
uint16_t pid = Rand();
uint32_t ts = Rand();
InsertFrame(pid, 0, ts, false);
InsertFrame(pid, 0, ts, false, true);
ExtractFrame();
CheckFrame(0, pid, 0);
for (int i = 1; i < 10; ++i) {
InsertFrame(pid + i, 0, ts + i * kFps10, false, pid + i - 1);
InsertFrame(pid + i, 0, ts + i * kFps10, false, true, pid + i - 1);
ExtractFrame();
clock_.AdvanceTimeMilliseconds(kFps10);
CheckFrame(i, pid + i, 0);
@ -361,12 +372,13 @@ TEST_F(TestFrameBuffer2, DropTemporalLayerSlowDecoder) {
uint16_t pid = Rand();
uint32_t ts = Rand();
InsertFrame(pid, 0, ts, false);
InsertFrame(pid + 1, 0, ts + kFps20, false, pid);
InsertFrame(pid, 0, ts, false, true);
InsertFrame(pid + 1, 0, ts + kFps20, false, true, pid);
for (int i = 2; i < 10; i += 2) {
uint32_t ts_tl0 = ts + i / 2 * kFps10;
InsertFrame(pid + i, 0, ts_tl0, false, pid + i - 2);
InsertFrame(pid + i + 1, 0, ts_tl0 + kFps20, false, pid + i, pid + i - 1);
InsertFrame(pid + i, 0, ts_tl0, false, true, pid + i - 2);
InsertFrame(pid + i + 1, 0, ts_tl0 + kFps20, false, true, pid + i,
pid + i - 1);
}
for (int i = 0; i < 10; ++i) {
@ -386,49 +398,15 @@ TEST_F(TestFrameBuffer2, DropTemporalLayerSlowDecoder) {
CheckNoFrame(9);
}
TEST_F(TestFrameBuffer2, DropSpatialLayerSlowDecoder) {
uint16_t pid = Rand();
uint32_t ts = Rand();
InsertFrame(pid, 0, ts, false);
InsertFrame(pid, 1, ts, false);
for (int i = 1; i < 6; ++i) {
uint32_t ts_tl0 = ts + i * kFps10;
InsertFrame(pid + i, 0, ts_tl0, false, pid + i - 1);
InsertFrame(pid + i, 1, ts_tl0, false, pid + i - 1);
}
ExtractFrame();
ExtractFrame();
clock_.AdvanceTimeMilliseconds(57);
for (int i = 2; i < 12; ++i) {
ExtractFrame();
clock_.AdvanceTimeMilliseconds(57);
}
CheckFrame(0, pid, 0);
CheckFrame(1, pid, 1);
CheckFrame(2, pid + 1, 0);
CheckFrame(3, pid + 1, 1);
CheckFrame(4, pid + 2, 0);
CheckFrame(5, pid + 2, 1);
CheckFrame(6, pid + 3, 0);
CheckFrame(7, pid + 4, 0);
CheckFrame(8, pid + 5, 0);
CheckNoFrame(9);
CheckNoFrame(10);
CheckNoFrame(11);
}
TEST_F(TestFrameBuffer2, InsertLateFrame) {
uint16_t pid = Rand();
uint32_t ts = Rand();
InsertFrame(pid, 0, ts, false);
InsertFrame(pid, 0, ts, false, true);
ExtractFrame();
InsertFrame(pid + 2, 0, ts, false);
InsertFrame(pid + 2, 0, ts, false, true);
ExtractFrame();
InsertFrame(pid + 1, 0, ts, false, pid);
InsertFrame(pid + 1, 0, ts, false, true, pid);
ExtractFrame();
CheckFrame(0, pid, 0);
@ -441,12 +419,12 @@ TEST_F(TestFrameBuffer2, ProtectionMode) {
uint32_t ts = Rand();
EXPECT_CALL(jitter_estimator_, GetJitterEstimate(1.0));
InsertFrame(pid, 0, ts, false);
InsertFrame(pid, 0, ts, false, true);
ExtractFrame();
buffer_->SetProtectionMode(kProtectionNackFEC);
EXPECT_CALL(jitter_estimator_, GetJitterEstimate(0.0));
InsertFrame(pid + 1, 0, ts, false);
InsertFrame(pid + 1, 0, ts, false, true);
ExtractFrame();
}
@ -454,45 +432,45 @@ TEST_F(TestFrameBuffer2, NoContinuousFrame) {
uint16_t pid = Rand();
uint32_t ts = Rand();
EXPECT_EQ(-1, InsertFrame(pid + 1, 0, ts, false, pid));
EXPECT_EQ(-1, InsertFrame(pid + 1, 0, ts, false, true, pid));
}
TEST_F(TestFrameBuffer2, LastContinuousFrameSingleLayer) {
uint16_t pid = Rand();
uint32_t ts = Rand();
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false));
EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, pid + 1));
EXPECT_EQ(pid + 2, InsertFrame(pid + 1, 0, ts, false, pid));
EXPECT_EQ(pid + 2, InsertFrame(pid + 4, 0, ts, false, pid + 3));
EXPECT_EQ(pid + 5, InsertFrame(pid + 5, 0, ts, false));
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, true));
EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, true, pid + 1));
EXPECT_EQ(pid + 2, InsertFrame(pid + 1, 0, ts, false, true, pid));
EXPECT_EQ(pid + 2, InsertFrame(pid + 4, 0, ts, false, true, pid + 3));
EXPECT_EQ(pid + 5, InsertFrame(pid + 5, 0, ts, false, true));
}
TEST_F(TestFrameBuffer2, LastContinuousFrameTwoLayers) {
uint16_t pid = Rand();
uint32_t ts = Rand();
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false));
EXPECT_EQ(pid, InsertFrame(pid, 1, ts, true));
EXPECT_EQ(pid, InsertFrame(pid + 1, 1, ts, true, pid));
EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, pid + 1));
EXPECT_EQ(pid, InsertFrame(pid + 2, 1, ts, true, pid + 1));
EXPECT_EQ(pid, InsertFrame(pid + 3, 0, ts, false, pid + 2));
EXPECT_EQ(pid + 3, InsertFrame(pid + 1, 0, ts, false, pid));
EXPECT_EQ(pid + 3, InsertFrame(pid + 3, 1, ts, true, pid + 2));
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, false));
EXPECT_EQ(pid, InsertFrame(pid, 1, ts, true, true));
EXPECT_EQ(pid, InsertFrame(pid + 1, 1, ts, true, true, pid));
EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, false, pid + 1));
EXPECT_EQ(pid, InsertFrame(pid + 2, 1, ts, true, true, pid + 1));
EXPECT_EQ(pid, InsertFrame(pid + 3, 0, ts, false, false, pid + 2));
EXPECT_EQ(pid + 3, InsertFrame(pid + 1, 0, ts, false, false, pid));
EXPECT_EQ(pid + 3, InsertFrame(pid + 3, 1, ts, true, true, pid + 2));
}
TEST_F(TestFrameBuffer2, PictureIdJumpBack) {
uint16_t pid = Rand();
uint32_t ts = Rand();
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false));
EXPECT_EQ(pid + 1, InsertFrame(pid + 1, 0, ts + 1, false, pid));
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, true));
EXPECT_EQ(pid + 1, InsertFrame(pid + 1, 0, ts + 1, false, true, pid));
ExtractFrame();
CheckFrame(0, pid, 0);
// Jump back in pid but increase ts.
EXPECT_EQ(pid - 1, InsertFrame(pid - 1, 0, ts + 2, false));
EXPECT_EQ(pid - 1, InsertFrame(pid - 1, 0, ts + 2, false, true));
ExtractFrame();
ExtractFrame();
CheckFrame(1, pid - 1, 0);
@ -511,6 +489,7 @@ TEST_F(TestFrameBuffer2, StatsCallback) {
{
std::unique_ptr<FrameObjectFake> frame(new FrameObjectFake());
frame->VerifyAndAllocate(kFrameSize);
frame->SetSize(kFrameSize);
frame->id.picture_id = pid;
frame->id.spatial_layer = 0;
@ -526,42 +505,42 @@ TEST_F(TestFrameBuffer2, StatsCallback) {
}
TEST_F(TestFrameBuffer2, ForwardJumps) {
EXPECT_EQ(5453, InsertFrame(5453, 0, 1, false));
EXPECT_EQ(5453, InsertFrame(5453, 0, 1, false, true));
ExtractFrame();
EXPECT_EQ(5454, InsertFrame(5454, 0, 1, false, 5453));
EXPECT_EQ(5454, InsertFrame(5454, 0, 1, false, true, 5453));
ExtractFrame();
EXPECT_EQ(15670, InsertFrame(15670, 0, 1, false));
EXPECT_EQ(15670, InsertFrame(15670, 0, 1, false, true));
ExtractFrame();
EXPECT_EQ(29804, InsertFrame(29804, 0, 1, false));
EXPECT_EQ(29804, InsertFrame(29804, 0, 1, false, true));
ExtractFrame();
EXPECT_EQ(29805, InsertFrame(29805, 0, 1, false, 29804));
EXPECT_EQ(29805, InsertFrame(29805, 0, 1, false, true, 29804));
ExtractFrame();
EXPECT_EQ(29806, InsertFrame(29806, 0, 1, false, 29805));
EXPECT_EQ(29806, InsertFrame(29806, 0, 1, false, true, 29805));
ExtractFrame();
EXPECT_EQ(33819, InsertFrame(33819, 0, 1, false));
EXPECT_EQ(33819, InsertFrame(33819, 0, 1, false, true));
ExtractFrame();
EXPECT_EQ(41248, InsertFrame(41248, 0, 1, false));
EXPECT_EQ(41248, InsertFrame(41248, 0, 1, false, true));
ExtractFrame();
}
TEST_F(TestFrameBuffer2, DuplicateFrames) {
EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false));
EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false, true));
ExtractFrame();
EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false));
EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false, true));
}
// TODO(philipel): implement more unittests related to invalid references.
TEST_F(TestFrameBuffer2, InvalidReferences) {
EXPECT_EQ(-1, InsertFrame(0, 0, 1000, false, 2));
EXPECT_EQ(1, InsertFrame(1, 0, 2000, false));
EXPECT_EQ(-1, InsertFrame(0, 0, 1000, false, true, 2));
EXPECT_EQ(1, InsertFrame(1, 0, 2000, false, true));
ExtractFrame();
EXPECT_EQ(2, InsertFrame(2, 0, 3000, false, 1));
EXPECT_EQ(2, InsertFrame(2, 0, 3000, false, true, 1));
}
TEST_F(TestFrameBuffer2, KeyframeRequired) {
EXPECT_EQ(1, InsertFrame(1, 0, 1000, false));
EXPECT_EQ(2, InsertFrame(2, 0, 2000, false, 1));
EXPECT_EQ(3, InsertFrame(3, 0, 3000, false));
EXPECT_EQ(1, InsertFrame(1, 0, 1000, false, true));
EXPECT_EQ(2, InsertFrame(2, 0, 2000, false, true, 1));
EXPECT_EQ(3, InsertFrame(3, 0, 3000, false, true));
ExtractFrame();
ExtractFrame(0, true);
ExtractFrame();
@ -575,42 +554,81 @@ TEST_F(TestFrameBuffer2, KeyframeClearsFullBuffer) {
const int kMaxBufferSize = 600;
for (int i = 1; i <= kMaxBufferSize; ++i)
EXPECT_EQ(-1, InsertFrame(i, 0, i * 1000, false, i - 1));
EXPECT_EQ(-1, InsertFrame(i, 0, i * 1000, false, true, i - 1));
ExtractFrame();
CheckNoFrame(0);
EXPECT_EQ(
kMaxBufferSize + 1,
InsertFrame(kMaxBufferSize + 1, 0, (kMaxBufferSize + 1) * 1000, false));
EXPECT_EQ(kMaxBufferSize + 1,
InsertFrame(kMaxBufferSize + 1, 0, (kMaxBufferSize + 1) * 1000,
false, true));
ExtractFrame();
CheckFrame(1, kMaxBufferSize + 1, 0);
}
TEST_F(TestFrameBuffer2, DontUpdateOnUndecodableFrame) {
InsertFrame(1, 0, 0, false);
InsertFrame(1, 0, 0, false, true);
ExtractFrame(0, true);
InsertFrame(3, 0, 0, false, 2, 0);
InsertFrame(3, 0, 0, false, 0);
InsertFrame(2, 0, 0, false);
InsertFrame(3, 0, 0, false, true, 2, 0);
InsertFrame(3, 0, 0, false, true, 0);
InsertFrame(2, 0, 0, false, true);
ExtractFrame(0, true);
ExtractFrame(0, true);
}
TEST_F(TestFrameBuffer2, DontDecodeOlderTimestamp) {
InsertFrame(2, 0, 1, false);
InsertFrame(1, 0, 2, false); // Older picture id but newer timestamp.
InsertFrame(2, 0, 1, false, true);
InsertFrame(1, 0, 2, false, true); // Older picture id but newer timestamp.
ExtractFrame(0);
ExtractFrame(0);
CheckFrame(0, 1, 0);
CheckNoFrame(1);
InsertFrame(3, 0, 4, false);
InsertFrame(4, 0, 3, false); // Newer picture id but older timestamp.
InsertFrame(3, 0, 4, false, true);
InsertFrame(4, 0, 3, false, true); // Newer picture id but older timestamp.
ExtractFrame(0);
ExtractFrame(0);
CheckFrame(2, 3, 0);
CheckNoFrame(3);
}
// Checks that both spatial layers of one superframe are returned as a single
// combined frame whose size is the sum of the two inserted frames.
TEST_F(TestFrameBuffer2, CombineFramesToSuperframe) {
uint16_t pid = Rand();
uint32_t ts = Rand();
// Same picture id and timestamp for both frames: spatial layer 0 (not the
// last layer), then inter-layer-predicted spatial layer 1 (the last layer).
InsertFrame(pid, 0, ts, false, false);
InsertFrame(pid, 1, ts, true, true);
// Two extraction attempts: the checks below expect a frame only from the
// first one.
ExtractFrame(0);
ExtractFrame(0);
CheckFrame(0, pid, 0);
CheckNoFrame(1);
// Two frames should be combined and returned together.
CheckFrameSize(0, kFrameSize * 2);
}
// Checks the extraction order when the upper spatial layer of a later
// superframe depends on a frame that has not been output yet: the expected
// result is (pid + 1, layer 1) followed by (pid + 2, layer 0).
TEST_F(TestFrameBuffer2, HigherSpatialLayerNonDecodable) {
uint16_t pid = Rand();
uint32_t ts = Rand();
// First superframe: layer 0 (not last) plus inter-layer-predicted layer 1
// (last), sharing picture id and timestamp.
InsertFrame(pid, 0, ts, false, false);
InsertFrame(pid, 1, ts, true, true);
ExtractFrame(0);
CheckFrame(0, pid, 0);
// pid + 1: a single layer-1 frame, marked last, referencing pid.
InsertFrame(pid + 1, 1, ts + kFps20, false, true, pid);
// pid + 2: layer 0 (not last) references pid; layer 1 is inter-layer
// predicted, marked last, and references pid + 1.
InsertFrame(pid + 2, 0, ts + kFps10, false, false, pid);
InsertFrame(pid + 2, 1, ts + kFps10, true, true, pid + 1);
clock_.AdvanceTimeMilliseconds(1000);
// Frame pid+1 is decodable but too late.
// In superframe pid+2 frame sid=0 is decodable, but frame sid=1 is not.
// Incorrect implementation might skip pid+1 frame and output undecodable
// pid+2 instead.
ExtractFrame();
ExtractFrame();
// Positions 1 and 2 must hold pid + 1 (layer 1) and pid + 2 (layer 0).
CheckFrame(1, pid + 1, 1);
CheckFrame(2, pid + 2, 0);
}
} // namespace video_coding
} // namespace webrtc

View File

@ -104,6 +104,7 @@ RtpFrameObject::RtpFrameObject(PacketBuffer* packet_buffer,
timing_.receive_finish_ms = last_packet->receive_time_ms;
}
timing_.flags = last_packet->video_header.video_timing.flags;
is_last_spatial_layer = last_packet->markerBit;
}
RtpFrameObject::~RtpFrameObject() {

View File

@ -489,12 +489,24 @@ RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp9(
UnwrapPictureIds(frame);
return kHandOff;
}
} else {
if (frame->frame_type() == kVideoFrameKey) {
} else if (frame->frame_type() == kVideoFrameKey) {
if (frame->id.spatial_layer == 0) {
RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
return kDrop;
}
const auto gof_info_it = gof_info_.find(unwrapped_tl0);
if (gof_info_it == gof_info_.end())
return kStash;
info = &gof_info_it->second;
if (frame->frame_type() == kVideoFrameKey) {
frame->num_references = 0;
FrameReceivedVp9(frame->id.picture_id, info);
UnwrapPictureIds(frame);
return kHandOff;
}
} else {
auto gof_info_it = gof_info_.find(
(codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);

View File

@ -496,6 +496,14 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) {
VideoSendTiming::kInvalid;
webrtc_rtp_header.video_header().is_last_packet_in_frame =
webrtc_rtp_header.header.markerBit;
if (parsed_payload.video_header().codec == kVideoCodecVP9) {
const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
parsed_payload.video_header().video_type_header);
webrtc_rtp_header.video_header().is_last_packet_in_frame |=
codec_header.end_of_frame;
webrtc_rtp_header.video_header().is_first_packet_in_frame |=
codec_header.beginning_of_frame;
}
packet.GetExtension<VideoOrientation>(
&webrtc_rtp_header.video_header().rotation);

View File

@ -670,6 +670,10 @@ void VideoQualityTest::SetupVideo(Transport* send_transport,
vp9_settings.numberOfSpatialLayers = static_cast<unsigned char>(
params_.ss[video_idx].num_spatial_layers);
vp9_settings.interLayerPred = params_.ss[video_idx].inter_layer_pred;
// High FPS vp9 screenshare requires flexible mode.
if (params_.video[video_idx].fps > 5) {
vp9_settings.flexibleMode = true;
}
video_encoder_configs_[video_idx].encoder_specific_settings =
new rtc::RefCountedObject<
VideoEncoderConfig::Vp9EncoderSpecificSettings>(vp9_settings);

View File

@ -381,10 +381,6 @@ void VideoReceiveStream::RequestKeyFrame() {
void VideoReceiveStream::OnCompleteFrame(
std::unique_ptr<video_coding::EncodedFrame> frame) {
// TODO(webrtc:9249): Workaround to allow decoding of VP9 SVC stream with
// partially enabled inter-layer prediction.
frame->id.spatial_layer = 0;
// TODO(https://bugs.webrtc.org/9974): Consider removing this workaround.
int64_t time_now_ms = rtc::TimeMillis();
if (last_complete_frame_time_ms_ > 0 &&