Vp9 flexible mode fixes
- Enable vp9 flexible mode in VideoEngine if 3 spatial layers are set.
- Enable flexible mode in loopback tools and quality tests.
- Reset first active spatial layer on keyframe in encoder.
- Ensure duplicate references are not set by the sender in video header.
- Set references manually for flexible mode in vp9 encoder.
- Delay newly activated layers until the next base layer frame.
- On the receive side, put each spatial layer as a separate frame into FrameBuffer and return several frames combined from FrameBuffer.

Bug: webrtc:10049,webrtc:9794,webrtc:9784
Change-Id: I01e69f134cc145deba666ccc92deb1d37a324ede
Reviewed-on: https://webrtc-review.googlesource.com/c/112289
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25895}
This commit is contained in:
parent
77894ccb5d
commit
5546aef682
@ -79,6 +79,9 @@ class EncodedFrame : public webrtc::VCMEncodedFrame {
|
||||
size_t num_references = 0;
|
||||
int64_t references[kMaxFrameReferences];
|
||||
bool inter_layer_predicted = false;
|
||||
// Is this subframe the last one in the superframe (In RTP stream that would
|
||||
// mean that the last packet has a marker bit set).
|
||||
bool is_last_spatial_layer = true;
|
||||
};
|
||||
|
||||
} // namespace video_coding
|
||||
|
||||
@ -68,6 +68,10 @@ class RTC_EXPORT EncodedImage {
|
||||
}
|
||||
|
||||
// Accessor for _length (the field that set_size() writes).
size_t size() const {
  return _length;
}
|
||||
// Updates the value reported by size(). Debug builds first check that
// |new_size| does not exceed _size, the value reported by capacity().
void set_size(size_t new_size) {
  RTC_DCHECK_LE(new_size, _size);
  _length = new_size;
}
|
||||
// Accessor for _size, the upper bound that set_size() checks against.
size_t capacity() const {
  return _size;
}
|
||||
|
||||
void set_buffer(uint8_t* buffer, size_t capacity) {
|
||||
|
||||
@ -383,6 +383,9 @@ WebRtcVideoChannel::WebRtcVideoSendStream::ConfigureVideoEncoderSettings(
|
||||
if (!is_screencast) {
|
||||
// Limit inter-layer prediction to key pictures.
|
||||
vp9_settings.interLayerPred = webrtc::InterLayerPredMode::kOnKeyPic;
|
||||
} else {
|
||||
// 3 spatial layers vp9 screenshare needs flexible mode.
|
||||
vp9_settings.flexibleMode = vp9_settings.numberOfSpatialLayers > 2;
|
||||
}
|
||||
return new rtc::RefCountedObject<
|
||||
webrtc::VideoEncoderConfig::Vp9EncoderSpecificSettings>(vp9_settings);
|
||||
|
||||
@ -124,9 +124,9 @@ TEST(VideoCodecTestLibvpx, ChangeBitrateVP9) {
|
||||
{500, 30, kNumFramesLong}};
|
||||
|
||||
std::vector<RateControlThresholds> rc_thresholds = {
|
||||
{5, 1, 0, 1, 0.5, 0.1, 0, 1},
|
||||
{15, 2, 0, 1, 0.5, 0.1, 0, 0},
|
||||
{10, 1, 0, 1, 0.5, 0.1, 0, 0}};
|
||||
{5, 2, 0, 1, 0.5, 0.1, 0, 1},
|
||||
{15, 3, 0, 1, 0.5, 0.1, 0, 0},
|
||||
{10, 2, 0, 1, 0.5, 0.1, 0, 0}};
|
||||
|
||||
std::vector<QualityThresholds> quality_thresholds = {
|
||||
{34, 33, 0.90, 0.88}, {38, 35, 0.95, 0.91}, {35, 34, 0.93, 0.90}};
|
||||
|
||||
@ -23,9 +23,9 @@ namespace webrtc {
|
||||
namespace {
|
||||
const size_t kMinVp9SvcBitrateKbps = 30;
|
||||
|
||||
const size_t kMaxNumLayersForScreenSharing = 2;
|
||||
const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 5.0};
|
||||
const size_t kMaxScreenSharingLayerBitrateKbps[] = {200, 500};
|
||||
const size_t kMaxNumLayersForScreenSharing = 3;
|
||||
const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 5.0, 30.0};
|
||||
const size_t kMaxScreenSharingLayerBitrateKbps[] = {200, 500, 1250};
|
||||
} // namespace
|
||||
|
||||
std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width,
|
||||
|
||||
@ -48,12 +48,13 @@ TEST(SvcConfig, ScreenSharing) {
|
||||
std::vector<SpatialLayer> spatial_layers =
|
||||
GetSvcConfig(1920, 1080, 30, 3, 3, true);
|
||||
|
||||
EXPECT_EQ(spatial_layers.size(), 2UL);
|
||||
EXPECT_EQ(spatial_layers.size(), 3UL);
|
||||
|
||||
for (const SpatialLayer& layer : spatial_layers) {
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
const SpatialLayer& layer = spatial_layers[i];
|
||||
EXPECT_EQ(layer.width, 1920);
|
||||
EXPECT_EQ(layer.height, 1080);
|
||||
EXPECT_EQ(layer.maxFramerate, 5);
|
||||
EXPECT_EQ(layer.maxFramerate, (i < 2) ? 5 : 30);
|
||||
EXPECT_EQ(layer.numberOfTemporalLayers, 1);
|
||||
EXPECT_LE(layer.minBitrate, layer.maxBitrate);
|
||||
EXPECT_LE(layer.minBitrate, layer.targetBitrate);
|
||||
|
||||
@ -151,7 +151,7 @@ TEST(SvcRateAllocatorTest, MinBitrateToGetQualityLayer) {
|
||||
|
||||
const SpatialLayer* layers = codec.spatialLayers;
|
||||
|
||||
EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 2U);
|
||||
EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 3U);
|
||||
|
||||
VideoBitrateAllocation allocation =
|
||||
allocator.GetAllocation(layers[0].minBitrate * 1000, 30);
|
||||
|
||||
@ -91,15 +91,16 @@ class TestVp9Impl : public VideoCodecUnitTest {
|
||||
}
|
||||
}
|
||||
|
||||
void ConfigureSvc(size_t num_spatial_layers) {
|
||||
void ConfigureSvc(size_t num_spatial_layers, size_t num_temporal_layers = 1) {
|
||||
codec_settings_.VP9()->numberOfSpatialLayers =
|
||||
static_cast<unsigned char>(num_spatial_layers);
|
||||
codec_settings_.VP9()->numberOfTemporalLayers = 1;
|
||||
codec_settings_.VP9()->numberOfTemporalLayers = num_temporal_layers;
|
||||
codec_settings_.VP9()->frameDroppingOn = false;
|
||||
|
||||
std::vector<SpatialLayer> layers = GetSvcConfig(
|
||||
codec_settings_.width, codec_settings_.height,
|
||||
codec_settings_.maxFramerate, num_spatial_layers, 1, false);
|
||||
std::vector<SpatialLayer> layers =
|
||||
GetSvcConfig(codec_settings_.width, codec_settings_.height,
|
||||
codec_settings_.maxFramerate, num_spatial_layers,
|
||||
num_temporal_layers, false);
|
||||
for (size_t i = 0; i < layers.size(); ++i) {
|
||||
codec_settings_.spatialLayers[i] = layers[i];
|
||||
}
|
||||
@ -401,6 +402,8 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
|
||||
std::vector<EncodedImage> encoded_frame;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
|
||||
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
|
||||
frame_num == 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -418,6 +421,8 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
|
||||
std::vector<EncodedImage> encoded_frame;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
|
||||
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
|
||||
frame_num == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -581,6 +586,248 @@ TEST_F(TestVp9Impl,
|
||||
}
|
||||
}
|
||||
|
||||
// Screenshare, flexible mode, full inter-layer prediction, 3 spatial layers.
// Starts with only the two lower layers enabled, then enables the top layer
// while the base layer is being dropped by its 5 fps cap. The test expects
// that, once the dropped frames have passed, a picture with all 3 layers is
// produced and that its first encoded frame signals SS data.
TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
|
||||
const size_t num_spatial_layers = 3;
|
||||
// Chosen by hand, the 2nd frame is dropped with configured per-layer max
|
||||
// framerate.
|
||||
const size_t num_frames_to_encode_before_drop = 1;
|
||||
// Chosen by hand, exactly 5 frames are dropped for input fps=30 and max
|
||||
// framerate = 5.
|
||||
const size_t num_dropped_frames = 5;
|
||||
|
||||
codec_settings_.maxFramerate = 30;
|
||||
ConfigureSvc(num_spatial_layers);
|
||||
codec_settings_.spatialLayers[0].maxFramerate = 5.0;
|
||||
// use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to
|
||||
// framerate capping we would still get back at least a middle layer. It
|
||||
// simplifies the test.
|
||||
codec_settings_.spatialLayers[1].maxFramerate = 30.0;
|
||||
codec_settings_.spatialLayers[2].maxFramerate = 30.0;
|
||||
codec_settings_.VP9()->frameDroppingOn = false;
|
||||
codec_settings_.mode = VideoCodecMode::kScreensharing;
|
||||
codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
|
||||
codec_settings_.VP9()->flexibleMode = true;
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
|
||||
0 /* max payload size (unused) */));
|
||||
|
||||
// Enable all but the last layer.
|
||||
VideoBitrateAllocation bitrate_allocation;
|
||||
for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
|
||||
bitrate_allocation.SetBitrate(
|
||||
sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
|
||||
}
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->SetRateAllocation(bitrate_allocation,
|
||||
codec_settings_.maxFramerate));
|
||||
|
||||
// Encode enough frames to force drop due to framerate capping.
|
||||
for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop;
|
||||
++frame_num) {
|
||||
SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
std::vector<EncodedImage> encoded_frames;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
}
|
||||
|
||||
// Enable the last layer.
|
||||
bitrate_allocation.SetBitrate(
|
||||
num_spatial_layers - 1, 0,
|
||||
codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate *
|
||||
1000);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->SetRateAllocation(bitrate_allocation,
|
||||
codec_settings_.maxFramerate));
|
||||
|
||||
for (size_t frame_num = 0; frame_num < num_dropped_frames; ++frame_num) {
|
||||
SetWaitForEncodedFramesThreshold(1);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
// First layer is dropped due to frame rate cap. The last layer should not
|
||||
// be enabled yet.
|
||||
// Note: nothing is asserted on the returned frames in this loop; only the
|
||||
// wait threshold of 1 above expresses the expectation.
|
||||
std::vector<EncodedImage> encoded_frames;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
}
|
||||
|
||||
// NOTE(review): the threshold is 2 while 3 frames are expected below;
|
||||
// confirm against the SetWaitForEncodedFramesThreshold semantics.
SetWaitForEncodedFramesThreshold(2);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
// Now all 3 layers should be encoded.
|
||||
std::vector<EncodedImage> encoded_frames;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
EXPECT_EQ(encoded_frames.size(), 3u);
|
||||
// Scalability structure has to be triggered.
|
||||
EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
|
||||
}
|
||||
|
||||
// Screenshare, flexible mode, full inter-layer prediction, 3 spatial layers,
// all enabled from the start. Disables the top layer while the base layer is
// being dropped by its 5 fps cap and expects the removal to take effect on
// the very next frame (only the middle layer is returned), while SS data is
// expected only once a base-layer frame is encoded again.
TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) {
|
||||
const size_t num_spatial_layers = 3;
|
||||
// Chosen by hand, the 2nd frame is dropped with configured per-layer max
|
||||
// framerate.
|
||||
const size_t num_frames_to_encode_before_drop = 1;
|
||||
// Chosen by hand, exactly 5 frames are dropped for input fps=30 and max
|
||||
// framerate = 5.
|
||||
const size_t num_dropped_frames = 5;
|
||||
|
||||
codec_settings_.maxFramerate = 30;
|
||||
ConfigureSvc(num_spatial_layers);
|
||||
codec_settings_.spatialLayers[0].maxFramerate = 5.0;
|
||||
// use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to
|
||||
// framerate capping we would still get back at least a middle layer. It
|
||||
// simplifies the test.
|
||||
codec_settings_.spatialLayers[1].maxFramerate = 30.0;
|
||||
codec_settings_.spatialLayers[2].maxFramerate = 30.0;
|
||||
codec_settings_.VP9()->frameDroppingOn = false;
|
||||
codec_settings_.mode = VideoCodecMode::kScreensharing;
|
||||
codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
|
||||
codec_settings_.VP9()->flexibleMode = true;
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
|
||||
0 /* max payload size (unused) */));
|
||||
|
||||
// All layers are enabled from the start.
|
||||
VideoBitrateAllocation bitrate_allocation;
|
||||
for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
|
||||
bitrate_allocation.SetBitrate(
|
||||
sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
|
||||
}
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->SetRateAllocation(bitrate_allocation,
|
||||
codec_settings_.maxFramerate));
|
||||
|
||||
// Encode enough frames to force drop due to framerate capping.
|
||||
for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop;
|
||||
++frame_num) {
|
||||
SetWaitForEncodedFramesThreshold(num_spatial_layers);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
std::vector<EncodedImage> encoded_frames;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
}
|
||||
|
||||
// Now the first layer should not have frames in it.
|
||||
for (size_t frame_num = 0; frame_num < num_dropped_frames - 2; ++frame_num) {
|
||||
SetWaitForEncodedFramesThreshold(2);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
// First layer is dropped due to frame rate cap. Both upper layers are still
|
||||
// enabled here, so two frames are awaited.
|
||||
std::vector<EncodedImage> encoded_frames;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
// First layer is skipped.
|
||||
EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1);
|
||||
}
|
||||
|
||||
// Disable the last layer.
|
||||
bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->SetRateAllocation(bitrate_allocation,
|
||||
codec_settings_.maxFramerate));
|
||||
|
||||
// Still expected to drop first layer. Last layer has to be disabled as well.
|
||||
for (size_t frame_num = num_dropped_frames - 2;
|
||||
frame_num < num_dropped_frames; ++frame_num) {
|
||||
// Expect back one frame.
|
||||
SetWaitForEncodedFramesThreshold(1);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
// First layer is dropped due to frame rate cap and the last layer was just
|
||||
// disabled, so only the middle layer is awaited.
|
||||
std::vector<EncodedImage> encoded_frames;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
// First layer is skipped.
|
||||
EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1);
|
||||
// No SS data on non-base spatial layer.
|
||||
EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
|
||||
}
|
||||
|
||||
// The base layer is encoded again on this frame; with the top layer
|
||||
// disabled, two frames are awaited.
SetWaitForEncodedFramesThreshold(2);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
std::vector<EncodedImage> encoded_frames;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
// First layer is not skipped now.
|
||||
EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 0);
|
||||
// SS data should be present.
|
||||
EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
|
||||
}
|
||||
|
||||
// Real-time video, non-flexible mode, 3 spatial x 2 temporal layers.
// Disables the top spatial layer right after a TL0 frame. The test expects
// the following TL1 picture to already contain one layer fewer, and the next
// TL0 picture to carry SS data in which the disabled layer's width is 0.
TEST_F(TestVp9Impl, DisableNewLayerInVideoDelaysSsInfoTillTL0) {
|
||||
const size_t num_spatial_layers = 3;
|
||||
const size_t num_temporal_layers = 2;
|
||||
// Two temporal layers, non-flexible real-time mode with inter-layer
|
||||
// prediction on key pictures only (see the settings below).
|
||||
ConfigureSvc(num_spatial_layers, num_temporal_layers);
|
||||
codec_settings_.VP9()->frameDroppingOn = false;
|
||||
codec_settings_.mode = VideoCodecMode::kRealtimeVideo;
|
||||
codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
|
||||
codec_settings_.VP9()->flexibleMode = false;
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
|
||||
0 /* max payload size (unused) */));
|
||||
|
||||
// Enable all the layers.
|
||||
// Each spatial layer's target bitrate is split evenly between its temporal
|
||||
// layers.
VideoBitrateAllocation bitrate_allocation;
|
||||
for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
|
||||
for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
|
||||
bitrate_allocation.SetBitrate(
|
||||
sl_idx, tl_idx,
|
||||
codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 /
|
||||
num_temporal_layers);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->SetRateAllocation(bitrate_allocation,
|
||||
codec_settings_.maxFramerate));
|
||||
|
||||
std::vector<EncodedImage> encoded_frames;
|
||||
std::vector<CodecSpecificInfo> codec_specific_info;
|
||||
|
||||
// Encode one TL0 frame
|
||||
SetWaitForEncodedFramesThreshold(num_spatial_layers);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u);
|
||||
|
||||
// Disable the last layer.
|
||||
for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
|
||||
bitrate_allocation.SetBitrate(num_spatial_layers - 1, tl_idx, 0);
|
||||
}
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->SetRateAllocation(bitrate_allocation,
|
||||
codec_settings_.maxFramerate));
|
||||
|
||||
// Next is TL1 frame. The last layer is disabled immediately, but SS structure
|
||||
// is not provided here.
|
||||
// Note: the absence of SS data on this frame is not asserted; only the
|
||||
// reduced frame count and the temporal index are checked.
SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1u);
|
||||
|
||||
// Next is TL0 frame, which should have delayed SS structure.
|
||||
SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
|
||||
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
|
||||
EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u);
|
||||
EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
|
||||
EXPECT_TRUE(codec_specific_info[0]
|
||||
.codecSpecific.VP9.spatial_layer_resolution_present);
|
||||
// The disabled top layer is expected to be reported with zero width.
EXPECT_EQ(
|
||||
codec_specific_info[0].codecSpecific.VP9.width[num_spatial_layers - 1],
|
||||
0u);
|
||||
}
|
||||
|
||||
TEST_F(TestVp9Impl,
|
||||
LowLayerMarkedAsRefIfHighLayerNotEncodedAndInterLayerPredIsEnabled) {
|
||||
ConfigureSvc(3);
|
||||
@ -766,6 +1013,7 @@ TEST_F(TestVp9ImplFrameDropping, DifferentFrameratePerSpatialLayer) {
|
||||
|
||||
codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
|
||||
codec_settings_.VP9()->frameDroppingOn = false;
|
||||
codec_settings_.VP9()->flexibleMode = true;
|
||||
|
||||
VideoBitrateAllocation bitrate_allocation;
|
||||
for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
|
||||
|
||||
@ -49,6 +49,9 @@ uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
|
||||
|
||||
int kMaxNumTiles4kVideo = 8;
|
||||
|
||||
// Maximum allowed PID difference for variable frame-rate mode.
|
||||
const int kMaxAllowedPidDIff = 8;
|
||||
|
||||
// Only positive speeds, range for real-time coding currently is: 5 - 8.
|
||||
// Lower means slower/better quality, higher means fastest/lower quality.
|
||||
int GetCpuSpeed(int width, int height) {
|
||||
@ -124,6 +127,18 @@ ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t,
|
||||
}
|
||||
return ColorSpace(primaries, transfer, matrix, range);
|
||||
}
|
||||
|
||||
bool MoreLayersEnabled(const VideoBitrateAllocation& first,
|
||||
const VideoBitrateAllocation& second) {
|
||||
for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) {
|
||||
if (first.GetSpatialLayerSum(sl_idx) > 0 &&
|
||||
second.GetSpatialLayerSum(sl_idx) == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
|
||||
@ -154,12 +169,12 @@ VP9EncoderImpl::VP9EncoderImpl(const cricket::VideoCodec& codec)
|
||||
field_trial::IsEnabled("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation")),
|
||||
is_svc_(false),
|
||||
inter_layer_pred_(InterLayerPredMode::kOn),
|
||||
external_ref_control_(
|
||||
field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl")),
|
||||
external_ref_control_(false), // Set in InitEncode because of tests.
|
||||
trusted_rate_controller_(
|
||||
field_trial::IsEnabled(kVp9TrustedRateControllerFieldTrial)),
|
||||
full_superframe_drop_(true),
|
||||
first_frame_in_picture_(true),
|
||||
ss_info_needed_(false),
|
||||
is_flexible_mode_(false) {
|
||||
memset(&codec_, 0, sizeof(codec_));
|
||||
memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
|
||||
@ -314,14 +329,8 @@ int VP9EncoderImpl::SetRateAllocation(
|
||||
|
||||
codec_.maxFramerate = frame_rate;
|
||||
|
||||
if (!SetSvcRates(bitrate_allocation)) {
|
||||
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
||||
}
|
||||
requested_bitrate_allocation_ = bitrate_allocation;
|
||||
|
||||
// Update encoder context
|
||||
if (vpx_codec_enc_config_set(encoder_, config_)) {
|
||||
return WEBRTC_VIDEO_CODEC_ERROR;
|
||||
}
|
||||
return WEBRTC_VIDEO_CODEC_OK;
|
||||
}
|
||||
|
||||
@ -461,6 +470,27 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
|
||||
|
||||
is_flexible_mode_ = inst->VP9().flexibleMode;
|
||||
|
||||
inter_layer_pred_ = inst->VP9().interLayerPred;
|
||||
|
||||
different_framerates_used_ = false;
|
||||
for (size_t sl_idx = 1; sl_idx < num_spatial_layers_; ++sl_idx) {
|
||||
if (std::abs(codec_.spatialLayers[sl_idx].maxFramerate -
|
||||
codec_.spatialLayers[0].maxFramerate) > 1e-9) {
|
||||
different_framerates_used_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (different_framerates_used_ && !is_flexible_mode_) {
|
||||
RTC_LOG(LS_ERROR) << "Flexible mode required for different framerates on "
|
||||
"different spatial layers";
|
||||
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
||||
}
|
||||
|
||||
// External reference control is required for different frame rate on spatial
|
||||
// layers because libvpx generates rtp incompatible references in this case.
|
||||
external_ref_control_ = field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl") ||
|
||||
different_framerates_used_;
|
||||
|
||||
if (num_temporal_layers_ == 1) {
|
||||
gof_.SetGofInfoVP9(kTemporalStructureMode1);
|
||||
config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
|
||||
@ -493,8 +523,14 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
|
||||
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
||||
}
|
||||
|
||||
inter_layer_pred_ = inst->VP9().interLayerPred;
|
||||
|
||||
if (external_ref_control_) {
|
||||
config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
|
||||
if (num_temporal_layers_ > 1 && different_framerates_used_) {
|
||||
// External reference control for several temporal layers with different
|
||||
// frame rates on spatial layers is not implemented yet.
|
||||
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
||||
}
|
||||
}
|
||||
ref_buf_.clear();
|
||||
|
||||
return InitAndSetControlSettings(inst);
|
||||
@ -575,9 +611,9 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
|
||||
}
|
||||
|
||||
SvcRateAllocator init_allocator(codec_);
|
||||
VideoBitrateAllocation allocation = init_allocator.GetAllocation(
|
||||
current_bitrate_allocation_ = init_allocator.GetAllocation(
|
||||
inst->startBitrate * 1000, inst->maxFramerate);
|
||||
if (!SetSvcRates(allocation)) {
|
||||
if (!SetSvcRates(current_bitrate_allocation_)) {
|
||||
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
||||
}
|
||||
|
||||
@ -595,6 +631,7 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
|
||||
inst->VP9().adaptiveQpMode ? 3 : 0);
|
||||
|
||||
vpx_codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
|
||||
vpx_codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);
|
||||
|
||||
if (is_svc_) {
|
||||
vpx_codec_control(encoder_, VP9E_SET_SVC, 1);
|
||||
@ -696,21 +733,21 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
|
||||
}
|
||||
}
|
||||
|
||||
if (VideoCodecMode::kScreensharing == codec_.mode && !force_key_frame_) {
|
||||
// Skip encoding spatial layer frames if their target frame rate is lower
|
||||
// than actual input frame rate.
|
||||
vpx_svc_layer_id_t layer_id = {0};
|
||||
vpx_svc_layer_id_t layer_id = {0};
|
||||
if (!force_key_frame_) {
|
||||
const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
|
||||
layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];
|
||||
|
||||
const uint32_t frame_timestamp_ms =
|
||||
1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
|
||||
if (VideoCodecMode::kScreensharing == codec_.mode) {
|
||||
const uint32_t frame_timestamp_ms =
|
||||
1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
|
||||
|
||||
for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
|
||||
if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
|
||||
++layer_id.spatial_layer_id;
|
||||
} else {
|
||||
break;
|
||||
for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
|
||||
if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
|
||||
++layer_id.spatial_layer_id;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -719,8 +756,42 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
|
||||
// Drop entire picture.
|
||||
return WEBRTC_VIDEO_CODEC_OK;
|
||||
}
|
||||
}
|
||||
|
||||
vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
|
||||
for (int sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
|
||||
layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id;
|
||||
}
|
||||
|
||||
vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
|
||||
|
||||
if (requested_bitrate_allocation_) {
|
||||
bool more_layers_requested = MoreLayersEnabled(
|
||||
*requested_bitrate_allocation_, current_bitrate_allocation_);
|
||||
bool less_layers_requested = MoreLayersEnabled(
|
||||
current_bitrate_allocation_, *requested_bitrate_allocation_);
|
||||
// In SVC can enable new layers only if all lower layers are encoded and at
|
||||
// the base temporal layer.
|
||||
// This will delay rate allocation change until the next frame on the base
|
||||
// spatial layer.
|
||||
// In KSVC or simulcast modes KF will be generated for a new layer, so can
|
||||
// update allocation any time.
|
||||
bool can_upswitch =
|
||||
inter_layer_pred_ != InterLayerPredMode::kOn ||
|
||||
(layer_id.spatial_layer_id == 0 && layer_id.temporal_layer_id == 0);
|
||||
if (!more_layers_requested || can_upswitch) {
|
||||
current_bitrate_allocation_ = *requested_bitrate_allocation_;
|
||||
requested_bitrate_allocation_ = absl::nullopt;
|
||||
if (!SetSvcRates(current_bitrate_allocation_)) {
|
||||
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
||||
}
|
||||
if (less_layers_requested || more_layers_requested) {
|
||||
ss_info_needed_ = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vpx_codec_enc_config_set(encoder_, config_)) {
|
||||
return WEBRTC_VIDEO_CODEC_ERROR;
|
||||
}
|
||||
|
||||
RTC_DCHECK_EQ(input_image.width(), raw_->d_w);
|
||||
@ -780,7 +851,8 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
|
||||
}
|
||||
|
||||
if (external_ref_control_) {
|
||||
vpx_svc_ref_frame_config_t ref_config = SetReferences(force_key_frame_);
|
||||
vpx_svc_ref_frame_config_t ref_config =
|
||||
SetReferences(force_key_frame_, layer_id.spatial_layer_id);
|
||||
|
||||
if (VideoCodecMode::kScreensharing == codec_.mode) {
|
||||
for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
|
||||
@ -840,9 +912,22 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
||||
vp9_info->ss_data_available =
|
||||
(pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
|
||||
|
||||
if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
|
||||
pics_since_key_ = 0;
|
||||
} else if (first_frame_in_picture_) {
|
||||
++pics_since_key_;
|
||||
}
|
||||
|
||||
vpx_svc_layer_id_t layer_id = {0};
|
||||
vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
|
||||
|
||||
if (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
|
||||
layer_id.spatial_layer_id == 0) {
|
||||
// Force SS info after the layers configuration has changed.
|
||||
vp9_info->ss_data_available = true;
|
||||
ss_info_needed_ = false;
|
||||
}
|
||||
|
||||
RTC_CHECK_GT(num_temporal_layers_, 0);
|
||||
RTC_CHECK_GT(num_active_spatial_layers_, 0);
|
||||
if (num_temporal_layers_ == 1) {
|
||||
@ -864,12 +949,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
||||
// TODO(asapersson): this info has to be obtained from the encoder.
|
||||
vp9_info->temporal_up_switch = false;
|
||||
|
||||
if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
|
||||
pics_since_key_ = 0;
|
||||
} else if (first_frame_in_picture_) {
|
||||
++pics_since_key_;
|
||||
}
|
||||
|
||||
const bool is_key_pic = (pics_since_key_ == 0);
|
||||
const bool is_inter_layer_pred_allowed =
|
||||
(inter_layer_pred_ == InterLayerPredMode::kOn ||
|
||||
@ -901,8 +980,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
||||
vp9_info->gof_idx = kNoGofIdx;
|
||||
FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
|
||||
vp9_info);
|
||||
// TODO(webrtc:9794): Add fake reference to empty reference list to
|
||||
// workaround the frame buffer issue on receiver.
|
||||
} else {
|
||||
vp9_info->gof_idx =
|
||||
static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
|
||||
@ -985,6 +1062,8 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
|
||||
|
||||
size_t max_ref_temporal_layer_id = 0;
|
||||
|
||||
std::vector<size_t> ref_pid_list;
|
||||
|
||||
vp9_info->num_ref_pics = 0;
|
||||
for (const RefFrameBuffer& ref_buf : ref_buf_list) {
|
||||
RTC_DCHECK_LE(ref_buf.pic_num, pic_num);
|
||||
@ -997,6 +1076,16 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
|
||||
}
|
||||
RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);
|
||||
|
||||
// Encoder may reference several spatial layers on the same previous
|
||||
// frame in case if some spatial layers are skipped on the current frame.
|
||||
// We shouldn't put duplicate references as it may break some old
|
||||
// clients and isn't RTP compatible.
|
||||
if (std::find(ref_pid_list.begin(), ref_pid_list.end(),
|
||||
ref_buf.pic_num) != ref_pid_list.end()) {
|
||||
continue;
|
||||
}
|
||||
ref_pid_list.push_back(ref_buf.pic_num);
|
||||
|
||||
const size_t p_diff = pic_num - ref_buf.pic_num;
|
||||
RTC_DCHECK_LE(p_diff, 127UL);
|
||||
|
||||
@ -1038,20 +1127,13 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
|
||||
vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
|
||||
vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
|
||||
|
||||
if (enc_layer_conf.update_last[layer_id.spatial_layer_id]) {
|
||||
ref_buf_[enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id]] =
|
||||
frame_buf;
|
||||
for (size_t i = 0; i < kNumVp9Buffers; ++i) {
|
||||
if (enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id] &
|
||||
(1 << i)) {
|
||||
ref_buf_[i] = frame_buf;
|
||||
}
|
||||
}
|
||||
|
||||
if (enc_layer_conf.update_alt_ref[layer_id.spatial_layer_id]) {
|
||||
ref_buf_[enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id]] =
|
||||
frame_buf;
|
||||
}
|
||||
|
||||
if (enc_layer_conf.update_golden[layer_id.spatial_layer_id]) {
|
||||
ref_buf_[enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id]] =
|
||||
frame_buf;
|
||||
}
|
||||
} else {
|
||||
RTC_DCHECK_EQ(num_spatial_layers_, 1);
|
||||
RTC_DCHECK_EQ(num_temporal_layers_, 1);
|
||||
@ -1061,7 +1143,9 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
|
||||
}
|
||||
}
|
||||
|
||||
vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) {
|
||||
vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(
|
||||
bool is_key_pic,
|
||||
size_t first_active_spatial_layer_id) {
|
||||
// kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs.
|
||||
RTC_DCHECK_LE(gof_.num_frames_in_gof, 4);
|
||||
|
||||
@ -1083,8 +1167,10 @@ vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) {
|
||||
// for temporal references plus 1 buffer for spatial reference. 7 buffers
|
||||
// in total.
|
||||
|
||||
for (size_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
|
||||
const size_t gof_idx = pics_since_key_ % gof_.num_frames_in_gof;
|
||||
for (size_t sl_idx = first_active_spatial_layer_id;
|
||||
sl_idx < num_active_spatial_layers_; ++sl_idx) {
|
||||
const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1;
|
||||
const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof;
|
||||
|
||||
if (!is_key_pic) {
|
||||
// Set up temporal reference.
|
||||
@ -1096,36 +1182,47 @@ vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) {
|
||||
|
||||
// Sanity check that reference picture number is smaller than current
|
||||
// picture number.
|
||||
const size_t curr_pic_num = pics_since_key_ + 1;
|
||||
RTC_DCHECK_LT(ref_buf_[buf_idx].pic_num, curr_pic_num);
|
||||
const size_t pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
|
||||
// Incorrect spatial layer may be in the buffer due to a key-frame.
|
||||
const bool same_spatial_layer =
|
||||
ref_buf_[buf_idx].spatial_layer_id == sl_idx;
|
||||
bool correct_pid = false;
|
||||
if (different_framerates_used_) {
|
||||
correct_pid = pid_diff < kMaxAllowedPidDIff;
|
||||
} else {
|
||||
// Below code assumes single temporal reference.
|
||||
RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
|
||||
correct_pid = pid_diff == gof_.pid_diff[gof_idx][0];
|
||||
}
|
||||
|
||||
// Below code assumes single temporal reference.
|
||||
RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
|
||||
if (pid_diff == gof_.pid_diff[gof_idx][0]) {
|
||||
if (same_spatial_layer && correct_pid) {
|
||||
ref_config.lst_fb_idx[sl_idx] = buf_idx;
|
||||
ref_config.reference_last[sl_idx] = 1;
|
||||
} else {
|
||||
// This reference doesn't match with one specified by GOF. This can
|
||||
// only happen if spatial layer is enabled dynamically without key
|
||||
// frame. Spatial prediction is supposed to be enabled in this case.
|
||||
RTC_DCHECK(is_inter_layer_pred_allowed);
|
||||
RTC_DCHECK(is_inter_layer_pred_allowed &&
|
||||
sl_idx > first_active_spatial_layer_id);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_inter_layer_pred_allowed && sl_idx > 0) {
|
||||
if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) {
|
||||
// Set up spatial reference.
|
||||
RTC_DCHECK(last_updated_buf_idx);
|
||||
ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx;
|
||||
ref_config.reference_golden[sl_idx] = 1;
|
||||
} else {
|
||||
RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 || sl_idx == 0 ||
|
||||
RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 ||
|
||||
sl_idx == first_active_spatial_layer_id ||
|
||||
inter_layer_pred_ == InterLayerPredMode::kOff);
|
||||
}
|
||||
|
||||
last_updated_buf_idx.reset();
|
||||
|
||||
if (gof_.temporal_idx[gof_idx] <= num_temporal_layers_ - 1) {
|
||||
if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 ||
|
||||
num_temporal_layers_ == 1) {
|
||||
last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx];
|
||||
|
||||
// Ensure last frame buffer is not used for temporal prediction (it is
|
||||
|
||||
@ -70,7 +70,9 @@ class VP9EncoderImpl : public VP9Encoder {
|
||||
CodecSpecificInfoVP9* vp9_info);
|
||||
void UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
|
||||
const size_t pic_num);
|
||||
vpx_svc_ref_frame_config_t SetReferences(bool is_key_pic);
|
||||
vpx_svc_ref_frame_config_t SetReferences(
|
||||
bool is_key_pic,
|
||||
size_t first_active_spatial_layer_id);
|
||||
|
||||
bool ExplicitlyConfiguredSpatialLayers() const;
|
||||
bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation);
|
||||
@ -110,6 +112,7 @@ class VP9EncoderImpl : public VP9Encoder {
|
||||
GofInfoVP9 gof_; // Contains each frame's temporal information for
|
||||
// non-flexible mode.
|
||||
bool force_key_frame_;
|
||||
bool different_framerates_used_;
|
||||
size_t pics_since_key_;
|
||||
uint8_t num_temporal_layers_;
|
||||
uint8_t num_spatial_layers_; // Number of configured SLs
|
||||
@ -121,6 +124,9 @@ class VP9EncoderImpl : public VP9Encoder {
|
||||
const bool trusted_rate_controller_;
|
||||
const bool full_superframe_drop_;
|
||||
bool first_frame_in_picture_;
|
||||
VideoBitrateAllocation current_bitrate_allocation_;
|
||||
absl::optional<VideoBitrateAllocation> requested_bitrate_allocation_;
|
||||
bool ss_info_needed_;
|
||||
|
||||
std::vector<FramerateController> framerate_controller_;
|
||||
|
||||
|
||||
@ -67,9 +67,11 @@ class VCMEncodedFrame : protected EncodedImage {
|
||||
/**
|
||||
* Frame RTP timestamp (90kHz)
|
||||
*/
|
||||
using EncodedImage::Timestamp;
|
||||
using EncodedImage::set_size;
|
||||
using EncodedImage::SetTimestamp;
|
||||
using EncodedImage::size;
|
||||
using EncodedImage::Timestamp;
|
||||
|
||||
/**
|
||||
* Get render time in milliseconds
|
||||
*/
|
||||
@ -90,6 +92,7 @@ class VCMEncodedFrame : protected EncodedImage {
|
||||
* Get video timing
|
||||
*/
|
||||
EncodedImage::Timing video_timing() const { return timing_; }
|
||||
EncodedImage::Timing* video_timing_mutable() { return &timing_; }
|
||||
/**
|
||||
* True if this frame is complete, false otherwise
|
||||
*/
|
||||
@ -109,8 +112,10 @@ class VCMEncodedFrame : protected EncodedImage {
|
||||
* the object.
|
||||
*/
|
||||
const CodecSpecificInfo* CodecSpecific() const { return &_codecSpecificInfo; }
|
||||
void SetCodecSpecific(const CodecSpecificInfo* codec_specific) {
|
||||
_codecSpecificInfo = *codec_specific;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Verifies that current allocated buffer size is larger than or equal to the
|
||||
* input size.
|
||||
@ -121,6 +126,7 @@ class VCMEncodedFrame : protected EncodedImage {
|
||||
*/
|
||||
void VerifyAndAllocate(size_t minimumSize);
|
||||
|
||||
protected:
|
||||
void Reset();
|
||||
|
||||
void CopyCodecSpecific(const RTPVideoHeader* header);
|
||||
|
||||
@ -87,10 +87,10 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
|
||||
|
||||
wait_ms = max_wait_time_ms;
|
||||
|
||||
// Need to hold |crit_| in order to use |frames_|, therefore we
|
||||
// Need to hold |crit_| in order to access frames_to_decode_, therefore we
|
||||
// set it here in the loop instead of outside the loop in order to not
|
||||
// acquire the lock unnecesserily.
|
||||
next_frame_it_ = frames_.end();
|
||||
// acquire the lock unnecessarily.
|
||||
frames_to_decode_.clear();
|
||||
|
||||
// |frame_it| points to the first frame after the
|
||||
// |last_decoded_frame_it_|.
|
||||
@ -128,7 +128,53 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
|
||||
continue;
|
||||
}
|
||||
|
||||
next_frame_it_ = frame_it;
|
||||
// Only ever return all parts of a superframe. Therefore skip this
|
||||
// frame if it's not a beginning of a superframe.
|
||||
if (frame->inter_layer_predicted) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Gather all remaining frames for the same superframe.
|
||||
std::vector<FrameMap::iterator> current_superframe;
|
||||
current_superframe.push_back(frame_it);
|
||||
bool last_layer_completed =
|
||||
frame_it->second.frame->is_last_spatial_layer;
|
||||
FrameMap::iterator next_frame_it = frame_it;
|
||||
while (true) {
|
||||
++next_frame_it;
|
||||
if (next_frame_it == frames_.end() ||
|
||||
next_frame_it->first.picture_id != frame->id.picture_id ||
|
||||
!next_frame_it->second.continuous) {
|
||||
break;
|
||||
}
|
||||
// Check if the next frame has some undecoded references other than
|
||||
// the previous frame in the same superframe.
|
||||
size_t num_allowed_undecoded_refs =
|
||||
(next_frame_it->second.frame->inter_layer_predicted) ? 1 : 0;
|
||||
if (next_frame_it->second.num_missing_decodable >
|
||||
num_allowed_undecoded_refs) {
|
||||
break;
|
||||
}
|
||||
// All frames in the superframe should have the same timestamp.
|
||||
if (frame->Timestamp() != next_frame_it->second.frame->Timestamp()) {
|
||||
RTC_LOG(LS_WARNING)
|
||||
<< "Frames in a single superframe have different"
|
||||
" timestamps. Skipping undecodable superframe.";
|
||||
break;
|
||||
}
|
||||
current_superframe.push_back(next_frame_it);
|
||||
last_layer_completed =
|
||||
next_frame_it->second.frame->is_last_spatial_layer;
|
||||
}
|
||||
// Check if the current superframe is complete.
|
||||
// TODO(bugs.webrtc.org/10064): consider returning all available to
|
||||
// decode frames even if the superframe is not complete yet.
|
||||
if (!last_layer_completed) {
|
||||
continue;
|
||||
}
|
||||
|
||||
frames_to_decode_ = std::move(current_superframe);
|
||||
|
||||
if (frame->RenderTime() == -1) {
|
||||
frame->SetRenderTime(
|
||||
timing_->RenderTimeMs(frame->Timestamp(), now_ms));
|
||||
@ -154,9 +200,10 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
|
||||
{
|
||||
rtc::CritScope lock(&crit_);
|
||||
now_ms = clock_->TimeInMilliseconds();
|
||||
if (next_frame_it_ != frames_.end()) {
|
||||
std::unique_ptr<EncodedFrame> frame =
|
||||
std::move(next_frame_it_->second.frame);
|
||||
std::vector<EncodedFrame*> frames_out;
|
||||
for (const FrameMap::iterator& frame_it : frames_to_decode_) {
|
||||
RTC_DCHECK(frame_it != frames_.end());
|
||||
EncodedFrame* frame = frame_it->second.frame.release();
|
||||
|
||||
if (!frame->delayed_by_retransmission()) {
|
||||
int64_t frame_delay;
|
||||
@ -187,14 +234,22 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
|
||||
|
||||
UpdateJitterDelay();
|
||||
UpdateTimingFrameInfo();
|
||||
PropagateDecodability(next_frame_it_->second);
|
||||
PropagateDecodability(frame_it->second);
|
||||
|
||||
AdvanceLastDecodedFrame(next_frame_it_);
|
||||
AdvanceLastDecodedFrame(frame_it);
|
||||
last_decoded_frame_timestamp_ = frame->Timestamp();
|
||||
*frame_out = std::move(frame);
|
||||
frames_out.push_back(frame);
|
||||
}
|
||||
|
||||
if (!frames_out.empty()) {
|
||||
if (frames_out.size() == 1) {
|
||||
frame_out->reset(frames_out[0]);
|
||||
} else {
|
||||
frame_out->reset(CombineAndDeleteFrames(frames_out));
|
||||
}
|
||||
return kFrameFound;
|
||||
}
|
||||
}
|
||||
} // rtc::Critscope lock(&crit_)
|
||||
|
||||
if (latest_return_time_ms - now_ms > 0) {
|
||||
// If |frames_to_decode_| is empty and there is still time left, it
|
||||
@ -203,7 +258,6 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame(
|
||||
// remaining time and then return.
|
||||
return NextFrame(latest_return_time_ms - now_ms, frame_out);
|
||||
}
|
||||
|
||||
return kTimeout;
|
||||
}
|
||||
|
||||
@ -606,11 +660,38 @@ void FrameBuffer::ClearFramesAndHistory() {
|
||||
frames_.clear();
|
||||
last_decoded_frame_it_ = frames_.end();
|
||||
last_continuous_frame_it_ = frames_.end();
|
||||
next_frame_it_ = frames_.end();
|
||||
frames_to_decode_.clear();
|
||||
num_frames_history_ = 0;
|
||||
num_frames_buffered_ = 0;
|
||||
}
|
||||
|
||||
// Merges the payloads of |frames| into the first frame and returns it.
//
// The payload of every frame after the first is appended, in order, behind
// the first frame's existing payload. The returned frame also takes over the
// |network2_timestamp_ms| and |receive_finish_ms| timing values of the last
// appended frame. Every frame except the first is deleted here; the caller
// keeps ownership of the returned pointer, which is |frames[0]|.
//
// |frames| must not be empty.
EncodedFrame* FrameBuffer::CombineAndDeleteFrames(
    const std::vector<EncodedFrame*>& frames) const {
  RTC_DCHECK(!frames.empty());
  EncodedFrame* const combined = frames[0];

  // Payload size of all frames together, the first one included.
  size_t combined_size = 0;
  for (EncodedFrame* layer : frames) {
    combined_size += layer->size();
  }

  // Make sure the first frame's buffer can hold everything before writing.
  // NOTE(review): this relies on VerifyAndAllocate() keeping the bytes that
  // are already stored in the buffer - confirm.
  combined->VerifyAndAllocate(combined_size);
  uint8_t* const payload = combined->MutableBuffer();

  // Start writing right behind the first frame's own payload.
  size_t write_offset = combined->size();
  for (auto it = frames.begin() + 1; it != frames.end(); ++it) {
    EncodedFrame* const layer = *it;
    const size_t layer_size = layer->size();
    memcpy(payload + write_offset, layer->Buffer(), layer_size);
    write_offset += layer_size;

    // The combined frame reports the timing of the latest appended frame.
    auto* const timing = combined->video_timing_mutable();
    timing->network2_timestamp_ms =
        layer->video_timing().network2_timestamp_ms;
    timing->receive_finish_ms = layer->video_timing().receive_finish_ms;

    delete layer;
  }

  combined->set_size(combined_size);
  return combined;
}
|
||||
|
||||
FrameBuffer::FrameInfo::FrameInfo() = default;
|
||||
FrameBuffer::FrameInfo::FrameInfo(FrameInfo&&) = default;
|
||||
FrameBuffer::FrameInfo::~FrameInfo() = default;
|
||||
|
||||
@ -15,6 +15,7 @@
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "api/video/encoded_frame.h"
|
||||
#include "modules/video_coding/include/video_coding_defines.h"
|
||||
@ -156,6 +157,13 @@ class FrameBuffer {
|
||||
bool HasBadRenderTiming(const EncodedFrame& frame, int64_t now_ms)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
|
||||
// The cleaner solution would be to have the NextFrame function return a
|
||||
// vector of frames, but until the decoding pipeline can support decoding
|
||||
// multiple frames at the same time we combine all frames to one frame and
|
||||
// return it. See bugs.webrtc.org/10064
|
||||
EncodedFrame* CombineAndDeleteFrames(
|
||||
const std::vector<EncodedFrame*>& frames) const;
|
||||
|
||||
FrameMap frames_ RTC_GUARDED_BY(crit_);
|
||||
|
||||
rtc::CriticalSection crit_;
|
||||
@ -167,7 +175,7 @@ class FrameBuffer {
|
||||
absl::optional<uint32_t> last_decoded_frame_timestamp_ RTC_GUARDED_BY(crit_);
|
||||
FrameMap::iterator last_decoded_frame_it_ RTC_GUARDED_BY(crit_);
|
||||
FrameMap::iterator last_continuous_frame_it_ RTC_GUARDED_BY(crit_);
|
||||
FrameMap::iterator next_frame_it_ RTC_GUARDED_BY(crit_);
|
||||
std::vector<FrameMap::iterator> frames_to_decode_ RTC_GUARDED_BY(crit_);
|
||||
int num_frames_history_ RTC_GUARDED_BY(crit_);
|
||||
int num_frames_buffered_ RTC_GUARDED_BY(crit_);
|
||||
bool stopped_ RTC_GUARDED_BY(crit_);
|
||||
|
||||
@ -124,6 +124,7 @@ class TestFrameBuffer2 : public ::testing::Test {
|
||||
static constexpr int kFps1 = 1000;
|
||||
static constexpr int kFps10 = kFps1 / 10;
|
||||
static constexpr int kFps20 = kFps1 / 20;
|
||||
static constexpr size_t kFrameSize = 10;
|
||||
|
||||
TestFrameBuffer2()
|
||||
: clock_(0),
|
||||
@ -150,6 +151,7 @@ class TestFrameBuffer2 : public ::testing::Test {
|
||||
uint8_t spatial_layer,
|
||||
int64_t ts_ms,
|
||||
bool inter_layer_predicted,
|
||||
bool last_spatial_layer,
|
||||
T... refs) {
|
||||
static_assert(sizeof...(refs) <= kMaxReferences,
|
||||
"To many references specified for EncodedFrame.");
|
||||
@ -162,6 +164,10 @@ class TestFrameBuffer2 : public ::testing::Test {
|
||||
frame->SetTimestamp(ts_ms * 90);
|
||||
frame->num_references = references.size();
|
||||
frame->inter_layer_predicted = inter_layer_predicted;
|
||||
frame->is_last_spatial_layer = last_spatial_layer;
|
||||
// Add some data to buffer.
|
||||
frame->VerifyAndAllocate(kFrameSize);
|
||||
frame->SetSize(kFrameSize);
|
||||
for (size_t r = 0; r < references.size(); ++r)
|
||||
frame->references[r] = references[r];
|
||||
|
||||
@ -194,6 +200,13 @@ class TestFrameBuffer2 : public ::testing::Test {
|
||||
ASSERT_EQ(spatial_layer, frames_[index]->id.spatial_layer);
|
||||
}
|
||||
|
||||
// Asserts that the extracted frame stored at |index| is present and that its
// payload size equals |size|. Takes |crit_| while reading |frames_|.
void CheckFrameSize(size_t index, size_t size) {
  rtc::CritScope lock(&crit_);
  ASSERT_LT(index, frames_.size());
  const auto& extracted = frames_[index];
  ASSERT_TRUE(extracted);
  ASSERT_EQ(extracted->size(), size);
}
|
||||
|
||||
void CheckNoFrame(size_t index) {
|
||||
rtc::CritScope lock(&crit_);
|
||||
ASSERT_LT(index, frames_.size());
|
||||
@ -246,7 +259,7 @@ TEST_F(TestFrameBuffer2, WaitForFrame) {
|
||||
uint32_t ts = Rand();
|
||||
|
||||
ExtractFrame(50);
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid, 0, ts, false, true);
|
||||
CheckFrame(0, pid, 0);
|
||||
}
|
||||
|
||||
@ -254,13 +267,11 @@ TEST_F(TestFrameBuffer2, OneSuperFrame) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
ExtractFrame();
|
||||
InsertFrame(pid, 1, ts, true);
|
||||
InsertFrame(pid, 0, ts, false, false);
|
||||
InsertFrame(pid, 1, ts, true, true);
|
||||
ExtractFrame();
|
||||
|
||||
CheckFrame(0, pid, 0);
|
||||
CheckFrame(1, pid, 1);
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, SetPlayoutDelay) {
|
||||
@ -293,8 +304,8 @@ TEST_F(TestFrameBuffer2, DISABLED_OneUnorderedSuperFrame) {
|
||||
uint32_t ts = Rand();
|
||||
|
||||
ExtractFrame(50);
|
||||
InsertFrame(pid, 1, ts, true);
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid, 1, ts, true, true);
|
||||
InsertFrame(pid, 0, ts, false, false);
|
||||
ExtractFrame();
|
||||
|
||||
CheckFrame(0, pid, 0);
|
||||
@ -305,14 +316,14 @@ TEST_F(TestFrameBuffer2, DISABLED_OneLayerStreamReordered) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid, 0, ts, false, true);
|
||||
ExtractFrame();
|
||||
CheckFrame(0, pid, 0);
|
||||
for (int i = 1; i < 10; i += 2) {
|
||||
ExtractFrame(50);
|
||||
InsertFrame(pid + i + 1, 0, ts + (i + 1) * kFps10, false, pid + i);
|
||||
InsertFrame(pid + i + 1, 0, ts + (i + 1) * kFps10, false, true, pid + i);
|
||||
clock_.AdvanceTimeMilliseconds(kFps10);
|
||||
InsertFrame(pid + i, 0, ts + i * kFps10, false, pid + i - 1);
|
||||
InsertFrame(pid + i, 0, ts + i * kFps10, false, true, pid + i - 1);
|
||||
clock_.AdvanceTimeMilliseconds(kFps10);
|
||||
ExtractFrame();
|
||||
CheckFrame(i, pid + i, 0);
|
||||
@ -330,9 +341,9 @@ TEST_F(TestFrameBuffer2, MissingFrame) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid + 2, 0, ts, false, pid);
|
||||
InsertFrame(pid + 3, 0, ts, false, pid + 1, pid + 2);
|
||||
InsertFrame(pid, 0, ts, false, true);
|
||||
InsertFrame(pid + 2, 0, ts, false, true, pid);
|
||||
InsertFrame(pid + 3, 0, ts, false, true, pid + 1, pid + 2);
|
||||
ExtractFrame();
|
||||
ExtractFrame();
|
||||
ExtractFrame();
|
||||
@ -346,11 +357,11 @@ TEST_F(TestFrameBuffer2, OneLayerStream) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid, 0, ts, false, true);
|
||||
ExtractFrame();
|
||||
CheckFrame(0, pid, 0);
|
||||
for (int i = 1; i < 10; ++i) {
|
||||
InsertFrame(pid + i, 0, ts + i * kFps10, false, pid + i - 1);
|
||||
InsertFrame(pid + i, 0, ts + i * kFps10, false, true, pid + i - 1);
|
||||
ExtractFrame();
|
||||
clock_.AdvanceTimeMilliseconds(kFps10);
|
||||
CheckFrame(i, pid + i, 0);
|
||||
@ -361,12 +372,13 @@ TEST_F(TestFrameBuffer2, DropTemporalLayerSlowDecoder) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid + 1, 0, ts + kFps20, false, pid);
|
||||
InsertFrame(pid, 0, ts, false, true);
|
||||
InsertFrame(pid + 1, 0, ts + kFps20, false, true, pid);
|
||||
for (int i = 2; i < 10; i += 2) {
|
||||
uint32_t ts_tl0 = ts + i / 2 * kFps10;
|
||||
InsertFrame(pid + i, 0, ts_tl0, false, pid + i - 2);
|
||||
InsertFrame(pid + i + 1, 0, ts_tl0 + kFps20, false, pid + i, pid + i - 1);
|
||||
InsertFrame(pid + i, 0, ts_tl0, false, true, pid + i - 2);
|
||||
InsertFrame(pid + i + 1, 0, ts_tl0 + kFps20, false, true, pid + i,
|
||||
pid + i - 1);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
@ -386,49 +398,15 @@ TEST_F(TestFrameBuffer2, DropTemporalLayerSlowDecoder) {
|
||||
CheckNoFrame(9);
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, DropSpatialLayerSlowDecoder) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid, 1, ts, false);
|
||||
for (int i = 1; i < 6; ++i) {
|
||||
uint32_t ts_tl0 = ts + i * kFps10;
|
||||
InsertFrame(pid + i, 0, ts_tl0, false, pid + i - 1);
|
||||
InsertFrame(pid + i, 1, ts_tl0, false, pid + i - 1);
|
||||
}
|
||||
|
||||
ExtractFrame();
|
||||
ExtractFrame();
|
||||
clock_.AdvanceTimeMilliseconds(57);
|
||||
for (int i = 2; i < 12; ++i) {
|
||||
ExtractFrame();
|
||||
clock_.AdvanceTimeMilliseconds(57);
|
||||
}
|
||||
|
||||
CheckFrame(0, pid, 0);
|
||||
CheckFrame(1, pid, 1);
|
||||
CheckFrame(2, pid + 1, 0);
|
||||
CheckFrame(3, pid + 1, 1);
|
||||
CheckFrame(4, pid + 2, 0);
|
||||
CheckFrame(5, pid + 2, 1);
|
||||
CheckFrame(6, pid + 3, 0);
|
||||
CheckFrame(7, pid + 4, 0);
|
||||
CheckFrame(8, pid + 5, 0);
|
||||
CheckNoFrame(9);
|
||||
CheckNoFrame(10);
|
||||
CheckNoFrame(11);
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, InsertLateFrame) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid, 0, ts, false, true);
|
||||
ExtractFrame();
|
||||
InsertFrame(pid + 2, 0, ts, false);
|
||||
InsertFrame(pid + 2, 0, ts, false, true);
|
||||
ExtractFrame();
|
||||
InsertFrame(pid + 1, 0, ts, false, pid);
|
||||
InsertFrame(pid + 1, 0, ts, false, true, pid);
|
||||
ExtractFrame();
|
||||
|
||||
CheckFrame(0, pid, 0);
|
||||
@ -441,12 +419,12 @@ TEST_F(TestFrameBuffer2, ProtectionMode) {
|
||||
uint32_t ts = Rand();
|
||||
|
||||
EXPECT_CALL(jitter_estimator_, GetJitterEstimate(1.0));
|
||||
InsertFrame(pid, 0, ts, false);
|
||||
InsertFrame(pid, 0, ts, false, true);
|
||||
ExtractFrame();
|
||||
|
||||
buffer_->SetProtectionMode(kProtectionNackFEC);
|
||||
EXPECT_CALL(jitter_estimator_, GetJitterEstimate(0.0));
|
||||
InsertFrame(pid + 1, 0, ts, false);
|
||||
InsertFrame(pid + 1, 0, ts, false, true);
|
||||
ExtractFrame();
|
||||
}
|
||||
|
||||
@ -454,45 +432,45 @@ TEST_F(TestFrameBuffer2, NoContinuousFrame) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
EXPECT_EQ(-1, InsertFrame(pid + 1, 0, ts, false, pid));
|
||||
EXPECT_EQ(-1, InsertFrame(pid + 1, 0, ts, false, true, pid));
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, LastContinuousFrameSingleLayer) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, pid + 1));
|
||||
EXPECT_EQ(pid + 2, InsertFrame(pid + 1, 0, ts, false, pid));
|
||||
EXPECT_EQ(pid + 2, InsertFrame(pid + 4, 0, ts, false, pid + 3));
|
||||
EXPECT_EQ(pid + 5, InsertFrame(pid + 5, 0, ts, false));
|
||||
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, true));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, true, pid + 1));
|
||||
EXPECT_EQ(pid + 2, InsertFrame(pid + 1, 0, ts, false, true, pid));
|
||||
EXPECT_EQ(pid + 2, InsertFrame(pid + 4, 0, ts, false, true, pid + 3));
|
||||
EXPECT_EQ(pid + 5, InsertFrame(pid + 5, 0, ts, false, true));
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, LastContinuousFrameTwoLayers) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false));
|
||||
EXPECT_EQ(pid, InsertFrame(pid, 1, ts, true));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 1, 1, ts, true, pid));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, pid + 1));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 2, 1, ts, true, pid + 1));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 3, 0, ts, false, pid + 2));
|
||||
EXPECT_EQ(pid + 3, InsertFrame(pid + 1, 0, ts, false, pid));
|
||||
EXPECT_EQ(pid + 3, InsertFrame(pid + 3, 1, ts, true, pid + 2));
|
||||
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, false));
|
||||
EXPECT_EQ(pid, InsertFrame(pid, 1, ts, true, true));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 1, 1, ts, true, true, pid));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, false, pid + 1));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 2, 1, ts, true, true, pid + 1));
|
||||
EXPECT_EQ(pid, InsertFrame(pid + 3, 0, ts, false, false, pid + 2));
|
||||
EXPECT_EQ(pid + 3, InsertFrame(pid + 1, 0, ts, false, false, pid));
|
||||
EXPECT_EQ(pid + 3, InsertFrame(pid + 3, 1, ts, true, true, pid + 2));
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, PictureIdJumpBack) {
|
||||
uint16_t pid = Rand();
|
||||
uint32_t ts = Rand();
|
||||
|
||||
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false));
|
||||
EXPECT_EQ(pid + 1, InsertFrame(pid + 1, 0, ts + 1, false, pid));
|
||||
EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, true));
|
||||
EXPECT_EQ(pid + 1, InsertFrame(pid + 1, 0, ts + 1, false, true, pid));
|
||||
ExtractFrame();
|
||||
CheckFrame(0, pid, 0);
|
||||
|
||||
// Jump back in pid but increase ts.
|
||||
EXPECT_EQ(pid - 1, InsertFrame(pid - 1, 0, ts + 2, false));
|
||||
EXPECT_EQ(pid - 1, InsertFrame(pid - 1, 0, ts + 2, false, true));
|
||||
ExtractFrame();
|
||||
ExtractFrame();
|
||||
CheckFrame(1, pid - 1, 0);
|
||||
@ -511,6 +489,7 @@ TEST_F(TestFrameBuffer2, StatsCallback) {
|
||||
|
||||
{
|
||||
std::unique_ptr<FrameObjectFake> frame(new FrameObjectFake());
|
||||
frame->VerifyAndAllocate(kFrameSize);
|
||||
frame->SetSize(kFrameSize);
|
||||
frame->id.picture_id = pid;
|
||||
frame->id.spatial_layer = 0;
|
||||
@ -526,42 +505,42 @@ TEST_F(TestFrameBuffer2, StatsCallback) {
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, ForwardJumps) {
|
||||
EXPECT_EQ(5453, InsertFrame(5453, 0, 1, false));
|
||||
EXPECT_EQ(5453, InsertFrame(5453, 0, 1, false, true));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(5454, InsertFrame(5454, 0, 1, false, 5453));
|
||||
EXPECT_EQ(5454, InsertFrame(5454, 0, 1, false, true, 5453));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(15670, InsertFrame(15670, 0, 1, false));
|
||||
EXPECT_EQ(15670, InsertFrame(15670, 0, 1, false, true));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(29804, InsertFrame(29804, 0, 1, false));
|
||||
EXPECT_EQ(29804, InsertFrame(29804, 0, 1, false, true));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(29805, InsertFrame(29805, 0, 1, false, 29804));
|
||||
EXPECT_EQ(29805, InsertFrame(29805, 0, 1, false, true, 29804));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(29806, InsertFrame(29806, 0, 1, false, 29805));
|
||||
EXPECT_EQ(29806, InsertFrame(29806, 0, 1, false, true, 29805));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(33819, InsertFrame(33819, 0, 1, false));
|
||||
EXPECT_EQ(33819, InsertFrame(33819, 0, 1, false, true));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(41248, InsertFrame(41248, 0, 1, false));
|
||||
EXPECT_EQ(41248, InsertFrame(41248, 0, 1, false, true));
|
||||
ExtractFrame();
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, DuplicateFrames) {
|
||||
EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false));
|
||||
EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false, true));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false));
|
||||
EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false, true));
|
||||
}
|
||||
|
||||
// TODO(philipel): implement more unittests related to invalid references.
|
||||
TEST_F(TestFrameBuffer2, InvalidReferences) {
|
||||
EXPECT_EQ(-1, InsertFrame(0, 0, 1000, false, 2));
|
||||
EXPECT_EQ(1, InsertFrame(1, 0, 2000, false));
|
||||
EXPECT_EQ(-1, InsertFrame(0, 0, 1000, false, true, 2));
|
||||
EXPECT_EQ(1, InsertFrame(1, 0, 2000, false, true));
|
||||
ExtractFrame();
|
||||
EXPECT_EQ(2, InsertFrame(2, 0, 3000, false, 1));
|
||||
EXPECT_EQ(2, InsertFrame(2, 0, 3000, false, true, 1));
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, KeyframeRequired) {
|
||||
EXPECT_EQ(1, InsertFrame(1, 0, 1000, false));
|
||||
EXPECT_EQ(2, InsertFrame(2, 0, 2000, false, 1));
|
||||
EXPECT_EQ(3, InsertFrame(3, 0, 3000, false));
|
||||
EXPECT_EQ(1, InsertFrame(1, 0, 1000, false, true));
|
||||
EXPECT_EQ(2, InsertFrame(2, 0, 2000, false, true, 1));
|
||||
EXPECT_EQ(3, InsertFrame(3, 0, 3000, false, true));
|
||||
ExtractFrame();
|
||||
ExtractFrame(0, true);
|
||||
ExtractFrame();
|
||||
@ -575,42 +554,81 @@ TEST_F(TestFrameBuffer2, KeyframeClearsFullBuffer) {
|
||||
const int kMaxBufferSize = 600;
|
||||
|
||||
for (int i = 1; i <= kMaxBufferSize; ++i)
|
||||
EXPECT_EQ(-1, InsertFrame(i, 0, i * 1000, false, i - 1));
|
||||
EXPECT_EQ(-1, InsertFrame(i, 0, i * 1000, false, true, i - 1));
|
||||
ExtractFrame();
|
||||
CheckNoFrame(0);
|
||||
|
||||
EXPECT_EQ(
|
||||
kMaxBufferSize + 1,
|
||||
InsertFrame(kMaxBufferSize + 1, 0, (kMaxBufferSize + 1) * 1000, false));
|
||||
EXPECT_EQ(kMaxBufferSize + 1,
|
||||
InsertFrame(kMaxBufferSize + 1, 0, (kMaxBufferSize + 1) * 1000,
|
||||
false, true));
|
||||
ExtractFrame();
|
||||
CheckFrame(1, kMaxBufferSize + 1, 0);
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, DontUpdateOnUndecodableFrame) {
|
||||
InsertFrame(1, 0, 0, false);
|
||||
InsertFrame(1, 0, 0, false, true);
|
||||
ExtractFrame(0, true);
|
||||
InsertFrame(3, 0, 0, false, 2, 0);
|
||||
InsertFrame(3, 0, 0, false, 0);
|
||||
InsertFrame(2, 0, 0, false);
|
||||
InsertFrame(3, 0, 0, false, true, 2, 0);
|
||||
InsertFrame(3, 0, 0, false, true, 0);
|
||||
InsertFrame(2, 0, 0, false, true);
|
||||
ExtractFrame(0, true);
|
||||
ExtractFrame(0, true);
|
||||
}
|
||||
|
||||
TEST_F(TestFrameBuffer2, DontDecodeOlderTimestamp) {
|
||||
InsertFrame(2, 0, 1, false);
|
||||
InsertFrame(1, 0, 2, false); // Older picture id but newer timestamp.
|
||||
InsertFrame(2, 0, 1, false, true);
|
||||
InsertFrame(1, 0, 2, false, true); // Older picture id but newer timestamp.
|
||||
ExtractFrame(0);
|
||||
ExtractFrame(0);
|
||||
CheckFrame(0, 1, 0);
|
||||
CheckNoFrame(1);
|
||||
|
||||
InsertFrame(3, 0, 4, false);
|
||||
InsertFrame(4, 0, 3, false); // Newer picture id but older timestamp.
|
||||
InsertFrame(3, 0, 4, false, true);
|
||||
InsertFrame(4, 0, 3, false, true); // Newer picture id but older timestamp.
|
||||
ExtractFrame(0);
|
||||
ExtractFrame(0);
|
||||
CheckFrame(2, 3, 0);
|
||||
CheckNoFrame(3);
|
||||
}
|
||||
|
||||
// Two spatial layers that belong to the same superframe must be extracted as
// one combined frame.
TEST_F(TestFrameBuffer2, CombineFramesToSuperframe) {
  const uint16_t picture_id = Rand();
  const uint32_t timestamp = Rand();

  // Layer 0 is not the last spatial layer; layer 1 is inter-layer predicted
  // and is marked as the last spatial layer.
  InsertFrame(picture_id, 0, timestamp, false, false);
  InsertFrame(picture_id, 1, timestamp, true, true);

  // Only the first extraction is expected to produce a frame.
  ExtractFrame(0);
  ExtractFrame(0);
  CheckFrame(0, picture_id, 0);
  CheckNoFrame(1);

  // The single returned frame holds the payload of both inserted frames.
  CheckFrameSize(0, kFrameSize * 2);
}
|
||||
|
||||
// A superframe whose upper spatial layer cannot be decoded must not be
// returned in place of an older frame that is still decodable.
TEST_F(TestFrameBuffer2, HigherSpatialLayerNonDecodable) {
  const uint16_t base_pid = Rand();
  const uint32_t base_ts = Rand();

  // Complete two-layer superframe that is extracted first.
  InsertFrame(base_pid, 0, base_ts, false, false);
  InsertFrame(base_pid, 1, base_ts, true, true);

  ExtractFrame(0);
  CheckFrame(0, base_pid, 0);

  // base_pid + 1 has only spatial layer 1. Superframe base_pid + 2 has both
  // layers, and its layer 1 references base_pid + 1.
  InsertFrame(base_pid + 1, 1, base_ts + kFps20, false, true, base_pid);
  InsertFrame(base_pid + 2, 0, base_ts + kFps10, false, false, base_pid);
  InsertFrame(base_pid + 2, 1, base_ts + kFps10, true, true, base_pid + 1);

  clock_.AdvanceTimeMilliseconds(1000);
  // Frame base_pid + 1 is decodable but too late.
  // In superframe base_pid + 2, layer 0 is decodable but layer 1 is not.
  // An incorrect implementation might skip frame base_pid + 1 and output the
  // undecodable superframe base_pid + 2 instead.
  ExtractFrame();
  ExtractFrame();
  CheckFrame(1, base_pid + 1, 1);
  CheckFrame(2, base_pid + 2, 0);
}
|
||||
|
||||
} // namespace video_coding
|
||||
} // namespace webrtc
|
||||
|
||||
@ -104,6 +104,7 @@ RtpFrameObject::RtpFrameObject(PacketBuffer* packet_buffer,
|
||||
timing_.receive_finish_ms = last_packet->receive_time_ms;
|
||||
}
|
||||
timing_.flags = last_packet->video_header.video_timing.flags;
|
||||
is_last_spatial_layer = last_packet->markerBit;
|
||||
}
|
||||
|
||||
RtpFrameObject::~RtpFrameObject() {
|
||||
|
||||
@ -489,12 +489,24 @@ RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp9(
|
||||
UnwrapPictureIds(frame);
|
||||
return kHandOff;
|
||||
}
|
||||
} else {
|
||||
if (frame->frame_type() == kVideoFrameKey) {
|
||||
} else if (frame->frame_type() == kVideoFrameKey) {
|
||||
if (frame->id.spatial_layer == 0) {
|
||||
RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
|
||||
return kDrop;
|
||||
}
|
||||
const auto gof_info_it = gof_info_.find(unwrapped_tl0);
|
||||
if (gof_info_it == gof_info_.end())
|
||||
return kStash;
|
||||
|
||||
info = &gof_info_it->second;
|
||||
|
||||
if (frame->frame_type() == kVideoFrameKey) {
|
||||
frame->num_references = 0;
|
||||
FrameReceivedVp9(frame->id.picture_id, info);
|
||||
UnwrapPictureIds(frame);
|
||||
return kHandOff;
|
||||
}
|
||||
} else {
|
||||
auto gof_info_it = gof_info_.find(
|
||||
(codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
|
||||
|
||||
|
||||
@ -496,6 +496,14 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) {
|
||||
VideoSendTiming::kInvalid;
|
||||
webrtc_rtp_header.video_header().is_last_packet_in_frame =
|
||||
webrtc_rtp_header.header.markerBit;
|
||||
if (parsed_payload.video_header().codec == kVideoCodecVP9) {
|
||||
const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
|
||||
parsed_payload.video_header().video_type_header);
|
||||
webrtc_rtp_header.video_header().is_last_packet_in_frame |=
|
||||
codec_header.end_of_frame;
|
||||
webrtc_rtp_header.video_header().is_first_packet_in_frame |=
|
||||
codec_header.beginning_of_frame;
|
||||
}
|
||||
|
||||
packet.GetExtension<VideoOrientation>(
|
||||
&webrtc_rtp_header.video_header().rotation);
|
||||
|
||||
@ -670,6 +670,10 @@ void VideoQualityTest::SetupVideo(Transport* send_transport,
|
||||
vp9_settings.numberOfSpatialLayers = static_cast<unsigned char>(
|
||||
params_.ss[video_idx].num_spatial_layers);
|
||||
vp9_settings.interLayerPred = params_.ss[video_idx].inter_layer_pred;
|
||||
// High FPS vp9 screenshare requires flexible mode.
|
||||
if (params_.video[video_idx].fps > 5) {
|
||||
vp9_settings.flexibleMode = true;
|
||||
}
|
||||
video_encoder_configs_[video_idx].encoder_specific_settings =
|
||||
new rtc::RefCountedObject<
|
||||
VideoEncoderConfig::Vp9EncoderSpecificSettings>(vp9_settings);
|
||||
|
||||
@ -381,10 +381,6 @@ void VideoReceiveStream::RequestKeyFrame() {
|
||||
|
||||
void VideoReceiveStream::OnCompleteFrame(
|
||||
std::unique_ptr<video_coding::EncodedFrame> frame) {
|
||||
// TODO(webrtc:9249): Workaround to allow decoding of VP9 SVC stream with
|
||||
// partially enabled inter-layer prediction.
|
||||
frame->id.spatial_layer = 0;
|
||||
|
||||
// TODO(https://bugs.webrtc.org/9974): Consider removing this workaround.
|
||||
int64_t time_now_ms = rtc::TimeMillis();
|
||||
if (last_complete_frame_time_ms_ > 0 &&
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user