From a669a3a0dc9d20313c3e437d4c0ed5003f545727 Mon Sep 17 00:00:00 2001 From: stefan Date: Thu, 6 Oct 2016 05:04:52 -0700 Subject: [PATCH] Revert "Revert of Use sps and pps to determine decodability of H.264 frames. (patchset #4 id:60001 of https://codereview.webrtc.org/2341713002/ )" This reverts commit 3cdfcd88a14449a9b116cb6149e1348d3a1e4cb2. NOPRESUBMIT=true BUG=webrtc:6208 Review-Url: https://codereview.webrtc.org/2385143002 Cr-Commit-Position: refs/heads/master@{#14551} --- .../rtp_rtcp/source/rtp_format_h264.cc | 37 ++++++-- .../source/rtp_format_h264_unittest.cc | 28 ++++-- webrtc/modules/video_coding/decoding_state.cc | 78 ++++++++++++++++- webrtc/modules/video_coding/decoding_state.h | 8 ++ webrtc/modules/video_coding/frame_buffer.cc | 4 + webrtc/modules/video_coding/frame_buffer.h | 4 + .../video_coding/jitter_buffer_unittest.cc | 86 ++++++++++++++++++- webrtc/modules/video_coding/session_info.cc | 13 +++ webrtc/modules/video_coding/session_info.h | 3 + 9 files changed, 243 insertions(+), 18 deletions(-) diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc index 19467ca651..81dcb8b9e2 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc @@ -484,6 +484,8 @@ bool RtpDepacketizerH264::ProcessStapAOrSingleNalu( parsed_payload->type.Video.width = sps->width; parsed_payload->type.Video.height = sps->height; nalu.sps_id = sps->id; + } else { + LOG(LS_WARNING) << "Failed to parse SPS id from SPS slice."; } parsed_payload->frame_type = kVideoFrameKey; break; @@ -502,18 +504,32 @@ bool RtpDepacketizerH264::ProcessStapAOrSingleNalu( } break; } - case H264::NaluType::kSei: - FALLTHROUGH(); case H264::NaluType::kIdr: parsed_payload->frame_type = kVideoFrameKey; FALLTHROUGH(); - default: { + case H264::NaluType::kSlice: { rtc::Optional pps_id = PpsParser::ParsePpsIdFromSlice( &payload_data[start_offset], end_offset - start_offset); - if (pps_id) + if (pps_id) { nalu.pps_id = *pps_id; + } else { + LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: " + << static_cast(nalu.type); + } break; } + // Slices below don't contain SPS or PPS ids. + case H264::NaluType::kAud: + case H264::NaluType::kEndOfSequence: + case H264::NaluType::kEndOfStream: + case H264::NaluType::kFiller: + break; + case H264::NaluType::kSei: + parsed_payload->frame_type = kVideoFrameKey; + break; + case H264::NaluType::kStapA: + case H264::NaluType::kFuA: + RTC_NOTREACHED(); } RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264; if (h264->nalus_length == kMaxNalusPerPacket) { @@ -547,8 +563,13 @@ bool RtpDepacketizerH264::ParseFuaNalu( length_ -= kNalHeaderSize; rtc::Optional pps_id = PpsParser::ParsePpsIdFromSlice( payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize); - if (pps_id) + if (pps_id) { nalu.pps_id = *pps_id; + } else { + LOG(LS_WARNING) << "Failed to parse PPS from first fragment of FU-A NAL " + "unit with original type: " + << static_cast(nalu.type); + } uint8_t original_nal_header = fnri | original_nal_type; modified_buffer_.reset(new rtc::Buffer()); modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_); @@ -570,8 +591,10 @@ bool RtpDepacketizerH264::ParseFuaNalu( RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264; h264->packetization_type = kH264FuA; h264->nalu_type = original_nal_type; - h264->nalus[h264->nalus_length] = nalu; - h264->nalus_length = 1; + if (first_fragment) { + h264->nalus[h264->nalus_length] = nalu; + h264->nalus_length = 1; + } return true; } diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc index 81ff499f87..894415da8a 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc @@ -771,10 +771,8 @@ TEST_F(RtpDepacketizerH264Test, TestFuA) { const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264; EXPECT_EQ(kH264FuA, h264.packetization_type); EXPECT_EQ(kIdr, h264.nalu_type); - ASSERT_EQ(1u, h264.nalus_length); - EXPECT_EQ(static_cast(kIdr), h264.nalus[0].type); - EXPECT_EQ(-1, h264.nalus[0].sps_id); - EXPECT_EQ(-1, h264.nalus[0].pps_id); + // NALU info is only expected for the first FU-A packet. + EXPECT_EQ(0u, h264.nalus_length); } payload = RtpDepacketizer::ParsedPayload(); @@ -787,10 +785,8 @@ TEST_F(RtpDepacketizerH264Test, TestFuA) { const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264; EXPECT_EQ(kH264FuA, h264.packetization_type); EXPECT_EQ(kIdr, h264.nalu_type); - ASSERT_EQ(1u, h264.nalus_length); - EXPECT_EQ(static_cast(kIdr), h264.nalus[0].type); - EXPECT_EQ(-1, h264.nalus[0].sps_id); - EXPECT_EQ(-1, h264.nalus[0].pps_id); + // NALU info is only expected for the first FU-A packet. + ASSERT_EQ(0u, h264.nalus_length); } } @@ -825,4 +821,20 @@ TEST_F(RtpDepacketizerH264Test, TestShortSpsPacket) { EXPECT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); } +TEST_F(RtpDepacketizerH264Test, TestSeiPacket) { + const uint8_t kPayload[] = { + kSei, // F=0, NRI=0, Type=6. + 0x03, 0x03, 0x03, 0x03 // Payload. + }; + RtpDepacketizer::ParsedPayload payload; + ASSERT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); + const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264; + EXPECT_EQ(kH264SingleNalu, h264.packetization_type); + EXPECT_EQ(kSei, h264.nalu_type); + ASSERT_EQ(1u, h264.nalus_length); + EXPECT_EQ(static_cast(kSei), h264.nalus[0].type); + EXPECT_EQ(-1, h264.nalus[0].sps_id); + EXPECT_EQ(-1, h264.nalus[0].pps_id); +} + } // namespace webrtc diff --git a/webrtc/modules/video_coding/decoding_state.cc b/webrtc/modules/video_coding/decoding_state.cc index 89be9b66c1..229555287c 100644 --- a/webrtc/modules/video_coding/decoding_state.cc +++ b/webrtc/modules/video_coding/decoding_state.cc @@ -10,6 +10,8 @@ #include "webrtc/modules/video_coding/decoding_state.h" +#include "webrtc/base/logging.h" +#include "webrtc/common_video/h264/h264_common.h" #include "webrtc/modules/include/module_common_types.h" #include "webrtc/modules/video_coding/frame_buffer.h" #include "webrtc/modules/video_coding/jitter_buffer_common.h" @@ -40,6 +42,8 @@ void VCMDecodingState::Reset() { full_sync_ = true; in_initial_state_ = true; memset(frame_decoded_, 0, sizeof(frame_decoded_)); + received_sps_.clear(); + received_pps_.clear(); } uint32_t VCMDecodingState::time_stamp() const { @@ -74,6 +78,24 @@ void VCMDecodingState::SetState(const VCMFrameBuffer* frame) { temporal_id_ = frame->TemporalId(); tl0_pic_id_ = frame->Tl0PicId(); + for (const NaluInfo& nalu : frame->GetNaluInfos()) { + if (nalu.type == H264::NaluType::kPps) { + if (nalu.pps_id < 0) { + LOG(LS_WARNING) << "Received pps without pps id."; + } else if (nalu.sps_id < 0) { + LOG(LS_WARNING) << "Received pps without sps id."; + } else { + received_pps_[nalu.pps_id] = nalu.sps_id; + } + } else if (nalu.type == H264::NaluType::kSps) { + if (nalu.sps_id < 0) { + LOG(LS_WARNING) << "Received sps without sps id."; + } else { + received_sps_.insert(nalu.sps_id); + } + } + } + if (UsingFlexibleMode(frame)) { uint16_t frame_index = picture_id_ % kFrameDecodedLength; if (in_initial_state_) { @@ -106,6 +128,8 @@ void VCMDecodingState::CopyFrom(const VCMDecodingState& state) { in_initial_state_ = state.in_initial_state_; frame_decoded_cleared_to_ = state.frame_decoded_cleared_to_; memcpy(frame_decoded_, state.frame_decoded_, sizeof(frame_decoded_)); + received_sps_ = state.received_sps_; + received_pps_ = state.received_pps_; } bool VCMDecodingState::UpdateEmptyFrame(const VCMFrameBuffer* frame) { @@ -183,8 +207,10 @@ bool VCMDecodingState::ContinuousFrame(const VCMFrameBuffer* frame) const { // A key frame is always considered continuous as it doesn't refer to any // frames and therefore won't introduce any errors even if prior frames are // missing. - if (frame->FrameType() == kVideoFrameKey) + if (frame->FrameType() == kVideoFrameKey && + HaveSpsAndPps(frame->GetNaluInfos())) { return true; + } // When in the initial state we always require a key frame to start decoding. if (in_initial_state_) return false; @@ -205,7 +231,8 @@ bool VCMDecodingState::ContinuousFrame(const VCMFrameBuffer* frame) const { return ContinuousPictureId(frame->PictureId()); } } else { - return ContinuousSeqNum(static_cast(frame->GetLowSeqNum())); + return ContinuousSeqNum(static_cast(frame->GetLowSeqNum())) && + HaveSpsAndPps(frame->GetNaluInfos()); } } @@ -282,4 +309,51 @@ bool VCMDecodingState::AheadOfFramesDecodedClearedTo(uint16_t index) const { return diff > kFrameDecodedLength / 2; } +bool VCMDecodingState::HaveSpsAndPps(const std::vector& nalus) const { + std::set new_sps; + std::map new_pps; + for (const NaluInfo& nalu : nalus) { + // Check if this nalu actually contains sps/pps information or dependencies. + if (nalu.sps_id == -1 && nalu.pps_id == -1) + continue; + switch (nalu.type) { + case H264::NaluType::kPps: + if (nalu.pps_id < 0) { + LOG(LS_WARNING) << "Received pps without pps id."; + } else if (nalu.sps_id < 0) { + LOG(LS_WARNING) << "Received pps without sps id."; + } else { + new_pps[nalu.pps_id] = nalu.sps_id; + } + break; + case H264::NaluType::kSps: + if (nalu.sps_id < 0) { + LOG(LS_WARNING) << "Received sps without sps id."; + } else { + new_sps.insert(nalu.sps_id); + } + break; + default: { + int needed_sps = -1; + auto pps_it = new_pps.find(nalu.pps_id); + if (pps_it != new_pps.end()) { + needed_sps = pps_it->second; + } else { + auto pps_it2 = received_pps_.find(nalu.pps_id); + if (pps_it2 == received_pps_.end()) { + return false; + } + needed_sps = pps_it2->second; + } + if (new_sps.find(needed_sps) == new_sps.end() && + received_sps_.find(needed_sps) == received_sps_.end()) { + return false; + } + break; + } + } + } + return true; +} + } // namespace webrtc diff --git a/webrtc/modules/video_coding/decoding_state.h b/webrtc/modules/video_coding/decoding_state.h index f4ea8ae081..c7340809a7 100644 --- a/webrtc/modules/video_coding/decoding_state.h +++ b/webrtc/modules/video_coding/decoding_state.h @@ -11,11 +11,16 @@ #ifndef WEBRTC_MODULES_VIDEO_CODING_DECODING_STATE_H_ #define WEBRTC_MODULES_VIDEO_CODING_DECODING_STATE_H_ +#include +#include +#include + #include "webrtc/typedefs.h" namespace webrtc { // Forward declarations +struct NaluInfo; class VCMFrameBuffer; class VCMPacket; @@ -61,6 +66,7 @@ class VCMDecodingState { bool UsingPictureId(const VCMFrameBuffer* frame) const; bool UsingFlexibleMode(const VCMFrameBuffer* frame) const; bool AheadOfFramesDecodedClearedTo(uint16_t index) const; + bool HaveSpsAndPps(const std::vector& nalus) const; // Keep state of last decoded frame. // TODO(mikhal/stefan): create designated classes to handle these types. @@ -75,6 +81,8 @@ class VCMDecodingState { // Used to check references in flexible mode. bool frame_decoded_[kFrameDecodedLength]; uint16_t frame_decoded_cleared_to_; + std::set received_sps_; + std::map received_pps_; }; } // namespace webrtc diff --git a/webrtc/modules/video_coding/frame_buffer.cc b/webrtc/modules/video_coding/frame_buffer.cc index b332a00c8d..bb8ec93713 100644 --- a/webrtc/modules/video_coding/frame_buffer.cc +++ b/webrtc/modules/video_coding/frame_buffer.cc @@ -66,6 +66,10 @@ bool VCMFrameBuffer::NonReference() const { return _sessionInfo.NonReference(); } +std::vector VCMFrameBuffer::GetNaluInfos() const { + return _sessionInfo.GetNaluInfos(); +} + void VCMFrameBuffer::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) { _sessionInfo.SetGofInfo(gof_info, idx); // TODO(asapersson): Consider adding hdr->VP9.ref_picture_id for testing. diff --git a/webrtc/modules/video_coding/frame_buffer.h b/webrtc/modules/video_coding/frame_buffer.h index f5a707efe4..95c598534c 100644 --- a/webrtc/modules/video_coding/frame_buffer.h +++ b/webrtc/modules/video_coding/frame_buffer.h @@ -11,6 +11,8 @@ #ifndef WEBRTC_MODULES_VIDEO_CODING_FRAME_BUFFER_H_ #define WEBRTC_MODULES_VIDEO_CODING_FRAME_BUFFER_H_ +#include + #include "webrtc/modules/include/module_common_types.h" #include "webrtc/modules/video_coding/include/video_coding.h" #include "webrtc/modules/video_coding/encoded_frame.h" @@ -61,6 +63,8 @@ class VCMFrameBuffer : public VCMEncodedFrame { int Tl0PicId() const; bool NonReference() const; + std::vector GetNaluInfos() const; + void SetGofInfo(const GofInfoVP9& gof_info, size_t idx); // Increments a counter to keep track of the number of packets of this frame diff --git a/webrtc/modules/video_coding/jitter_buffer_unittest.cc b/webrtc/modules/video_coding/jitter_buffer_unittest.cc index f2e5f10deb..425a4e9745 100644 --- a/webrtc/modules/video_coding/jitter_buffer_unittest.cc +++ b/webrtc/modules/video_coding/jitter_buffer_unittest.cc @@ -12,7 +12,9 @@ #include #include +#include +#include "webrtc/common_video/h264/h264_common.h" #include "webrtc/modules/video_coding/frame_buffer.h" #include "webrtc/modules/video_coding/jitter_buffer.h" #include "webrtc/modules/video_coding/media_opt_util.h" @@ -1160,6 +1162,89 @@ TEST_F(TestBasicJitterBuffer, H264InsertStartCode) { jitter_buffer_->ReleaseFrame(frame_out); } +TEST_F(TestBasicJitterBuffer, SpsAndPpsHandling) { + jitter_buffer_->SetDecodeErrorMode(kNoErrors); + + packet_->timestamp = timestamp_; + packet_->frameType = kVideoFrameKey; + packet_->isFirstPacket = true; + packet_->markerBit = true; + packet_->codec = kVideoCodecH264; + packet_->video_header.codec = kRtpVideoH264; + packet_->video_header.codecHeader.H264.nalu_type = H264::NaluType::kIdr; + packet_->video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kIdr; + packet_->video_header.codecHeader.H264.nalus[0].sps_id = -1; + packet_->video_header.codecHeader.H264.nalus[0].pps_id = 0; + packet_->video_header.codecHeader.H264.nalus_length = 1; + bool retransmitted = false; + EXPECT_EQ(kCompleteSession, + jitter_buffer_->InsertPacket(*packet_, &retransmitted)); + // Not decodable since sps and pps are missing. + EXPECT_EQ(nullptr, DecodeCompleteFrame()); + + timestamp_ += 3000; + packet_->timestamp = timestamp_; + ++seq_num_; + packet_->seqNum = seq_num_; + packet_->frameType = kVideoFrameKey; + packet_->isFirstPacket = true; + packet_->markerBit = false; + packet_->codec = kVideoCodecH264; + packet_->video_header.codec = kRtpVideoH264; + packet_->video_header.codecHeader.H264.nalu_type = H264::NaluType::kStapA; + packet_->video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kSps; + packet_->video_header.codecHeader.H264.nalus[0].sps_id = 0; + packet_->video_header.codecHeader.H264.nalus[0].pps_id = -1; + packet_->video_header.codecHeader.H264.nalus[1].type = H264::NaluType::kPps; + packet_->video_header.codecHeader.H264.nalus[1].sps_id = 0; + packet_->video_header.codecHeader.H264.nalus[1].pps_id = 0; + packet_->video_header.codecHeader.H264.nalus_length = 2; + // Not complete since the marker bit hasn't been received. + EXPECT_EQ(kIncomplete, + jitter_buffer_->InsertPacket(*packet_, &retransmitted)); + + ++seq_num_; + packet_->seqNum = seq_num_; + packet_->frameType = kVideoFrameKey; + packet_->isFirstPacket = false; + packet_->markerBit = true; + packet_->codec = kVideoCodecH264; + packet_->video_header.codec = kRtpVideoH264; + packet_->video_header.codecHeader.H264.nalu_type = H264::NaluType::kIdr; + packet_->video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kIdr; + packet_->video_header.codecHeader.H264.nalus[0].sps_id = -1; + packet_->video_header.codecHeader.H264.nalus[0].pps_id = 0; + packet_->video_header.codecHeader.H264.nalus_length = 1; + // Complete and decodable since the pps and sps are received in the first + // packet of this frame. + EXPECT_EQ(kCompleteSession, + jitter_buffer_->InsertPacket(*packet_, &retransmitted)); + VCMEncodedFrame* frame_out = DecodeCompleteFrame(); + ASSERT_NE(nullptr, frame_out); + jitter_buffer_->ReleaseFrame(frame_out); + + timestamp_ += 3000; + packet_->timestamp = timestamp_; + ++seq_num_; + packet_->seqNum = seq_num_; + packet_->frameType = kVideoFrameDelta; + packet_->isFirstPacket = true; + packet_->markerBit = true; + packet_->codec = kVideoCodecH264; + packet_->video_header.codec = kRtpVideoH264; + packet_->video_header.codecHeader.H264.nalu_type = H264::NaluType::kSlice; + packet_->video_header.codecHeader.H264.nalus[0].type = H264::NaluType::kSlice; + packet_->video_header.codecHeader.H264.nalus[0].sps_id = -1; + packet_->video_header.codecHeader.H264.nalus[0].pps_id = 0; + packet_->video_header.codecHeader.H264.nalus_length = 1; + // Complete and decodable since sps, pps and key frame has been received. + EXPECT_EQ(kCompleteSession, + jitter_buffer_->InsertPacket(*packet_, &retransmitted)); + frame_out = DecodeCompleteFrame(); + ASSERT_NE(nullptr, frame_out); + jitter_buffer_->ReleaseFrame(frame_out); +} + // Test threshold conditions of decodable state. TEST_F(TestBasicJitterBuffer, PacketLossWithSelectiveErrorsThresholdCheck) { jitter_buffer_->SetDecodeErrorMode(kSelectiveErrors); @@ -2633,5 +2718,4 @@ TEST_F(TestJitterBufferNack, ResetByFutureKeyFrameDoesntError) { nack_list = jitter_buffer_->GetNackList(&extended); EXPECT_EQ(0u, nack_list.size()); } - } // namespace webrtc diff --git a/webrtc/modules/video_coding/session_info.cc b/webrtc/modules/video_coding/session_info.cc index 8f136eecb4..39c1c0b5b6 100644 --- a/webrtc/modules/video_coding/session_info.cc +++ b/webrtc/modules/video_coding/session_info.cc @@ -111,6 +111,19 @@ bool VCMSessionInfo::NonReference() const { return packets_.front().video_header.codecHeader.VP8.nonReference; } +std::vector VCMSessionInfo::GetNaluInfos() const { + if (packets_.empty() || packets_.front().video_header.codec != kRtpVideoH264) + return std::vector(); + std::vector nalu_infos; + for (const VCMPacket& packet : packets_) { + for (size_t i = 0; i < packet.video_header.codecHeader.H264.nalus_length; + ++i) { + nalu_infos.push_back(packet.video_header.codecHeader.H264.nalus[i]); + } + } + return nalu_infos; +} + void VCMSessionInfo::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) { if (packets_.empty() || packets_.front().video_header.codec != kRtpVideoVp9 || packets_.front().video_header.codecHeader.VP9.flexible_mode) { diff --git a/webrtc/modules/video_coding/session_info.h b/webrtc/modules/video_coding/session_info.h index c2de1acfde..a4eb405d0b 100644 --- a/webrtc/modules/video_coding/session_info.h +++ b/webrtc/modules/video_coding/session_info.h @@ -12,6 +12,7 @@ #define WEBRTC_MODULES_VIDEO_CODING_SESSION_INFO_H_ #include +#include #include "webrtc/modules/include/module_common_types.h" #include "webrtc/modules/video_coding/include/video_coding.h" @@ -81,6 +82,8 @@ class VCMSessionInfo { int Tl0PicId() const; bool NonReference() const; + std::vector GetNaluInfos() const; + void SetGofInfo(const GofInfoVP9& gof_info, size_t idx); // The number of packets discarded because the decoder can't make use of