Add support for VP9 packetization/depacketization.

RTP payload format for VP9:
https://www.ietf.org/id/draft-uberti-payload-vp9-01.txt

BUG=webrtc:4148, webrtc:4168, chromium:500602
TBR=mflodman

Review URL: https://codereview.webrtc.org/1232023006

Cr-Commit-Position: refs/heads/master@{#9649}
This commit is contained in:
asapersson 2015-07-28 04:02:54 -07:00 committed by Commit bot
parent 95b8718dc6
commit f38ea3caa3
9 changed files with 1643 additions and 1 deletions

View File

@ -32,8 +32,15 @@ struct RTPAudioHeader {
};
const int16_t kNoPictureId = -1;
const int16_t kMaxOneBytePictureId = 0x7F;    // 7 bits
const int16_t kMaxTwoBytePictureId = 0x7FFF;  // 15 bits
const int16_t kNoTl0PicIdx = -1;
// VP9 sentinel values meaning "field not present" in the payload descriptor.
const uint8_t kNoTemporalIdx = 0xFF;
const uint8_t kNoSpatialIdx = 0xFF;
const uint8_t kNoGofIdx = 0xFF;
// VP9 payload format limits: the R field is 2 bits (max 3 reference
// pictures), N_G is 4 bits (count - 1, max 16 frames per GOF) and N_S is
// 3 bits (count - 1, max 8 spatial layers).
const size_t kMaxVp9RefPics = 3;
const size_t kMaxVp9FramesInGof = 16;
const size_t kMaxVp9NumberOfSpatialLayers = 8;
const int kNoKeyIdx = -1;
struct RTPVideoHeaderVP8 {
@ -62,6 +69,80 @@ struct RTPVideoHeaderVP8 {
// in a VP8 partition. Otherwise false
};
// Description of a VP9 group of frames (GOF): for each frame in the group,
// its temporal layer index, temporal up-switch flag and reference picture
// offsets. Kept as a plain struct (it is embedded in RTPVideoHeaderVP9,
// which lives in a union), so copying is done with an explicit member
// function instead of constructors/assignment operators.
struct GofInfoVP9 {
  // Copies |src| into this struct. Only the valid prefix is copied:
  // |num_frames_in_gof| entries, and per frame only the first
  // |num_ref_pics[i]| reference offsets; the remaining array slots are left
  // untouched.
  void CopyGofInfoVP9(const GofInfoVP9& src) {
    num_frames_in_gof = src.num_frames_in_gof;
    for (size_t i = 0; i < num_frames_in_gof; ++i) {
      temporal_idx[i] = src.temporal_idx[i];
      temporal_up_switch[i] = src.temporal_up_switch[i];
      num_ref_pics[i] = src.num_ref_pics[i];
      for (size_t r = 0; r < num_ref_pics[i]; ++r) {
        pid_diff[i][r] = src.pid_diff[i][r];
      }
    }
  }

  size_t num_frames_in_gof;  // Number of valid entries in the arrays below.
  uint8_t temporal_idx[kMaxVp9FramesInGof];     // Temporal layer per frame.
  bool temporal_up_switch[kMaxVp9FramesInGof];  // Up-switch flag per frame.
  size_t num_ref_pics[kMaxVp9FramesInGof];      // Valid entries in pid_diff[i].
  int16_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics];  // PictureID deltas.
};
// Per-packet VP9 video header carried in the RTP payload descriptor.
// Member of the RTPVideoTypeHeader union, so it has no constructors;
// call InitRTPVideoHeaderVP9() to reset all fields.
struct RTPVideoHeaderVP9 {
  // Resets every field to its "not present"/default value.
  void InitRTPVideoHeaderVP9() {
    inter_pic_predicted = false;
    flexible_mode = false;
    beginning_of_frame = false;
    end_of_frame = false;
    ss_data_available = false;
    picture_id = kNoPictureId;
    max_picture_id = kMaxTwoBytePictureId;
    tl0_pic_idx = kNoTl0PicIdx;
    temporal_idx = kNoTemporalIdx;
    spatial_idx = kNoSpatialIdx;
    temporal_up_switch = false;
    inter_layer_predicted = false;
    gof_idx = kNoGofIdx;
    num_ref_pics = 0;
  }

  bool inter_pic_predicted;  // This layer frame is dependent on previously
                             // coded frame(s).
  bool flexible_mode;        // This frame is in flexible mode.
  bool beginning_of_frame;   // True if this packet is the first in a VP9 layer
                             // frame.
  bool end_of_frame;  // True if this packet is the last in a VP9 layer frame.
  bool ss_data_available;  // True if SS data is available in this payload
                           // descriptor.
  int16_t picture_id;      // PictureID index, 15 bits;
                           // kNoPictureId if PictureID does not exist.
  int16_t max_picture_id;  // Maximum picture ID index; either 0x7F or 0x7FFF;
  int16_t tl0_pic_idx;     // TL0PIC_IDX, 8 bits;
                           // kNoTl0PicIdx means no value provided.
  uint8_t temporal_idx;    // Temporal layer index, or kNoTemporalIdx.
  uint8_t spatial_idx;     // Spatial layer index, or kNoSpatialIdx.
  bool temporal_up_switch;  // True if upswitch to higher frame rate is possible
                            // starting from this frame.
  bool inter_layer_predicted;  // Frame is dependent on directly lower spatial
                               // layer frame.
  uint8_t gof_idx;      // Index to predefined temporal frame info in SS data.
  size_t num_ref_pics;  // Number of reference pictures used by this layer
                        // frame.
  int16_t pid_diff[kMaxVp9RefPics];  // P_DIFF signaled to derive the PictureID
                                     // of the reference pictures.
  int16_t ref_picture_id[kMaxVp9RefPics];  // PictureID of reference pictures.

  // SS data (scalability structure), valid when |ss_data_available| is true.
  size_t num_spatial_layers;  // Number of spatial layers (N_S + 1 on the wire).
  bool spatial_layer_resolution_present;  // True if width/height below are set.
  uint16_t width[kMaxVp9NumberOfSpatialLayers];   // Per spatial layer.
  uint16_t height[kMaxVp9NumberOfSpatialLayers];  // Per spatial layer.
  GofInfoVP9 gof;  // Predefined temporal structure of the group of frames.
};
// The packetization types that we support: single, aggregated, and fragmented.
enum H264PacketizationTypes {
kH264SingleNalu, // This packet contains a single NAL unit.
@ -85,6 +166,7 @@ struct RTPVideoHeaderH264 {
union RTPVideoTypeHeader {
RTPVideoHeaderVP8 VP8;
RTPVideoHeaderVP9 VP9;
RTPVideoHeaderH264 H264;
};
@ -92,6 +174,7 @@ enum RtpVideoCodecTypes {
kRtpVideoNone,
kRtpVideoGeneric,
kRtpVideoVp8,
kRtpVideoVp9,
kRtpVideoH264
};
// Since RTPVideoHeader is used as a member of a union, it can't have a

View File

@ -243,9 +243,10 @@
'rtp_rtcp/source/rtcp_utility_unittest.cc',
'rtp_rtcp/source/rtp_fec_unittest.cc',
'rtp_rtcp/source/rtp_format_h264_unittest.cc',
'rtp_rtcp/source/rtp_format_vp8_unittest.cc',
'rtp_rtcp/source/rtp_format_vp8_test_helper.cc',
'rtp_rtcp/source/rtp_format_vp8_test_helper.h',
'rtp_rtcp/source/rtp_format_vp8_unittest.cc',
'rtp_rtcp/source/rtp_format_vp9_unittest.cc',
'rtp_rtcp/source/rtp_packet_history_unittest.cc',
'rtp_rtcp/source/rtp_payload_registry_unittest.cc',
'rtp_rtcp/source/rtp_rtcp_impl_unittest.cc',

View File

@ -60,6 +60,8 @@ source_set("rtp_rtcp") {
"source/rtp_format_video_generic.h",
"source/rtp_format_vp8.cc",
"source/rtp_format_vp8.h",
"source/rtp_format_vp9.cc",
"source/rtp_format_vp9.h",
"source/rtp_header_extension.cc",
"source/rtp_header_extension.h",
"source/rtp_header_parser.cc",

View File

@ -96,6 +96,8 @@
'source/rtp_format_h264.h',
'source/rtp_format_vp8.cc',
'source/rtp_format_vp8.h',
'source/rtp_format_vp9.cc',
'source/rtp_format_vp9.h',
'source/rtp_format_video_generic.cc',
'source/rtp_format_video_generic.h',
'source/vp8_partition_aggregator.cc',

View File

@ -13,6 +13,7 @@
#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_video_generic.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp8.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
namespace webrtc {
RtpPacketizer* RtpPacketizer::Create(RtpVideoCodecTypes type,
@ -25,6 +26,9 @@ RtpPacketizer* RtpPacketizer::Create(RtpVideoCodecTypes type,
case kRtpVideoVp8:
assert(rtp_type_header != NULL);
return new RtpPacketizerVp8(rtp_type_header->VP8, max_payload_len);
case kRtpVideoVp9:
assert(rtp_type_header != NULL);
return new RtpPacketizerVp9(rtp_type_header->VP9, max_payload_len);
case kRtpVideoGeneric:
return new RtpPacketizerGeneric(frame_type, max_payload_len);
case kRtpVideoNone:
@ -39,6 +43,8 @@ RtpDepacketizer* RtpDepacketizer::Create(RtpVideoCodecTypes type) {
return new RtpDepacketizerH264();
case kRtpVideoVp8:
return new RtpDepacketizerVp8();
case kRtpVideoVp9:
return new RtpDepacketizerVp9();
case kRtpVideoGeneric:
return new RtpDepacketizerGeneric();
case kRtpVideoNone:

View File

@ -0,0 +1,764 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
#include <assert.h>
#include <string.h>
#include <cmath>
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/checks.h"
#include "webrtc/system_wrappers/interface/logging.h"
// Returns false from the current function if |x| evaluates to false.
// Wrapped in do { } while (0) so the macro expands to exactly one statement
// and composes safely with unbraced if/else at call sites (the bare
// if-statement form would capture a following `else`).
#define RETURN_FALSE_ON_ERROR(x) \
  do {                           \
    if (!(x)) {                  \
      return false;              \
    }                            \
  } while (0)
namespace webrtc {
namespace {
// Length of VP9 payload descriptors' fixed part (the required first byte).
const size_t kFixedPayloadDescriptorBytes = 1;

// Packet fragmentation mode. If true, packets are split into (almost) equal
// sizes. Otherwise, as many bytes as possible are fit into one packet.
const bool kBalancedMode = true;

// Value written for bits the payload format reserves (must be zero).
const uint32_t kReservedBitValue0 = 0;
// Returns the temporal layer index to put on the wire, or |def| when the
// header carries no temporal index.
uint8_t TemporalIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
  if (hdr.temporal_idx == kNoTemporalIdx)
    return def;
  return hdr.temporal_idx;
}
// Returns the spatial layer index to put on the wire, or |def| when the
// header carries no spatial index.
uint8_t SpatialIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
  if (hdr.spatial_idx == kNoSpatialIdx)
    return def;
  return hdr.spatial_idx;
}
// Returns the TL0PICIDX value to put on the wire, or |def| when the header
// carries no TL0 picture index.
int16_t Tl0PicIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
  if (hdr.tl0_pic_idx == kNoTl0PicIdx)
    return def;
  return hdr.tl0_pic_idx;
}
// Returns the GOF index to put on the wire, or |def| when the header carries
// no GOF index.
uint8_t GofIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
  if (hdr.gof_idx == kNoGofIdx)
    return def;
  return hdr.gof_idx;
}
// Picture ID:
//
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | M:0 => picture id is 7 bits.
// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits.
// M: | EXTENDED PID |
// +-+-+-+-+-+-+-+-+
//
// Number of bytes the picture ID field occupies: 0 when absent, 1 for a
// 7-bit ID, 2 for a 15-bit (extended) ID.
size_t PictureIdLength(const RTPVideoHeaderVP9& hdr) {
  if (hdr.picture_id == kNoPictureId)
    return 0;
  if (hdr.max_picture_id == kMaxOneBytePictureId)
    return 1;
  return 2;
}
// True when the descriptor carries a picture ID (I bit set).
bool PictureIdPresent(const RTPVideoHeaderVP9& hdr) {
  return PictureIdLength(hdr) != 0;
}
// Layer indices:
//
// Flexible mode (F=1): Non-flexible mode (F=0):
//
// +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| |GOF_IDX| S |D|
// +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
// Number of bytes the layer-index field occupies: one byte (T/U/S/D) in
// flexible mode, two bytes (GOF_IDX/S/D + TL0PICIDX) in non-flexible mode,
// and zero when no layer information is set in the header.
size_t LayerInfoLength(const RTPVideoHeaderVP9& hdr) {
  if (hdr.flexible_mode) {
    bool no_layer_info = hdr.temporal_idx == kNoTemporalIdx &&
                         hdr.spatial_idx == kNoSpatialIdx;
    return no_layer_info ? 0 : 1;
  }
  bool no_layer_info =
      hdr.gof_idx == kNoGofIdx && hdr.spatial_idx == kNoSpatialIdx;
  return no_layer_info ? 0 : 2;
}
// True when the descriptor carries layer indices (L bit set).
bool LayerInfoPresent(const RTPVideoHeaderVP9& hdr) {
  return LayerInfoLength(hdr) != 0;
}
// Reference indices:
//
// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index
// P,F: | P_DIFF |X|N| . has to be specified.
// +-+-+-+-+-+-+-+-+ . up to 3 times
// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14
// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used.
// N=1: An additional P_DIFF follows
// current P_DIFF.
// Number of bytes the reference-index fields occupy. Only present for
// inter-picture predicted frames in flexible mode; each P_DIFF takes one
// byte, or two when it does not fit in 6 bits (extended form).
size_t RefIndicesLength(const RTPVideoHeaderVP9& hdr) {
  if (!hdr.inter_pic_predicted || !hdr.flexible_mode)
    return 0;

  DCHECK_GT(hdr.num_ref_pics, 0U);
  DCHECK_LE(hdr.num_ref_pics, kMaxVp9RefPics);
  size_t length = 0;
  for (size_t i = 0; i < hdr.num_ref_pics; ++i) {
    length += hdr.pid_diff[i] > 0x3F ? 2 : 1;  // P_DIFF > 6 bits => extended
  }
  return length;
}
// Scalability structure (SS).
//
// +-+-+-+-+-+-+-+-+
// V: | N_S |Y| N_G |
// +-+-+-+-+-+-+-+-+ -|
// Y: | WIDTH | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ . N_S + 1 times
// | HEIGHT | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| -|
// N_G: | T |U| R |-|-| (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times
// | P_DIFF | (OPTIONAL) . R times .
// +-+-+-+-+-+-+-+-+ -| -|
//
// Number of bytes the scalability structure (SS) occupies: one mandatory
// byte (V), optionally 4 bytes of resolution per spatial layer (Y), and one
// byte per GOF frame plus one byte per reference picture of each frame.
size_t SsDataLength(const RTPVideoHeaderVP9& hdr) {
  if (!hdr.ss_data_available)
    return 0;

  DCHECK_GT(hdr.num_spatial_layers, 0U);
  DCHECK_LE(hdr.num_spatial_layers, kMaxVp9NumberOfSpatialLayers);
  DCHECK_GT(hdr.gof.num_frames_in_gof, 0U);
  DCHECK_LE(hdr.gof.num_frames_in_gof, kMaxVp9FramesInGof);
  size_t length = 1;  // V
  if (hdr.spatial_layer_resolution_present) {
    length += 4 * hdr.num_spatial_layers;  // Y: 2 bytes width + 2 bytes height.
  }
  // N_G
  length += hdr.gof.num_frames_in_gof;  // T, U, R
  for (size_t i = 0; i < hdr.gof.num_frames_in_gof; ++i) {
    DCHECK_LE(hdr.gof.num_ref_pics[i], kMaxVp9RefPics);
    length += hdr.gof.num_ref_pics[i];  // R times
  }
  return length;
}
// Size in bytes of the payload descriptor without the SS data part; this is
// the descriptor size of every packet of a frame except the first.
size_t PayloadDescriptorLengthMinusSsData(const RTPVideoHeaderVP9& hdr) {
  size_t length = kFixedPayloadDescriptorBytes;
  length += PictureIdLength(hdr);
  length += LayerInfoLength(hdr);
  length += RefIndicesLength(hdr);
  return length;
}
// Total size in bytes of the payload descriptor, including SS data.
size_t PayloadDescriptorLength(const RTPVideoHeaderVP9& hdr) {
  return SsDataLength(hdr) + PayloadDescriptorLengthMinusSsData(hdr);
}
void QueuePacket(size_t start_pos,
size_t size,
bool layer_begin,
bool layer_end,
RtpPacketizerVp9::PacketInfoQueue* packets) {
RtpPacketizerVp9::PacketInfo packet_info;
packet_info.payload_start_pos = start_pos;
packet_info.size = size;
packet_info.layer_begin = layer_begin;
packet_info.layer_end = layer_end;
packets->push(packet_info);
}
// Picture ID:
//
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | M:0 => picture id is 7 bits.
// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits.
// M: | EXTENDED PID |
// +-+-+-+-+-+-+-+-+
//
// Writes the picture ID field. The M bit selects the 15-bit extended form,
// used when the header's max_picture_id is two bytes. Returns false on
// writer overflow.
bool WritePictureId(const RTPVideoHeaderVP9& vp9,
                    rtc::BitBufferWriter* writer) {
  bool m_bit = (PictureIdLength(vp9) == 2);
  RETURN_FALSE_ON_ERROR(writer->WriteBits(m_bit ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.picture_id, m_bit ? 15 : 7));
  return true;
}
// Layer indices:
//
// Flexible mode (F=1):
//
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
//
// Writes the one-byte layer-index field used in flexible mode:
// T (3 bits), U (1 bit), S (3 bits), D (1 bit). Missing temporal/spatial
// indices are written as 0. Returns false on writer overflow.
bool WriteLayerInfoFlexibleMode(const RTPVideoHeaderVP9& vp9,
                                rtc::BitBufferWriter* writer) {
  RETURN_FALSE_ON_ERROR(writer->WriteBits(TemporalIdxField(vp9, 0), 3));
  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.temporal_up_switch ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3));
  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.inter_layer_predicted ? 1: 0, 1));
  return true;
}
// Non-flexible mode (F=0):
//
// +-+-+-+-+-+-+-+-+
// L: |GOF_IDX| S |D|
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
// Writes the two-byte layer-index field used in non-flexible mode:
// GOF_IDX (4 bits), S (3 bits), D (1 bit), followed by one TL0PICIDX byte.
// Missing fields are written as 0. Returns false on writer overflow.
bool WriteLayerInfoNonFlexibleMode(const RTPVideoHeaderVP9& vp9,
                                   rtc::BitBufferWriter* writer) {
  RETURN_FALSE_ON_ERROR(writer->WriteBits(GofIdxField(vp9, 0), 4));
  RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3));
  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.inter_layer_predicted ? 1: 0, 1));
  RETURN_FALSE_ON_ERROR(writer->WriteUInt8(Tl0PicIdxField(vp9, 0)));
  return true;
}
// Dispatches to the flexible-mode or non-flexible-mode layer-info writer
// depending on the F bit of the header.
bool WriteLayerInfo(const RTPVideoHeaderVP9& vp9,
                    rtc::BitBufferWriter* writer) {
  return vp9.flexible_mode ? WriteLayerInfoFlexibleMode(vp9, writer)
                           : WriteLayerInfoNonFlexibleMode(vp9, writer);
}
// Reference indices:
//
// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index
// P,F: | P_DIFF |X|N| . has to be specified.
// +-+-+-+-+-+-+-+-+ . up to 3 times
// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14
// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used.
// N=1: An additional P_DIFF follows
// current P_DIFF.
// Writes the reference-index (P_DIFF) fields. Requires a picture ID to be
// present and between 1 and kMaxVp9RefPics references; returns false
// otherwise or on writer overflow.
bool WriteRefIndices(const RTPVideoHeaderVP9& vp9,
                     rtc::BitBufferWriter* writer) {
  if (!PictureIdPresent(vp9) ||
      vp9.num_ref_pics == 0 || vp9.num_ref_pics > kMaxVp9RefPics) {
    return false;
  }
  for (size_t i = 0; i < vp9.num_ref_pics; ++i) {
    bool x_bit = (vp9.pid_diff[i] > 0x3F);      // Needs more than 6 bits.
    bool n_bit = !(i == vp9.num_ref_pics - 1);  // Another P_DIFF follows.
    if (x_bit) {
      // Extended 14-bit P_DIFF: upper 6 bits here, lower 8 bits in the
      // extension byte below.
      RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i] >> 8, 6));
      RETURN_FALSE_ON_ERROR(writer->WriteBits(x_bit ? 1 : 0, 1));
      RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1));
      RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.pid_diff[i]));
    } else {
      RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i], 6));
      RETURN_FALSE_ON_ERROR(writer->WriteBits(x_bit ? 1 : 0, 1));
      RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1));
    }
  }
  return true;
}
// Scalability structure (SS).
//
// +-+-+-+-+-+-+-+-+
// V: | N_S |Y| N_G |
// +-+-+-+-+-+-+-+-+ -|
// Y: | WIDTH | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ . N_S + 1 times
// | HEIGHT | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| -|
// N_G: | T |U| R |-|-| (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times
// | P_DIFF | (OPTIONAL) . R times .
// +-+-+-+-+-+-+-+-+ -| -|
//
// Writes the scalability structure: the V byte (N_S, Y, N_G encode
// count - 1 for layers/frames), optional per-layer resolutions, and the GOF
// description (T/U/R byte plus R P_DIFF bytes per frame). Returns false on
// writer overflow.
bool WriteSsData(const RTPVideoHeaderVP9& vp9, rtc::BitBufferWriter* writer) {
  DCHECK_GT(vp9.num_spatial_layers, 0U);
  DCHECK_LE(vp9.num_spatial_layers, kMaxVp9NumberOfSpatialLayers);
  DCHECK_GT(vp9.gof.num_frames_in_gof, 0U);
  DCHECK_LE(vp9.gof.num_frames_in_gof, kMaxVp9FramesInGof);

  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.num_spatial_layers - 1, 3));
  RETURN_FALSE_ON_ERROR(
      writer->WriteBits(vp9.spatial_layer_resolution_present ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.num_frames_in_gof - 1, 4));

  if (vp9.spatial_layer_resolution_present) {
    for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
      RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.width[i]));
      RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.height[i]));
    }
  }
  for (size_t i = 0; i < vp9.gof.num_frames_in_gof; ++i) {
    RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.temporal_idx[i], 3));
    RETURN_FALSE_ON_ERROR(
        writer->WriteBits(vp9.gof.temporal_up_switch[i] ? 1 : 0, 1));
    RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.num_ref_pics[i], 2));
    // The last two bits of the T/U/R byte are reserved and written as zero.
    RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 2));
    for (size_t r = 0; r < vp9.gof.num_ref_pics[i]; ++r) {
      RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.pid_diff[i][r]));
    }
  }
  return true;
}
// Picture ID:
//
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | M:0 => picture id is 7 bits.
// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits.
// M: | EXTENDED PID |
// +-+-+-+-+-+-+-+-+
//
// Parses the picture ID field into |vp9|. The leading M bit selects between
// the 7-bit and 15-bit (extended) forms; max_picture_id is set accordingly.
// Returns false if the payload is truncated.
bool ParsePictureId(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
  uint32_t picture_id;
  uint32_t m_bit;
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&m_bit, 1));
  if (m_bit) {
    RETURN_FALSE_ON_ERROR(parser->ReadBits(&picture_id, 15));
    vp9->max_picture_id = kMaxTwoBytePictureId;
  } else {
    RETURN_FALSE_ON_ERROR(parser->ReadBits(&picture_id, 7));
    vp9->max_picture_id = kMaxOneBytePictureId;
  }
  vp9->picture_id = picture_id;
  return true;
}
// Layer indices (flexible mode):
//
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
//
// Parses the one-byte flexible-mode layer-index field (T/U/S/D) into |vp9|.
// Returns false if the payload is truncated.
bool ParseLayerInfoFlexibleMode(rtc::BitBuffer* parser,
                                RTPVideoHeaderVP9* vp9) {
  uint32_t t, u_bit, s, d_bit;
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&t, 3));
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&u_bit, 1));
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&s, 3));
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&d_bit, 1));
  vp9->temporal_idx = t;
  vp9->temporal_up_switch = u_bit ? true : false;
  vp9->spatial_idx = s;
  vp9->inter_layer_predicted = d_bit ? true : false;
  return true;
}
// Layer indices (non-flexible mode):
//
// +-+-+-+-+-+-+-+-+
// L: |GOF_IDX| S |D|
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
// Parses the two-byte non-flexible-mode layer-index field
// (GOF_IDX/S/D + TL0PICIDX) into |vp9|. Returns false if the payload is
// truncated.
bool ParseLayerInfoNonFlexibleMode(rtc::BitBuffer* parser,
                                   RTPVideoHeaderVP9* vp9) {
  uint32_t gof_idx, s, d_bit;
  uint8_t tl0picidx;
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&gof_idx, 4));
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&s, 3));
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&d_bit, 1));
  RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&tl0picidx));
  vp9->gof_idx = gof_idx;
  vp9->spatial_idx = s;
  vp9->inter_layer_predicted = d_bit ? true : false;
  vp9->tl0_pic_idx = tl0picidx;
  return true;
}
// Dispatches to the flexible-mode or non-flexible-mode layer-info parser,
// based on the previously parsed F bit stored in |vp9|.
bool ParseLayerInfo(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
  return vp9->flexible_mode ? ParseLayerInfoFlexibleMode(parser, vp9)
                            : ParseLayerInfoNonFlexibleMode(parser, vp9);
}
// Reference indices:
//
// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index
// P,F: | P_DIFF |X|N| . has to be specified.
// +-+-+-+-+-+-+-+-+ . up to 3 times
// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14
// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used.
// N=1: An additional P_DIFF follows
// current P_DIFF.
// Parses the reference-index (P_DIFF) fields into |vp9| and derives the
// absolute PictureID of each reference picture. Requires a picture ID to
// have been parsed first. Returns false on missing picture ID, more than
// kMaxVp9RefPics entries, or a truncated payload.
bool ParseRefIndices(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
  if (vp9->picture_id == kNoPictureId)
    return false;

  vp9->num_ref_pics = 0;
  uint32_t n_bit;
  do {
    if (vp9->num_ref_pics == kMaxVp9RefPics)
      return false;

    uint32_t p_diff, x_bit;
    RETURN_FALSE_ON_ERROR(parser->ReadBits(&p_diff, 6));
    RETURN_FALSE_ON_ERROR(parser->ReadBits(&x_bit, 1));
    RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_bit, 1));

    if (x_bit) {
      // P_DIFF is 14 bits.
      uint8_t ext_p_diff;
      RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&ext_p_diff));
      p_diff = (p_diff << 8) + ext_p_diff;
    }

    vp9->pid_diff[vp9->num_ref_pics] = p_diff;
    // Derive the reference PictureID: subtract P_DIFF from the current
    // picture ID, wrapping modulo (max_picture_id + 1). The loop first
    // scales the current id up until the subtraction cannot go negative.
    uint32_t scaled_pid = vp9->picture_id;
    while (p_diff > scaled_pid) {
      scaled_pid += vp9->max_picture_id + 1;
    }
    vp9->ref_picture_id[vp9->num_ref_pics++] = scaled_pid - p_diff;
  } while (n_bit);  // N bit set => another P_DIFF follows.

  return true;
}
// Scalability structure (SS).
//
// +-+-+-+-+-+-+-+-+
// V: | N_S |Y| N_G |
// +-+-+-+-+-+-+-+-+ -|
// Y: | WIDTH | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ . N_S + 1 times
// | HEIGHT | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| -|
// N_G: | T |U| R |-|-| (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times
// | P_DIFF | (OPTIONAL) . R times .
// +-+-+-+-+-+-+-+-+ -| -|
//
// Parses the scalability structure into |vp9|: the V byte (N_S, Y, N_G all
// encode count - 1), optional per-layer resolutions, and the GOF description
// (T/U/R byte plus R P_DIFF bytes per frame). Returns false if the payload
// is truncated.
bool ParseSsData(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
  uint32_t n_s, y_bit, n_g;
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_s, 3));
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&y_bit, 1));
  RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_g, 4));
  vp9->num_spatial_layers = n_s + 1;
  vp9->spatial_layer_resolution_present = y_bit ? true : false;
  vp9->gof.num_frames_in_gof = n_g + 1;

  if (y_bit) {
    for (size_t i = 0; i < vp9->num_spatial_layers; ++i) {
      RETURN_FALSE_ON_ERROR(parser->ReadUInt16(&vp9->width[i]));
      RETURN_FALSE_ON_ERROR(parser->ReadUInt16(&vp9->height[i]));
    }
  }
  for (size_t i = 0; i < vp9->gof.num_frames_in_gof; ++i) {
    uint32_t t, u_bit, r;
    RETURN_FALSE_ON_ERROR(parser->ReadBits(&t, 3));
    RETURN_FALSE_ON_ERROR(parser->ReadBits(&u_bit, 1));
    RETURN_FALSE_ON_ERROR(parser->ReadBits(&r, 2));
    // Skip the two reserved bits.
    RETURN_FALSE_ON_ERROR(parser->ConsumeBits(2));
    vp9->gof.temporal_idx[i] = t;
    vp9->gof.temporal_up_switch[i] = u_bit ? true : false;
    vp9->gof.num_ref_pics[i] = r;

    for (size_t p = 0; p < vp9->gof.num_ref_pics[i]; ++p) {
      uint8_t p_diff;
      RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&p_diff));
      vp9->gof.pid_diff[i][p] = p_diff;
    }
  }
  return true;
}
// Computes how many payload bytes to put in the next packet, given
// |max_length| bytes of space and |rem_bytes| bytes left of the frame.
// Returns 0 on error (either argument zero).
size_t CalcNextSize(size_t max_length, size_t rem_bytes) {
  if (max_length == 0 || rem_bytes == 0) {
    return 0;
  }
  if (!kBalancedMode) {
    // Greedy mode: fill each packet completely.
    if (rem_bytes < max_length)
      return rem_bytes;
    return max_length;
  }
  // Balanced mode: split the remainder into the fewest fragments that fit,
  // then make the fragments (almost) equal in size.
  size_t num_frags = static_cast<size_t>(
      std::ceil(static_cast<double>(rem_bytes) / max_length));
  return static_cast<size_t>(
      static_cast<double>(rem_bytes) / num_frags + 0.5);
}
} // namespace
// |hdr| is copied and used when writing the payload descriptor of each
// packet. |max_payload_length| is the maximum size in bytes of one RTP
// payload (descriptor plus media data).
RtpPacketizerVp9::RtpPacketizerVp9(const RTPVideoHeaderVP9& hdr,
                                   size_t max_payload_length)
    : hdr_(hdr),
      max_payload_length_(max_payload_length),
      payload_(nullptr),
      payload_size_(0) {
}

RtpPacketizerVp9::~RtpPacketizerVp9() {
}
// Only base-layer packets (temporal_idx == 0, or no temporal layering at
// all) are FEC-protected.
ProtectionType RtpPacketizerVp9::GetProtectionType() {
  if (hdr_.temporal_idx == 0 || hdr_.temporal_idx == kNoTemporalIdx)
    return kProtectedPacket;
  return kUnprotectedPacket;
}
// Decides whether this packet may be stored for retransmission, based on its
// temporal layer and the retransmission settings bitmask.
StorageType RtpPacketizerVp9::GetStorageType(uint32_t retransmission_settings) {
  const bool base_layer = hdr_.temporal_idx == 0;
  const bool higher_layer =
      hdr_.temporal_idx != kNoTemporalIdx && hdr_.temporal_idx > 0;
  if (base_layer && !(retransmission_settings & kRetransmitBaseLayer))
    return kDontRetransmit;
  if (higher_layer && !(retransmission_settings & kRetransmitHigherLayers))
    return kDontRetransmit;
  return kAllowRetransmission;
}
// Human-readable name of this packetizer, used for logging.
std::string RtpPacketizerVp9::ToString() {
  static const char kPacketizerName[] = "RtpPacketizerVp9";
  return kPacketizerName;
}
// Stores a pointer to the encoded VP9 frame and precomputes all packet
// sizes. |payload| must outlive the packetizer. |fragmentation| is unused
// for VP9.
void RtpPacketizerVp9::SetPayloadData(
    const uint8_t* payload,
    size_t payload_size,
    const RTPFragmentationHeader* fragmentation) {
  payload_ = payload;
  payload_size_ = payload_size;
  GeneratePackets();
}
// Splits the payload into packet-sized chunks and queues one PacketInfo per
// chunk. On failure the queue is emptied and no packets are produced.
void RtpPacketizerVp9::GeneratePackets() {
  if (max_payload_length_ < PayloadDescriptorLength(hdr_) + 1) {
    LOG(LS_ERROR) << "Payload header and one payload byte won't fit.";
    return;
  }
  size_t bytes_processed = 0;
  while (bytes_processed < payload_size_) {
    size_t rem_bytes = payload_size_ - bytes_processed;
    // Only the first packet carries the SS data, so subsequent packets have
    // more room for media bytes.
    size_t rem_payload_len = max_payload_length_ -
        (bytes_processed ? PayloadDescriptorLengthMinusSsData(hdr_)
                         : PayloadDescriptorLength(hdr_));
    size_t packet_bytes = CalcNextSize(rem_payload_len, rem_bytes);
    if (packet_bytes == 0) {
      LOG(LS_ERROR) << "Failed to generate VP9 packets.";
      while (!packets_.empty())
        packets_.pop();
      return;
    }
    QueuePacket(bytes_processed, packet_bytes, bytes_processed == 0,
                rem_bytes == packet_bytes, &packets_);
    bytes_processed += packet_bytes;
  }
  assert(bytes_processed == payload_size_);
}
// Pops the next queued packet, writes its payload descriptor and payload
// into |buffer| and sets |bytes_to_send|. |last_packet| is set to true when
// no packets remain. Returns false if the queue is empty or writing fails.
bool RtpPacketizerVp9::NextPacket(uint8_t* buffer,
                                  size_t* bytes_to_send,
                                  bool* last_packet) {
  if (packets_.empty()) {
    return false;
  }
  PacketInfo packet_info = packets_.front();
  packets_.pop();

  if (!WriteHeaderAndPayload(packet_info, buffer, bytes_to_send)) {
    return false;
  }
  *last_packet = packets_.empty();
  return true;
}
// VP9 format:
//
// Payload descriptor for F = 1 (flexible mode)
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |I|P|L|F|B|E|V|-| (REQUIRED)
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+ -|
// P,F: | P_DIFF |X|N| (CONDITIONALLY RECOMMENDED) .
// +-+-+-+-+-+-+-+-+ . up to 3 times
// X: |EXTENDED P_DIFF| .
// +-+-+-+-+-+-+-+-+ -|
// V: | SS |
// | .. |
// +-+-+-+-+-+-+-+-+
//
// Payload descriptor for F = 0 (non-flexible mode)
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |I|P|L|F|B|E|V|-| (REQUIRED)
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: |GOF_IDX| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX | (CONDITIONALLY REQUIRED)
// +-+-+-+-+-+-+-+-+
// V: | SS |
// | .. |
// +-+-+-+-+-+-+-+-+
// Writes the payload descriptor to |buffer| and copies the payload chunk
// described by |packet_info| right after it. |bytes_to_send| is set to the
// total number of bytes written. Returns false if the header write fails.
bool RtpPacketizerVp9::WriteHeaderAndPayload(const PacketInfo& packet_info,
                                             uint8_t* buffer,
                                             size_t* bytes_to_send) const {
  size_t header_length;
  if (!WriteHeader(packet_info, buffer, &header_length))
    return false;

  // Copy payload data.
  memcpy(&buffer[header_length],
         &payload_[packet_info.payload_start_pos], packet_info.size);

  *bytes_to_send = header_length + packet_info.size;
  return true;
}
// Writes the VP9 payload descriptor for one packet into |buffer| and
// returns its byte length in |header_length|. Returns false if any field
// cannot be written (e.g. buffer too small or invalid header state).
bool RtpPacketizerVp9::WriteHeader(const PacketInfo& packet_info,
                                   uint8_t* buffer,
                                   size_t* header_length) const {
  // Required payload descriptor byte.
  bool i_bit = PictureIdPresent(hdr_);
  bool p_bit = hdr_.inter_pic_predicted;
  bool l_bit = LayerInfoPresent(hdr_);
  bool f_bit = hdr_.flexible_mode;
  // B/E are only set on the packets that actually begin/end the layer frame.
  bool b_bit = hdr_.beginning_of_frame && packet_info.layer_begin;
  bool e_bit = hdr_.end_of_frame && packet_info.layer_end;
  // SS data is only sent in the first packet of the frame.
  bool v_bit = hdr_.ss_data_available && b_bit;

  rtc::BitBufferWriter writer(buffer, max_payload_length_);
  RETURN_FALSE_ON_ERROR(writer.WriteBits(i_bit ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer.WriteBits(p_bit ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer.WriteBits(l_bit ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer.WriteBits(f_bit ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer.WriteBits(b_bit ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer.WriteBits(e_bit ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer.WriteBits(v_bit ? 1 : 0, 1));
  RETURN_FALSE_ON_ERROR(writer.WriteBits(kReservedBitValue0, 1));

  // Add fields that are present.
  if (i_bit && !WritePictureId(hdr_, &writer)) {
    LOG(LS_ERROR) << "Failed writing VP9 picture id.";
    return false;
  }
  if (l_bit && !WriteLayerInfo(hdr_, &writer)) {
    LOG(LS_ERROR) << "Failed writing VP9 layer info.";
    return false;
  }
  if (p_bit && f_bit && !WriteRefIndices(hdr_, &writer)) {
    LOG(LS_ERROR) << "Failed writing VP9 ref indices.";
    return false;
  }
  if (v_bit && !WriteSsData(hdr_, &writer)) {
    LOG(LS_ERROR) << "Failed writing VP9 SS data.";
    return false;
  }

  size_t offset_bytes = 0;
  size_t offset_bits = 0;
  writer.GetCurrentOffset(&offset_bytes, &offset_bits);
  // All descriptor fields are byte-aligned in total.
  assert(offset_bits == 0);

  *header_length = offset_bytes;
  return true;
}
// Parses one VP9 RTP payload: the mandatory descriptor byte, then each
// optional field in wire order (picture ID, layer info, ref indices, SS
// data). On success, fills |parsed_payload| with the decoded header and
// points payload/payload_length at the media bytes following the
// descriptor. Returns false on any malformed or truncated input.
bool RtpDepacketizerVp9::Parse(ParsedPayload* parsed_payload,
                               const uint8_t* payload,
                               size_t payload_length) {
  assert(parsed_payload != nullptr);
  if (payload_length == 0) {
    LOG(LS_ERROR) << "Payload length is zero.";
    return false;
  }

  // Parse mandatory first byte of payload descriptor.
  rtc::BitBuffer parser(payload, payload_length);
  uint32_t i_bit, p_bit, l_bit, f_bit, b_bit, e_bit, v_bit;
  RETURN_FALSE_ON_ERROR(parser.ReadBits(&i_bit, 1));
  RETURN_FALSE_ON_ERROR(parser.ReadBits(&p_bit, 1));
  RETURN_FALSE_ON_ERROR(parser.ReadBits(&l_bit, 1));
  RETURN_FALSE_ON_ERROR(parser.ReadBits(&f_bit, 1));
  RETURN_FALSE_ON_ERROR(parser.ReadBits(&b_bit, 1));
  RETURN_FALSE_ON_ERROR(parser.ReadBits(&e_bit, 1));
  RETURN_FALSE_ON_ERROR(parser.ReadBits(&v_bit, 1));
  RETURN_FALSE_ON_ERROR(parser.ConsumeBits(1));  // Reserved bit.

  // Parsed payload.
  parsed_payload->type.Video.width = 0;
  parsed_payload->type.Video.height = 0;
  parsed_payload->type.Video.simulcastIdx = 0;
  parsed_payload->type.Video.codec = kRtpVideoVp9;

  // A frame without inter-picture prediction is a key frame.
  parsed_payload->frame_type = p_bit ? kVideoFrameDelta : kVideoFrameKey;

  RTPVideoHeaderVP9* vp9 = &parsed_payload->type.Video.codecHeader.VP9;
  vp9->InitRTPVideoHeaderVP9();
  vp9->inter_pic_predicted = p_bit ? true : false;
  vp9->flexible_mode = f_bit ? true : false;
  vp9->beginning_of_frame = b_bit ? true : false;
  vp9->end_of_frame = e_bit ? true : false;
  vp9->ss_data_available = v_bit ? true : false;
  // Default to layer 0; overwritten below when layer info is present.
  vp9->temporal_idx = 0;
  vp9->spatial_idx = 0;

  // Parse fields that are present.
  if (i_bit && !ParsePictureId(&parser, vp9)) {
    LOG(LS_ERROR) << "Failed parsing VP9 picture id.";
    return false;
  }
  if (l_bit && !ParseLayerInfo(&parser, vp9)) {
    LOG(LS_ERROR) << "Failed parsing VP9 layer info.";
    return false;
  }
  if (p_bit && f_bit && !ParseRefIndices(&parser, vp9)) {
    LOG(LS_ERROR) << "Failed parsing VP9 ref indices.";
    return false;
  }
  if (v_bit) {
    if (!ParseSsData(&parser, vp9)) {
      LOG(LS_ERROR) << "Failed parsing VP9 SS data.";
      return false;
    }
    if (vp9->spatial_layer_resolution_present) {
      // TODO(asapersson): Add support for spatial layers.
      parsed_payload->type.Video.width = vp9->width[0];
      parsed_payload->type.Video.height = vp9->height[0];
    }
  }
  parsed_payload->type.Video.isFirstPacket = b_bit && (vp9->spatial_idx == 0);

  // Everything after the descriptor is media payload.
  uint64_t rem_bits = parser.RemainingBitCount();
  assert(rem_bits % 8 == 0);
  parsed_payload->payload_length = rem_bits / 8;
  if (parsed_payload->payload_length == 0) {
    LOG(LS_ERROR) << "Failed parsing VP9 payload data.";
    return false;
  }
  parsed_payload->payload =
      payload + payload_length - parsed_payload->payload_length;

  return true;
}
} // namespace webrtc

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// This file contains the declaration of the VP9 packetizer class.
// A packetizer object is created for each encoded video frame. The
// constructor is called with the payload data and size.
//
// After creating the packetizer, the method NextPacket is called
// repeatedly to get all packets for the frame. The method returns
// false as long as there are more packets left to fetch.
//
#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_
#define WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_
#include <queue>
#include <string>
#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Splits one encoded VP9 frame into RTP packets, each prefixed with a VP9
// payload descriptor. Create one instance per frame, call SetPayloadData()
// once, then NextPacket() until it reports the last packet.
class RtpPacketizerVp9 : public RtpPacketizer {
 public:
  // |hdr| describes the frame's VP9 payload descriptor fields;
  // |max_payload_length| is the maximum RTP payload size in bytes.
  RtpPacketizerVp9(const RTPVideoHeaderVP9& hdr, size_t max_payload_length);

  virtual ~RtpPacketizerVp9();

  ProtectionType GetProtectionType() override;

  StorageType GetStorageType(uint32_t retransmission_settings) override;

  std::string ToString() override;

  // The payload data must be one encoded VP9 frame.
  void SetPayloadData(const uint8_t* payload,
                      size_t payload_size,
                      const RTPFragmentationHeader* fragmentation) override;

  // Gets the next payload with VP9 payload header.
  // |buffer| is a pointer to where the output will be written.
  // |bytes_to_send| is an output variable that will contain number of bytes
  // written to buffer.
  // |last_packet| is true for the last packet of the frame, false otherwise
  // (i.e. call the function again to get the next packet).
  // Returns true on success, false otherwise.
  bool NextPacket(uint8_t* buffer,
                  size_t* bytes_to_send,
                  bool* last_packet) override;

  // Description of one payload chunk (one RTP packet) of the frame.
  typedef struct {
    size_t payload_start_pos;  // Byte offset of the chunk within the payload.
    size_t size;               // Number of payload bytes in the chunk.
    bool layer_begin;          // True if first packet of the layer frame.
    bool layer_end;            // True if last packet of the layer frame.
  } PacketInfo;
  typedef std::queue<PacketInfo> PacketInfoQueue;

 private:
  // Calculates all packet sizes and loads info to packet queue.
  void GeneratePackets();

  // Writes the payload descriptor header and copies payload to the |buffer|.
  // |packet_info| determines which part of the payload to write.
  // |bytes_to_send| contains the number of written bytes to the buffer.
  // Returns true on success, false otherwise.
  bool WriteHeaderAndPayload(const PacketInfo& packet_info,
                             uint8_t* buffer,
                             size_t* bytes_to_send) const;

  // Writes payload descriptor header to |buffer|.
  // Returns true on success, false otherwise.
  bool WriteHeader(const PacketInfo& packet_info,
                   uint8_t* buffer,
                   size_t* header_length) const;

  const RTPVideoHeaderVP9 hdr_;
  const size_t max_payload_length_;  // The max length in bytes of one packet.
  const uint8_t* payload_;           // The payload data to be packetized.
  size_t payload_size_;              // The size in bytes of the payload data.
  PacketInfoQueue packets_;

  DISALLOW_COPY_AND_ASSIGN(RtpPacketizerVp9);
};
// Depacketizer for the VP9 RTP payload format. Parses the payload descriptor
// at the start of an RTP payload and fills in the VP9 codec header.
class RtpDepacketizerVp9 : public RtpDepacketizer {
 public:
  virtual ~RtpDepacketizerVp9() {}

  // Parses the VP9 payload descriptor in |payload| and fills |parsed_payload|
  // (frame type, first-packet flag, VP9 codec header, and the position and
  // length of the remaining payload). Returns false on malformed input, e.g.
  // a zero |payload_length|, a buffer too short to hold any payload after the
  // descriptor, or reference indices without a picture id (see unit tests).
  bool Parse(ParsedPayload* parsed_payload,
             const uint8_t* payload,
             size_t payload_length) override;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_

View File

@ -0,0 +1,664 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <vector>
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
#include "webrtc/typedefs.h"
namespace webrtc {
namespace {
// Compares all VP9 codec header fields of |actual| against |expected|.
// When a packet carries no layer indices the parser reports 0, so
// kNoTemporalIdx/kNoSpatialIdx in |expected| are compared against 0 here.
void VerifyHeader(const RTPVideoHeaderVP9& expected,
                  const RTPVideoHeaderVP9& actual) {
  EXPECT_EQ(expected.inter_layer_predicted, actual.inter_layer_predicted);
  EXPECT_EQ(expected.inter_pic_predicted, actual.inter_pic_predicted);
  EXPECT_EQ(expected.flexible_mode, actual.flexible_mode);
  EXPECT_EQ(expected.beginning_of_frame, actual.beginning_of_frame);
  EXPECT_EQ(expected.end_of_frame, actual.end_of_frame);
  EXPECT_EQ(expected.ss_data_available, actual.ss_data_available);
  EXPECT_EQ(expected.picture_id, actual.picture_id);
  EXPECT_EQ(expected.max_picture_id, actual.max_picture_id);
  EXPECT_EQ(expected.temporal_idx == kNoTemporalIdx ? 0 : expected.temporal_idx,
            actual.temporal_idx);
  EXPECT_EQ(expected.spatial_idx == kNoSpatialIdx ? 0 : expected.spatial_idx,
            actual.spatial_idx);
  EXPECT_EQ(expected.gof_idx, actual.gof_idx);
  EXPECT_EQ(expected.tl0_pic_idx, actual.tl0_pic_idx);
  EXPECT_EQ(expected.temporal_up_switch, actual.temporal_up_switch);

  // Reference picture indices (flexible mode).
  EXPECT_EQ(expected.num_ref_pics, actual.num_ref_pics);
  for (uint8_t i = 0; i < expected.num_ref_pics; ++i) {
    EXPECT_EQ(expected.pid_diff[i], actual.pid_diff[i]);
    EXPECT_EQ(expected.ref_picture_id[i], actual.ref_picture_id[i]);
  }
  // Scalability structure fields are only filled in when SS data is present.
  if (expected.ss_data_available) {
    EXPECT_EQ(expected.spatial_layer_resolution_present,
              actual.spatial_layer_resolution_present);
    EXPECT_EQ(expected.num_spatial_layers, actual.num_spatial_layers);
    if (expected.spatial_layer_resolution_present) {
      for (size_t i = 0; i < expected.num_spatial_layers; i++) {
        EXPECT_EQ(expected.width[i], actual.width[i]);
        EXPECT_EQ(expected.height[i], actual.height[i]);
      }
    }
    EXPECT_EQ(expected.gof.num_frames_in_gof, actual.gof.num_frames_in_gof);
    for (size_t i = 0; i < expected.gof.num_frames_in_gof; i++) {
      EXPECT_EQ(expected.gof.temporal_up_switch[i],
                actual.gof.temporal_up_switch[i]);
      EXPECT_EQ(expected.gof.temporal_idx[i], actual.gof.temporal_idx[i]);
      EXPECT_EQ(expected.gof.num_ref_pics[i], actual.gof.num_ref_pics[i]);
      for (size_t j = 0; j < expected.gof.num_ref_pics[i]; j++) {
        EXPECT_EQ(expected.gof.pid_diff[i][j], actual.gof.pid_diff[i][j]);
      }
    }
  }
}
// Checks that |parsed| points at the expected payload position and that the
// parsed bytes match |payload| exactly.
void VerifyPayload(const RtpDepacketizer::ParsedPayload& parsed,
                   const uint8_t* payload,
                   size_t payload_length) {
  EXPECT_EQ(payload, parsed.payload);
  EXPECT_EQ(payload_length, parsed.payload_length);
  const std::vector<uint8_t> parsed_bytes(
      parsed.payload, parsed.payload + parsed.payload_length);
  EXPECT_THAT(parsed_bytes,
              ::testing::ElementsAreArray(payload, payload_length));
}
// Parses |packet| (|expected_length| bytes) with a fresh RtpDepacketizerVp9
// and verifies that the decoded codec header equals |expected|, that the
// payload descriptor is |expected_hdr_length| bytes, and that the remaining
// bytes are reported as payload.
void ParseAndCheckPacket(const uint8_t* packet,
                         const RTPVideoHeaderVP9& expected,
                         size_t expected_hdr_length,
                         size_t expected_length) {
  rtc::scoped_ptr<RtpDepacketizer> depacketizer(new RtpDepacketizerVp9());
  RtpDepacketizer::ParsedPayload parsed;
  ASSERT_TRUE(depacketizer->Parse(&parsed, packet, expected_length));
  EXPECT_EQ(kRtpVideoVp9, parsed.type.Video.codec);
  VerifyHeader(expected, parsed.type.Video.codecHeader.VP9);
  const size_t kExpectedPayloadLength = expected_length - expected_hdr_length;
  VerifyPayload(parsed, packet + expected_hdr_length, kExpectedPayloadLength);
}
} // namespace
// Payload descriptor for flexible mode
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |I|P|L|F|B|E|V|-| (REQUIRED)
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+ -|
// P,F: | P_DIFF |X|N| (CONDITIONALLY RECOMMENDED) .
// +-+-+-+-+-+-+-+-+ . up to 3 times
// X: |EXTENDED P_DIFF| (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -|
// V: | SS |
// | .. |
// +-+-+-+-+-+-+-+-+
//
// Payload descriptor for non-flexible mode
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |I|P|L|F|B|E|V|-| (REQUIRED)
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: |GOF_IDX| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX | (CONDITIONALLY REQUIRED)
// +-+-+-+-+-+-+-+-+
// V: | SS |
// | .. |
// +-+-+-+-+-+-+-+-+
// Test fixture for RtpPacketizerVp9. Creates a payload filled with a known
// byte pattern, packetizes it, and verifies each produced packet by parsing
// it back with RtpDepacketizerVp9.
class RtpPacketizerVp9Test : public ::testing::Test {
 protected:
  RtpPacketizerVp9Test() {}
  virtual void SetUp() {
    expected_.InitRTPVideoHeaderVP9();
    // Always input one layer frame at a time.
    expected_.beginning_of_frame = true;
    expected_.end_of_frame = true;
  }

  rtc::scoped_ptr<uint8_t[]> packet_;   // Output buffer for NextPacket.
  rtc::scoped_ptr<uint8_t[]> payload_;  // Frame payload to packetize.
  size_t payload_size_;                 // Size in bytes of |payload_|.
  size_t payload_pos_;                  // Payload bytes verified so far.
  RTPVideoHeaderVP9 expected_;          // Header fed to the packetizer.
  rtc::scoped_ptr<RtpPacketizerVp9> packetizer_;

  // Creates a |payload_size| byte payload (every byte set to 7) and a
  // packetizer producing packets of at most |packet_size| bytes.
  void Init(size_t payload_size, size_t packet_size) {
    payload_.reset(new uint8_t[payload_size]);
    memset(payload_.get(), 7, payload_size);
    payload_size_ = payload_size;
    payload_pos_ = 0;
    packetizer_.reset(new RtpPacketizerVp9(expected_, packet_size));
    packetizer_->SetPayloadData(payload_.get(), payload_size_, NULL);

    // Output buffer large enough for payload plus any descriptor.
    const int kMaxPayloadDescriptorLength = 100;
    packet_.reset(new uint8_t[payload_size_ + kMaxPayloadDescriptorLength]);
  }

  // Checks that packet bytes [start_pos, end_pos) continue the payload byte
  // pattern, and that |last| is set exactly when the whole payload is done.
  void CheckPayload(const uint8_t* packet,
                    size_t start_pos,
                    size_t end_pos,
                    bool last) {
    for (size_t i = start_pos; i < end_pos; ++i) {
      EXPECT_EQ(packet[i], payload_[payload_pos_++]);
    }
    EXPECT_EQ(last, payload_pos_ == payload_size_);
  }

  // Requests |expected_num_packets| packets from the packetizer and, for
  // each, verifies total size, descriptor size, parsed header fields and
  // payload contents. With |expected_num_packets| == 0, expects NextPacket
  // to fail.
  void CreateParseAndCheckPackets(const size_t* expected_hdr_sizes,
                                  const size_t* expected_sizes,
                                  size_t expected_num_packets) {
    ASSERT_TRUE(packetizer_.get() != NULL);

    size_t length = 0;
    bool last = false;
    if (expected_num_packets == 0) {
      EXPECT_FALSE(packetizer_->NextPacket(packet_.get(), &length, &last));
      return;
    }
    for (size_t i = 0; i < expected_num_packets; ++i) {
      EXPECT_TRUE(packetizer_->NextPacket(packet_.get(), &length, &last));
      EXPECT_EQ(expected_sizes[i], length);
      RTPVideoHeaderVP9 hdr = expected_;
      hdr.beginning_of_frame = (i == 0);  // Only the first packet starts it.
      hdr.end_of_frame = last;
      ParseAndCheckPacket(packet_.get(), hdr, expected_hdr_sizes[i], length);
      CheckPayload(packet_.get(), expected_hdr_sizes[i], length, last);
    }
    EXPECT_TRUE(last);
  }
};
// A frame plus the 1 byte descriptor that fits the packet size gives one
// packet.
TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_OnePacket) {
  const size_t kFrameSize = 25;
  const size_t kPacketSize = 26;
  Init(kFrameSize, kPacketSize);

  // One packet:
  // I:0, P:0, L:0, F:0, B:1, E:1, V:0 (1hdr + 25 payload)
  const size_t kExpectedHdrSizes[] = {1};
  const size_t kExpectedSizes[] = {26};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// A frame that does not fit in one packet (descriptor takes one byte) is
// split into two roughly equal-sized packets.
TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_TwoPackets) {
  const size_t kFrameSize = 27;
  const size_t kPacketSize = 27;
  Init(kFrameSize, kPacketSize);

  // Two packets:
  // I:0, P:0, L:0, F:0, B:1, E:0, V:0 (1hdr + 14 payload)
  // I:0, P:0, L:0, F:0, B:0, E:1, V:0 (1hdr + 13 payload)
  const size_t kExpectedHdrSizes[] = {1, 1};
  const size_t kExpectedSizes[] = {15, 14};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// A 1 byte packet cannot fit both descriptor and payload; NextPacket fails.
TEST_F(RtpPacketizerVp9Test, TestTooShortBufferToFitPayload) {
  const size_t kFrameSize = 1;
  const size_t kPacketSize = 1;
  Init(kFrameSize, kPacketSize);  // 1hdr + 1 payload

  const size_t kExpectedNum = 0;
  CreateParseAndCheckPackets(NULL, NULL, kExpectedNum);
}
// Picture ids up to kMaxOneBytePictureId (7 bits) are sent as one byte,
// giving a 2 byte payload descriptor.
TEST_F(RtpPacketizerVp9Test, TestOneBytePictureId) {
  const size_t kFrameSize = 30;
  const size_t kPacketSize = 12;

  expected_.picture_id = kMaxOneBytePictureId;  // 2 byte payload descriptor
  expected_.max_picture_id = kMaxOneBytePictureId;
  Init(kFrameSize, kPacketSize);

  // Three packets:
  // I:1, P:0, L:0, F:0, B:1, E:0, V:0 (2hdr + 10 payload)
  // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (2hdr + 10 payload)
  // I:1, P:0, L:0, F:0, B:0, E:1, V:0 (2hdr + 10 payload)
  const size_t kExpectedHdrSizes[] = {2, 2, 2};
  const size_t kExpectedSizes[] = {12, 12, 12};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// Picture ids above 7 bits use the extended (M bit + two byte) form, giving
// a 3 byte payload descriptor.
TEST_F(RtpPacketizerVp9Test, TestTwoBytePictureId) {
  const size_t kFrameSize = 31;
  const size_t kPacketSize = 13;

  expected_.picture_id = kMaxTwoBytePictureId;  // 3 byte payload descriptor
  Init(kFrameSize, kPacketSize);

  // Four packets:
  // I:1, P:0, L:0, F:0, B:1, E:0, V:0 (3hdr + 8 payload)
  // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (3hdr + 8 payload)
  // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (3hdr + 8 payload)
  // I:1, P:0, L:0, F:0, B:0, E:1, V:0 (3hdr + 7 payload)
  const size_t kExpectedHdrSizes[] = {3, 3, 3, 3};
  const size_t kExpectedSizes[] = {11, 11, 11, 10};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// Non-flexible mode: layer info is one byte (GOF_IDX/S/D) followed by a
// TL0PICIDX byte, repeated in every packet of the frame.
TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithNonFlexibleMode) {
  const size_t kFrameSize = 30;
  const size_t kPacketSize = 25;

  expected_.gof_idx = 3;
  expected_.spatial_idx = 2;
  expected_.inter_layer_predicted = true;  // D
  expected_.tl0_pic_idx = 117;
  Init(kFrameSize, kPacketSize);

  // Two packets:
  // | I:0, P:0, L:1, F:0, B:1, E:0, V:0 | (3hdr + 15 payload)
  // L:   | GOF_IDX:3, S:2, D:1 | TL0PICIDX:117 |
  // | I:0, P:0, L:1, F:0, B:0, E:1, V:0 | (3hdr + 15 payload)
  // L:   | GOF_IDX:3, S:2, D:1 | TL0PICIDX:117 |
  const size_t kExpectedHdrSizes[] = {3, 3};
  const size_t kExpectedSizes[] = {18, 18};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// Flexible mode: layer info is a single T/U/S/D byte and no TL0PICIDX.
TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithFlexibleMode) {
  const size_t kFrameSize = 21;
  const size_t kPacketSize = 23;

  expected_.flexible_mode = true;
  expected_.temporal_idx = 3;
  expected_.temporal_up_switch = true;  // U
  expected_.spatial_idx = 2;
  expected_.inter_layer_predicted = false;  // D
  Init(kFrameSize, kPacketSize);

  // One packet:
  // I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 21 payload)
  // L:   T:3, U:1, S:2, D:0
  const size_t kExpectedHdrSizes[] = {2};
  const size_t kExpectedSizes[] = {23};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// Flexible mode with two reference pictures; the second P_DIFF does not fit
// in one byte and uses the extended (X bit) form.
TEST_F(RtpPacketizerVp9Test, TestRefIdx) {
  const size_t kFrameSize = 16;
  const size_t kPacketSize = 22;

  expected_.inter_pic_predicted = true;  // P
  expected_.flexible_mode = true;        // F
  expected_.picture_id = 100;
  expected_.num_ref_pics = 2;
  expected_.pid_diff[0] = 3;
  expected_.pid_diff[1] = 1171;
  expected_.ref_picture_id[0] = 97;     // 100 - 3 = 97
  expected_.ref_picture_id[1] = 31697;  // 0x7FFF + 1 + 100 - 1171 = 31697
  Init(kFrameSize, kPacketSize);

  // One packet:
  // I:1, P:1, L:0, F:1, B:1, E:1, V:0 (6hdr + 16 payload)
  // I:   100 (2 bytes)
  // P,F: P_DIFF:3, X:0, N:1
  //      P_DIFF:1171, X:1, N:0 (2 bytes)
  const size_t kExpectedHdrSizes[] = {6};
  const size_t kExpectedSizes[] = {22};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// Reference indices require a picture id in flexible mode; without one,
// packetization fails.
TEST_F(RtpPacketizerVp9Test, TestRefIdxFailsWithoutPictureId) {
  const size_t kFrameSize = 16;
  const size_t kPacketSize = 22;

  expected_.inter_pic_predicted = true;
  expected_.flexible_mode = true;
  expected_.num_ref_pics = 1;
  expected_.pid_diff[0] = 3;
  Init(kFrameSize, kPacketSize);

  const size_t kExpectedNum = 0;
  CreateParseAndCheckPackets(NULL, NULL, kExpectedNum);
}
// SS data with the Y bit cleared: descriptor carries N_S/Y/N_G and the GOF
// description, but no width/height pairs.
TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutSpatialResolutionPresent) {
  const size_t kFrameSize = 21;
  const size_t kPacketSize = 25;

  expected_.ss_data_available = true;
  expected_.num_spatial_layers = 1;
  expected_.spatial_layer_resolution_present = false;
  expected_.gof.num_frames_in_gof = 1;
  expected_.gof.temporal_idx[0] = 0;
  expected_.gof.temporal_up_switch[0] = true;
  expected_.gof.num_ref_pics[0] = 1;
  expected_.gof.pid_diff[0][0] = 4;
  Init(kFrameSize, kPacketSize);

  // One packet:
  // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (4hdr + 21 payload)
  // N_S:0, Y:0, N_G:0
  // T:0, U:1, R:1 | P_DIFF[0][0]:4
  const size_t kExpectedHdrSizes[] = {4};
  const size_t kExpectedSizes[] = {25};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// Full SS data: two spatial layers with resolutions and a three frame GOF,
// including per-frame reference picture diffs.
TEST_F(RtpPacketizerVp9Test, TestSsData) {
  const size_t kFrameSize = 21;
  const size_t kPacketSize = 39;

  expected_.ss_data_available = true;
  expected_.num_spatial_layers = 2;
  expected_.spatial_layer_resolution_present = true;
  expected_.width[0] = 640;
  expected_.width[1] = 1280;
  expected_.height[0] = 360;
  expected_.height[1] = 720;
  expected_.gof.num_frames_in_gof = 3;
  expected_.gof.temporal_idx[0] = 0;
  expected_.gof.temporal_idx[1] = 1;
  expected_.gof.temporal_idx[2] = 2;
  expected_.gof.temporal_up_switch[0] = true;
  expected_.gof.temporal_up_switch[1] = true;
  expected_.gof.temporal_up_switch[2] = false;
  expected_.gof.num_ref_pics[0] = 0;
  expected_.gof.num_ref_pics[1] = 3;
  expected_.gof.num_ref_pics[2] = 2;
  expected_.gof.pid_diff[1][0] = 5;
  expected_.gof.pid_diff[1][1] = 6;
  expected_.gof.pid_diff[1][2] = 7;
  expected_.gof.pid_diff[2][0] = 8;
  expected_.gof.pid_diff[2][1] = 9;
  Init(kFrameSize, kPacketSize);

  // One packet:
  // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (18hdr + 21 payload)
  // N_S:1, Y:1, N_G:2
  // WIDTH:640   // 2 bytes
  // HEIGHT:360  // 2 bytes
  // WIDTH:1280  // 2 bytes
  // HEIGHT:720  // 2 bytes
  // T:0, U:1, R:0
  // T:1, U:1, R:3 | P_DIFF[1][0]:5 | P_DIFF[1][1]:6 | P_DIFF[1][2]:7
  // T:2, U:0, R:2 | P_DIFF[2][0]:8 | P_DIFF[2][1]:9
  const size_t kExpectedHdrSizes[] = {18};
  const size_t kExpectedSizes[] = {39};
  const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
  CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
// Base temporal layer (T:0) packets are protected and retransmitted when
// base layer retransmission is enabled.
TEST_F(RtpPacketizerVp9Test, TestBaseLayerProtectionAndStorageType) {
  const size_t kFrameSize = 10;
  const size_t kPacketSize = 12;

  // I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 10 payload)
  // L:   T:0, U:0, S:0, D:0
  expected_.flexible_mode = true;
  expected_.temporal_idx = 0;
  Init(kFrameSize, kPacketSize);

  EXPECT_EQ(kProtectedPacket, packetizer_->GetProtectionType());
  EXPECT_EQ(kAllowRetransmission,
            packetizer_->GetStorageType(kRetransmitBaseLayer));
  EXPECT_EQ(kDontRetransmit, packetizer_->GetStorageType(kRetransmitOff));
}
// Higher temporal layer (T:1) packets are unprotected and retransmitted only
// when higher layer retransmission is enabled.
TEST_F(RtpPacketizerVp9Test, TestHigherLayerProtectionAndStorageType) {
  const size_t kFrameSize = 10;
  const size_t kPacketSize = 12;

  // I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 10 payload)
  // L:   T:1, U:0, S:0, D:0
  expected_.flexible_mode = true;
  expected_.temporal_idx = 1;
  Init(kFrameSize, kPacketSize);

  EXPECT_EQ(kUnprotectedPacket, packetizer_->GetProtectionType());
  EXPECT_EQ(kDontRetransmit, packetizer_->GetStorageType(kRetransmitBaseLayer));
  EXPECT_EQ(kAllowRetransmission,
            packetizer_->GetStorageType(kRetransmitHigherLayers));
}
// Test fixture for RtpDepacketizerVp9. Provides a depacketizer instance and
// an expected header initialized to default values.
class RtpDepacketizerVp9Test : public ::testing::Test {
 protected:
  RtpDepacketizerVp9Test()
      : depacketizer_(new RtpDepacketizerVp9()) {}
  virtual void SetUp() {
    expected_.InitRTPVideoHeaderVP9();
  }

  RTPVideoHeaderVP9 expected_;  // Header parsed output is checked against.
  rtc::scoped_ptr<RtpDepacketizer> depacketizer_;
};
// Minimal descriptor: only the required first byte, with B and E set.
TEST_F(RtpDepacketizerVp9Test, ParseBasicHeader) {
  const uint8_t kHeaderLength = 1;
  uint8_t packet[4] = {0};
  packet[0] = 0x0C;  // I:0 P:0 L:0 F:0 B:1 E:1 V:0 R:0

  expected_.beginning_of_frame = true;
  expected_.end_of_frame = true;
  ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
// One byte picture id: I bit set, M bit of the picture id byte cleared.
TEST_F(RtpDepacketizerVp9Test, ParseOneBytePictureId) {
  const uint8_t kHeaderLength = 2;
  uint8_t packet[10] = {0};
  packet[0] = 0x80;  // I:1 P:0 L:0 F:0 B:0 E:0 V:0 R:0
  packet[1] = kMaxOneBytePictureId;

  expected_.picture_id = kMaxOneBytePictureId;
  expected_.max_picture_id = kMaxOneBytePictureId;
  ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
// Two byte picture id: I bit set, M bit set, 15 bit id split over two bytes.
TEST_F(RtpDepacketizerVp9Test, ParseTwoBytePictureId) {
  const uint8_t kHeaderLength = 3;
  uint8_t packet[10] = {0};
  packet[0] = 0x80;  // I:1 P:0 L:0 F:0 B:0 E:0 V:0 R:0
  packet[1] = 0x80 | ((kMaxTwoBytePictureId >> 8) & 0x7F);
  packet[2] = kMaxTwoBytePictureId & 0xFF;

  expected_.picture_id = kMaxTwoBytePictureId;
  expected_.max_picture_id = kMaxTwoBytePictureId;
  ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
// Non-flexible mode layer info: GOF_IDX/S/D byte followed by TL0PICIDX.
TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithNonFlexibleMode) {
  const uint8_t kHeaderLength = 3;
  const uint8_t kGofIdx = 7;
  const uint8_t kSpatialIdx = 1;
  const uint8_t kDbit = 1;
  const uint8_t kTl0PicIdx = 17;
  uint8_t packet[13] = {0};
  packet[0] = 0x20;  // I:0 P:0 L:1 F:0 B:0 E:0 V:0 R:0
  packet[1] = (kGofIdx << 4) | (kSpatialIdx << 1) | kDbit;  // GOF_IDX:7 S:1 D:1
  packet[2] = kTl0PicIdx;  // TL0PICIDX:17

  expected_.gof_idx = kGofIdx;
  expected_.spatial_idx = kSpatialIdx;
  expected_.inter_layer_predicted = kDbit ? true : false;
  expected_.tl0_pic_idx = kTl0PicIdx;
  ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
// Flexible mode layer info: single T/U/S/D byte and no TL0PICIDX.
TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithFlexibleMode) {
  const uint8_t kHeaderLength = 2;
  const uint8_t kTemporalIdx = 2;
  const uint8_t kUbit = 1;
  const uint8_t kSpatialIdx = 0;
  const uint8_t kDbit = 0;
  uint8_t packet[13] = {0};
  packet[0] = 0x38;  // I:0 P:0 L:1 F:1 B:1 E:0 V:0 R:0
  packet[1] = (kTemporalIdx << 5) | (kUbit << 4) | (kSpatialIdx << 1) | kDbit;

  // I:0 P:0 L:1 F:1 B:1 E:0 V:0
  // L:   T:2 U:1 S:0 D:0
  expected_.beginning_of_frame = true;
  expected_.flexible_mode = true;
  expected_.temporal_idx = kTemporalIdx;
  expected_.temporal_up_switch = kUbit ? true : false;
  expected_.spatial_idx = kSpatialIdx;
  expected_.inter_layer_predicted = kDbit ? true : false;
  ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
// Three reference indices, the last in extended (two byte) P_DIFF form; the
// parser derives each ref picture id modulo the max picture id.
TEST_F(RtpDepacketizerVp9Test, ParseRefIdx) {
  const uint8_t kHeaderLength = 7;
  const int16_t kPictureId = 17;
  const int16_t kPdiff1 = 17;
  const int16_t kPdiff2 = 18;
  const int16_t kExtPdiff3 = 2171;
  uint8_t packet[13] = {0};
  packet[0] = 0xD8;  // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0
  packet[1] = 0x80 | ((kPictureId >> 8) & 0x7F);  // Two byte pictureID.
  packet[2] = kPictureId;
  packet[3] = (kPdiff1 << 2) | (0 << 1) | 1;  // P_DIFF X:0 N:1
  packet[4] = (kPdiff2 << 2) | (0 << 1) | 1;  // P_DIFF X:0 N:1
  packet[5] = ((kExtPdiff3 >> 8) << 2) | (1 << 1) | 0;  // P_DIFF X:1 N:0
  packet[6] = kExtPdiff3 & 0xff;  // EXTENDED P_DIFF

  // I:1 P:1 L:0 F:1 B:1 E:0 V:0
  // I:    PICTURE ID:17
  // I:
  // P,F:  P_DIFF:17   X:0 N:1 => refPictureId = 17 - 17 = 0
  // P,F:  P_DIFF:18   X:0 N:1 => refPictureId = 0x7FFF + 1 + 17 - 18 = 0x7FFF
  // P,F:  P_DIFF:2171 X:1 N:0 => refPictureId = 0x7FFF + 1 + 17 - 2171 = 30614
  expected_.beginning_of_frame = true;
  expected_.inter_pic_predicted = true;
  expected_.flexible_mode = true;
  expected_.picture_id = kPictureId;
  expected_.num_ref_pics = 3;
  expected_.pid_diff[0] = kPdiff1;
  expected_.pid_diff[1] = kPdiff2;
  expected_.pid_diff[2] = kExtPdiff3;
  expected_.ref_picture_id[0] = 0;
  expected_.ref_picture_id[1] = 0x7FFF;
  expected_.ref_picture_id[2] = 30614;
  ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
// Reference indices without a picture id (I bit cleared) are invalid.
TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithNoPictureId) {
  const int16_t kPdiff = 3;
  uint8_t packet[13] = {0};
  packet[0] = 0x58;  // I:0 P:1 L:0 F:1 B:1 E:0 V:0 R:0
  packet[1] = (kPdiff << 2) | (0 << 1) | 0;  // P,F: P_DIFF:3 X:0 N:0

  RtpDepacketizer::ParsedPayload parsed;
  EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
}
// More reference indices than kMaxVp9RefPics (3) make the packet invalid.
TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithTooManyRefPics) {
  const int16_t kPdiff = 3;
  uint8_t packet[13] = {0};
  packet[0] = 0xD8;  // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0
  packet[1] = kMaxOneBytePictureId;          // I:    PICTURE ID:127
  packet[2] = (kPdiff << 2) | (0 << 1) | 1;  // P,F:  P_DIFF:3 X:0 N:1
  packet[3] = (kPdiff << 2) | (0 << 1) | 1;  // P,F:  P_DIFF:3 X:0 N:1
  packet[4] = (kPdiff << 2) | (0 << 1) | 1;  // P,F:  P_DIFF:3 X:0 N:1
  packet[5] = (kPdiff << 2) | (0 << 1) | 0;  // P,F:  P_DIFF:3 X:0 N:0

  RtpDepacketizer::ParsedPayload parsed;
  EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
}
// SS data with two spatial layers (no resolutions, Y:0) and a two frame GOF
// where the second frame references one picture.
TEST_F(RtpDepacketizerVp9Test, ParseSsData) {
  const uint8_t kHeaderLength = 5;
  const uint8_t kYbit = 0;
  const size_t kNs = 2;
  const size_t kNg = 2;
  uint8_t packet[23] = {0};
  packet[0] = 0x0A;  // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0
  packet[1] = ((kNs - 1) << 5) | (kYbit << 4) | (kNg - 1);  // N_S Y N_G
  packet[2] = (0 << 5) | (1 << 4) | (0 << 2) | 0;           // T:0 U:1 R:0 -
  packet[3] = (2 << 5) | (0 << 4) | (1 << 2) | 0;           // T:2 U:0 R:1 -
  packet[4] = 33;

  expected_.beginning_of_frame = true;
  expected_.ss_data_available = true;
  expected_.num_spatial_layers = kNs;
  expected_.spatial_layer_resolution_present = kYbit ? true : false;
  expected_.gof.num_frames_in_gof = kNg;
  expected_.gof.temporal_idx[0] = 0;
  expected_.gof.temporal_idx[1] = 2;
  expected_.gof.temporal_up_switch[0] = true;
  expected_.gof.temporal_up_switch[1] = false;
  expected_.gof.num_ref_pics[0] = 0;
  expected_.gof.num_ref_pics[1] = 1;
  expected_.gof.pid_diff[1][0] = 33;
  ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
// A packet with B (begin) set and P (inter-pic predicted) cleared is the
// first packet of a key frame.
TEST_F(RtpDepacketizerVp9Test, ParseFirstPacketInKeyFrame) {
  const uint8_t packet[2] = {0x08, 0x00};  // I:0 P:0 L:0 F:0 B:1 E:0 V:0 R:0
  RtpDepacketizer::ParsedPayload parsed_payload;
  ASSERT_TRUE(depacketizer_->Parse(&parsed_payload, packet, sizeof(packet)));
  EXPECT_EQ(kVideoFrameKey, parsed_payload.frame_type);
  EXPECT_TRUE(parsed_payload.type.Video.isFirstPacket);
}
// A packet with P (inter-pic predicted) set belongs to a delta frame, and
// with B cleared it is not the first packet of the frame.
TEST_F(RtpDepacketizerVp9Test, ParseLastPacketInDeltaFrame) {
  const uint8_t packet[2] = {0x44, 0x00};  // I:0 P:1 L:0 F:0 B:0 E:1 V:0 R:0
  RtpDepacketizer::ParsedPayload parsed_payload;
  ASSERT_TRUE(depacketizer_->Parse(&parsed_payload, packet, sizeof(packet)));
  EXPECT_EQ(kVideoFrameDelta, parsed_payload.frame_type);
  EXPECT_FALSE(parsed_payload.type.Video.isFirstPacket);
}
// Resolutions from SS data (Y bit set) are copied into the parsed video
// header; the first spatial layer's width/height are reported.
TEST_F(RtpDepacketizerVp9Test, ParseResolution) {
  const uint16_t kWidth[2] = {640, 1280};
  const uint16_t kHeight[2] = {360, 720};
  uint8_t packet[20] = {0};
  packet[0] = 0x0A;  // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0
  packet[1] = (1 << 5) | (1 << 4) | 0;  // N_S:1 Y:1 N_G:0
  packet[2] = kWidth[0] >> 8;
  packet[3] = kWidth[0] & 0xFF;
  packet[4] = kHeight[0] >> 8;
  packet[5] = kHeight[0] & 0xFF;
  packet[6] = kWidth[1] >> 8;
  packet[7] = kWidth[1] & 0xFF;
  packet[8] = kHeight[1] >> 8;
  packet[9] = kHeight[1] & 0xFF;
  packet[10] = 0;  // T:0 U:0 R:0 -

  RtpDepacketizer::ParsedPayload parsed;
  ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
  EXPECT_EQ(kWidth[0], parsed.type.Video.width);
  EXPECT_EQ(kHeight[0], parsed.type.Video.height);
}
// Parsing must fail when a zero payload length is passed in.
TEST_F(RtpDepacketizerVp9Test, ParseFailsForNoPayloadLength) {
  const uint8_t packet[1] = {0};
  RtpDepacketizer::ParsedPayload parsed_payload;
  EXPECT_FALSE(depacketizer_->Parse(&parsed_payload, packet, 0));
}
// Parsing must fail when the buffer holds only the one byte payload
// descriptor, leaving no room for any payload.
TEST_F(RtpDepacketizerVp9Test, ParseFailsForTooShortBufferToFitPayload) {
  const size_t kHeaderLength = 1;
  const uint8_t packet[kHeaderLength] = {0};
  RtpDepacketizer::ParsedPayload parsed_payload;
  EXPECT_FALSE(depacketizer_->Parse(&parsed_payload, packet, sizeof(packet)));
}
} // namespace webrtc

View File

@ -118,6 +118,18 @@ void VCMPacket::CopyCodecSpecifics(const RTPVideoHeader& videoHeader) {
codec = kVideoCodecVP8;
return;
case kRtpVideoVp9:
if (isFirstPacket && markerBit)
completeNALU = kNaluComplete;
else if (isFirstPacket)
completeNALU = kNaluStart;
else if (markerBit)
completeNALU = kNaluEnd;
else
completeNALU = kNaluIncomplete;
codec = kVideoCodecVP9;
return;
case kRtpVideoH264:
isFirstPacket = videoHeader.isFirstPacket;
if (isFirstPacket)