diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 2844d52ed1..de0e6756d0 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -129,8 +129,18 @@ rtc_library("video_coding") {
     "media_opt_util.h",
     "packet_buffer.cc",
     "packet_buffer.h",
+    "rtp_frame_id_only_ref_finder.cc",
+    "rtp_frame_id_only_ref_finder.h",
     "rtp_frame_reference_finder.cc",
     "rtp_frame_reference_finder.h",
+    "rtp_generic_ref_finder.cc",
+    "rtp_generic_ref_finder.h",
+    "rtp_seq_num_only_ref_finder.cc",
+    "rtp_seq_num_only_ref_finder.h",
+    "rtp_vp8_ref_finder.cc",
+    "rtp_vp8_ref_finder.h",
+    "rtp_vp9_ref_finder.cc",
+    "rtp_vp9_ref_finder.h",
     "rtt_filter.cc",
     "rtt_filter.h",
     "timestamp_map.cc",
diff --git a/modules/video_coding/rtp_frame_id_only_ref_finder.cc b/modules/video_coding/rtp_frame_id_only_ref_finder.cc
new file mode 100644
index 0000000000..f2494ec763
--- /dev/null
+++ b/modules/video_coding/rtp_frame_id_only_ref_finder.cc
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_frame_id_only_ref_finder.h"
+
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace video_coding {
+
+// Key frames get no references; delta frames reference the previous frame id.
+RtpFrameReferenceFinder::ReturnVector RtpFrameIdOnlyRefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame,
+    int frame_id) {
+  const bool is_key = frame->frame_type() == VideoFrameType::kVideoFrameKey;
+  frame->id.picture_id = unwrapper_.Unwrap(frame_id & (kFrameIdLength - 1));
+  frame->num_references = is_key ? 0 : 1;
+  frame->references[0] = frame->id.picture_id - 1;
+  RtpFrameReferenceFinder::ReturnVector frames;
+  frames.push_back(std::move(frame));
+  return frames;
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_frame_id_only_ref_finder.h b/modules/video_coding/rtp_frame_id_only_ref_finder.h
new file mode 100644
index 0000000000..7728ba92bc
--- /dev/null
+++ b/modules/video_coding/rtp_frame_id_only_ref_finder.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_RTP_FRAME_ID_ONLY_REF_FINDER_H_
+#define MODULES_VIDEO_CODING_RTP_FRAME_ID_ONLY_REF_FINDER_H_
+
+#include <memory>
+
+#include "absl/container/inlined_vector.h"
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+#include "rtc_base/numerics/sequence_number_util.h"
+
+namespace webrtc {
+namespace video_coding {
+// Sets frame references from a per-frame id alone (see ManageFrame).
+class RtpFrameIdOnlyRefFinder {
+ public:
+  RtpFrameIdOnlyRefFinder() = default;
+  // Unwraps |frame_id| into |frame|'s id, sets its reference, returns |frame|.
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr<RtpFrameObject> frame,
+      int frame_id);
+
+ private:
+  static constexpr int kFrameIdLength = 1 << 15;  // Frame ids wrap at 2^15.
+  SeqNumUnwrapper<uint16_t, kFrameIdLength> unwrapper_;
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_RTP_FRAME_ID_ONLY_REF_FINDER_H_
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index 98751c0404..13e1fe38aa 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -10,15 +10,141 @@
 
 #include "modules/video_coding/rtp_frame_reference_finder.h"
 
-#include <algorithm>
+#include <utility>
 
-#include "absl/base/macros.h"
+#include "absl/types/variant.h"
 #include "modules/video_coding/frame_object.h"
-#include "rtc_base/checks.h"
-#include "rtc_base/logging.h"
+#include "modules/video_coding/rtp_frame_id_only_ref_finder.h"
+#include "modules/video_coding/rtp_generic_ref_finder.h"
+#include "modules/video_coding/rtp_seq_num_only_ref_finder.h"
+#include "modules/video_coding/rtp_vp8_ref_finder.h"
+#include "modules/video_coding/rtp_vp9_ref_finder.h"
 
 namespace webrtc {
 namespace video_coding {
+namespace internal {
+class RtpFrameReferenceFinderImpl {
+ public:
+  RtpFrameReferenceFinderImpl() = default;
+  // Each call below is dispatched to the ref finder held in |ref_finder_|.
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr<RtpFrameObject> frame);
+  RtpFrameReferenceFinder::ReturnVector PaddingReceived(uint16_t seq_num);
+  void ClearTo(uint16_t seq_num);
+
+ private:
+  using RefFinder = absl::variant<absl::monostate,
+                                  RtpGenericFrameRefFinder,
+                                  RtpFrameIdOnlyRefFinder,
+                                  RtpSeqNumOnlyRefFinder,
+                                  RtpVp8RefFinder,
+                                  RtpVp9RefFinder>;
+  // Returns the held finder if it is a T, else replaces it with a new T.
+  template <typename T>
+  T& GetRefFinderAs();
+  RefFinder ref_finder_;  // Holds absl::monostate until a finder is requested.
+};
+
+// Routes |frame| to the ref finder fitting its header: a generic descriptor
+// wins, otherwise the codec type and the VP8/VP9 fields present decide.
+RtpFrameReferenceFinder::ReturnVector RtpFrameReferenceFinderImpl::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
+
+  if (video_header.generic.has_value()) {
+    return GetRefFinderAs<RtpGenericFrameRefFinder>().ManageFrame(
+        std::move(frame), *video_header.generic);
+  }
+
+  switch (frame->codec_type()) {
+    case kVideoCodecVP8: {
+      const RTPVideoHeaderVP8& vp8_header =
+          absl::get<RTPVideoHeaderVP8>(video_header.video_type_header);
+      // Without temporal layer info fall back to picture id or seq num.
+      if (vp8_header.temporalIdx == kNoTemporalIdx ||
+          vp8_header.tl0PicIdx == kNoTl0PicIdx) {
+        if (vp8_header.pictureId == kNoPictureId) {
+          return GetRefFinderAs<RtpSeqNumOnlyRefFinder>().ManageFrame(
+              std::move(frame));
+        }
+
+        return GetRefFinderAs<RtpFrameIdOnlyRefFinder>().ManageFrame(
+            std::move(frame), vp8_header.pictureId);
+      }
+
+      return GetRefFinderAs<RtpVp8RefFinder>().ManageFrame(std::move(frame));
+    }
+    case kVideoCodecVP9: {
+      const RTPVideoHeaderVP9& vp9_header =
+          absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
+      // Same fallback as VP8 when the temporal index is missing.
+      if (vp9_header.temporal_idx == kNoTemporalIdx) {
+        if (vp9_header.picture_id == kNoPictureId) {
+          return GetRefFinderAs<RtpSeqNumOnlyRefFinder>().ManageFrame(
+              std::move(frame));
+        }
+
+        return GetRefFinderAs<RtpFrameIdOnlyRefFinder>().ManageFrame(
+            std::move(frame), vp9_header.picture_id);
+      }
+
+      return GetRefFinderAs<RtpVp9RefFinder>().ManageFrame(std::move(frame));
+    }
+    case kVideoCodecGeneric: {
+      if (auto* generic_header = absl::get_if<RTPVideoHeaderLegacyGeneric>(
+              &video_header.video_type_header)) {
+        return GetRefFinderAs<RtpFrameIdOnlyRefFinder>().ManageFrame(
+            std::move(frame), generic_header->picture_id);
+      }
+
+      return GetRefFinderAs<RtpSeqNumOnlyRefFinder>().ManageFrame(
+          std::move(frame));
+    }
+    default: {
+      // H264 and any other codec: use seq nums only rather than crashing.
+      return GetRefFinderAs<RtpSeqNumOnlyRefFinder>().ManageFrame(
+          std::move(frame));
+    }
+  }
+}
+
+// Padding is forwarded only to the seq-num-only finder; else no frames result.
+RtpFrameReferenceFinder::ReturnVector
+RtpFrameReferenceFinderImpl::PaddingReceived(uint16_t seq_num) {
+  auto* seq_num_finder = absl::get_if<RtpSeqNumOnlyRefFinder>(&ref_finder_);
+  return seq_num_finder ? seq_num_finder->PaddingReceived(seq_num)
+                        : RtpFrameReferenceFinder::ReturnVector();
+}
+
+// Forwards |seq_num| to the ClearTo of the currently active ref finder.
+//
+// Only the seq-num-only, VP8 and VP9 finders are forwarded to; when no finder
+// has been created yet, or the generic / frame-id-only finder is active,
+// the call is a no-op.
+void RtpFrameReferenceFinderImpl::ClearTo(uint16_t seq_num) {
+  if (auto* seq_num_finder =
+          absl::get_if<RtpSeqNumOnlyRefFinder>(&ref_finder_)) {
+    seq_num_finder->ClearTo(seq_num);
+    return;
+  }
+  if (auto* vp8_finder = absl::get_if<RtpVp8RefFinder>(&ref_finder_)) {
+    vp8_finder->ClearTo(seq_num);
+    return;
+  }
+  if (auto* vp9_finder = absl::get_if<RtpVp9RefFinder>(&ref_finder_)) {
+    vp9_finder->ClearTo(seq_num);
+  }
+}
+
+// Returns the active finder when it already is a T; otherwise the variant is
+// re-seated with a default-constructed T, discarding the previous finder.
+template <typename T>
+T& RtpFrameReferenceFinderImpl::GetRefFinderAs() {
+  T* current = absl::get_if<T>(&ref_finder_);
+  return current ? *current : ref_finder_.emplace<T>();
+}
+
+}  // namespace internal
 
 RtpFrameReferenceFinder::RtpFrameReferenceFinder(
     OnCompleteFrameCallback* frame_callback)
@@ -27,11 +153,9 @@ RtpFrameReferenceFinder::RtpFrameReferenceFinder(
 RtpFrameReferenceFinder::RtpFrameReferenceFinder(
     OnCompleteFrameCallback* frame_callback,
     int64_t picture_id_offset)
-    : last_picture_id_(-1),
-      current_ss_idx_(0),
-      cleared_to_seq_num_(-1),
+    : picture_id_offset_(picture_id_offset),
       frame_callback_(frame_callback),
-      picture_id_offset_(picture_id_offset) {}
+      impl_(std::make_unique<internal::RtpFrameReferenceFinderImpl>()) {}
 
 RtpFrameReferenceFinder::~RtpFrameReferenceFinder() = default;
 
@@ -42,672 +166,27 @@ void RtpFrameReferenceFinder::ManageFrame(
       AheadOf<uint16_t>(cleared_to_seq_num_, frame->first_seq_num())) {
     return;
   }
-
-  FrameDecision decision = ManageFrameInternal(frame.get());
-
-  switch (decision) {
-    case kStash:
-      if (stashed_frames_.size() > kMaxStashedFrames)
-        stashed_frames_.pop_back();
-      stashed_frames_.push_front(std::move(frame));
-      break;
-    case kHandOff:
-      HandOffFrame(std::move(frame));
-      RetryStashedFrames();
-      break;
-    case kDrop:
-      break;
-  }
-}
-
-void RtpFrameReferenceFinder::RetryStashedFrames() {
-  bool complete_frame = false;
-  do {
-    complete_frame = false;
-    for (auto frame_it = stashed_frames_.begin();
-         frame_it != stashed_frames_.end();) {
-      FrameDecision decision = ManageFrameInternal(frame_it->get());
-
-      switch (decision) {
-        case kStash:
-          ++frame_it;
-          break;
-        case kHandOff:
-          complete_frame = true;
-          HandOffFrame(std::move(*frame_it));
-          ABSL_FALLTHROUGH_INTENDED;
-        case kDrop:
-          frame_it = stashed_frames_.erase(frame_it);
-      }
-    }
-  } while (complete_frame);
-}
-
-void RtpFrameReferenceFinder::HandOffFrame(
-    std::unique_ptr<RtpFrameObject> frame) {
-  frame->id.picture_id += picture_id_offset_;
-  for (size_t i = 0; i < frame->num_references; ++i) {
-    frame->references[i] += picture_id_offset_;
-  }
-
-  frame_callback_->OnCompleteFrame(std::move(frame));
-}
-
-RtpFrameReferenceFinder::FrameDecision
-RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
-  if (const absl::optional<RTPVideoHeader::GenericDescriptorInfo>&
-          generic_descriptor = frame->GetRtpVideoHeader().generic) {
-    return ManageFrameGeneric(frame, *generic_descriptor);
-  }
-
-  switch (frame->codec_type()) {
-    case kVideoCodecVP8:
-      return ManageFrameVp8(frame);
-    case kVideoCodecVP9:
-      return ManageFrameVp9(frame);
-    case kVideoCodecGeneric:
-      if (auto* generic_header = absl::get_if<RTPVideoHeaderLegacyGeneric>(
-              &frame->GetRtpVideoHeader().video_type_header)) {
-        return ManageFramePidOrSeqNum(frame, generic_header->picture_id);
-      }
-      ABSL_FALLTHROUGH_INTENDED;
-    default:
-      return ManageFramePidOrSeqNum(frame, kNoPictureId);
-  }
+  HandOffFrames(impl_->ManageFrame(std::move(frame)));
 }
 
 void RtpFrameReferenceFinder::PaddingReceived(uint16_t seq_num) {
-  auto clean_padding_to =
-      stashed_padding_.lower_bound(seq_num - kMaxPaddingAge);
-  stashed_padding_.erase(stashed_padding_.begin(), clean_padding_to);
-  stashed_padding_.insert(seq_num);
-  UpdateLastPictureIdWithPadding(seq_num);
-  RetryStashedFrames();
+  HandOffFrames(impl_->PaddingReceived(seq_num));
 }
 
 void RtpFrameReferenceFinder::ClearTo(uint16_t seq_num) {
   cleared_to_seq_num_ = seq_num;
-
-  auto it = stashed_frames_.begin();
-  while (it != stashed_frames_.end()) {
-    if (AheadOf<uint16_t>(cleared_to_seq_num_, (*it)->first_seq_num())) {
-      it = stashed_frames_.erase(it);
-    } else {
-      ++it;
-    }
-  }
+  impl_->ClearTo(seq_num);
 }
 
-void RtpFrameReferenceFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) {
-  auto gop_seq_num_it = last_seq_num_gop_.upper_bound(seq_num);
-
-  // If this padding packet "belongs" to a group of pictures that we don't track
-  // anymore, do nothing.
-  if (gop_seq_num_it == last_seq_num_gop_.begin())
-    return;
-  --gop_seq_num_it;
-
-  // Calculate the next contiuous sequence number and search for it in
-  // the padding packets we have stashed.
-  uint16_t next_seq_num_with_padding = gop_seq_num_it->second.second + 1;
-  auto padding_seq_num_it =
-      stashed_padding_.lower_bound(next_seq_num_with_padding);
-
-  // While there still are padding packets and those padding packets are
-  // continuous, then advance the "last-picture-id-with-padding" and remove
-  // the stashed padding packet.
-  while (padding_seq_num_it != stashed_padding_.end() &&
-         *padding_seq_num_it == next_seq_num_with_padding) {
-    gop_seq_num_it->second.second = next_seq_num_with_padding;
-    ++next_seq_num_with_padding;
-    padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it);
-  }
-
-  // In the case where the stream has been continuous without any new keyframes
-  // for a while there is a risk that new frames will appear to be older than
-  // the keyframe they belong to due to wrapping sequence number. In order
-  // to prevent this we advance the picture id of the keyframe every so often.
-  if (ForwardDiff(gop_seq_num_it->first, seq_num) > 10000) {
-    auto save = gop_seq_num_it->second;
-    last_seq_num_gop_.clear();
-    last_seq_num_gop_[seq_num] = save;
-  }
-}
-
-RtpFrameReferenceFinder::FrameDecision
-RtpFrameReferenceFinder::ManageFrameGeneric(
-    RtpFrameObject* frame,
-    const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
-  frame->id.picture_id = descriptor.frame_id;
-  frame->SetSpatialIndex(descriptor.spatial_index);
-
-  if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
-    RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
-    return kDrop;
-  }
-
-  frame->num_references = descriptor.dependencies.size();
-  for (size_t i = 0; i < descriptor.dependencies.size(); ++i)
-    frame->references[i] = descriptor.dependencies[i];
-
-  return kHandOff;
-}
-
-RtpFrameReferenceFinder::FrameDecision
-RtpFrameReferenceFinder::ManageFramePidOrSeqNum(RtpFrameObject* frame,
-                                                int picture_id) {
-  // If |picture_id| is specified then we use that to set the frame references,
-  // otherwise we use sequence number.
-  if (picture_id != kNoPictureId) {
-    frame->id.picture_id = unwrapper_.Unwrap(picture_id & 0x7FFF);
-    frame->num_references =
-        frame->frame_type() == VideoFrameType::kVideoFrameKey ? 0 : 1;
-    frame->references[0] = frame->id.picture_id - 1;
-    return kHandOff;
-  }
-
-  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
-    last_seq_num_gop_.insert(std::make_pair(
-        frame->last_seq_num(),
-        std::make_pair(frame->last_seq_num(), frame->last_seq_num())));
-  }
-
-  // We have received a frame but not yet a keyframe, stash this frame.
-  if (last_seq_num_gop_.empty())
-    return kStash;
-
-  // Clean up info for old keyframes but make sure to keep info
-  // for the last keyframe.
-  auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100);
-  for (auto it = last_seq_num_gop_.begin();
-       it != clean_to && last_seq_num_gop_.size() > 1;) {
-    it = last_seq_num_gop_.erase(it);
-  }
-
-  // Find the last sequence number of the last frame for the keyframe
-  // that this frame indirectly references.
-  auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
-  if (seq_num_it == last_seq_num_gop_.begin()) {
-    RTC_LOG(LS_WARNING) << "Generic frame with packet range ["
-                        << frame->first_seq_num() << ", "
-                        << frame->last_seq_num()
-                        << "] has no GoP, dropping frame.";
-    return kDrop;
-  }
-  seq_num_it--;
-
-  // Make sure the packet sequence numbers are continuous, otherwise stash
-  // this frame.
-  uint16_t last_picture_id_gop = seq_num_it->second.first;
-  uint16_t last_picture_id_with_padding_gop = seq_num_it->second.second;
-  if (frame->frame_type() == VideoFrameType::kVideoFrameDelta) {
-    uint16_t prev_seq_num = frame->first_seq_num() - 1;
-
-    if (prev_seq_num != last_picture_id_with_padding_gop)
-      return kStash;
-  }
-
-  RTC_DCHECK(AheadOrAt(frame->last_seq_num(), seq_num_it->first));
-
-  // Since keyframes can cause reordering we can't simply assign the
-  // picture id according to some incrementing counter.
-  frame->id.picture_id = frame->last_seq_num();
-  frame->num_references =
-      frame->frame_type() == VideoFrameType::kVideoFrameDelta;
-  frame->references[0] = rtp_seq_num_unwrapper_.Unwrap(last_picture_id_gop);
-  if (AheadOf<uint16_t>(frame->id.picture_id, last_picture_id_gop)) {
-    seq_num_it->second.first = frame->id.picture_id;
-    seq_num_it->second.second = frame->id.picture_id;
-  }
-
-  UpdateLastPictureIdWithPadding(frame->id.picture_id);
-  frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id);
-  return kHandOff;
-}
-
-RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp8(
-    RtpFrameObject* frame) {
-  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
-  const RTPVideoHeaderVP8& codec_header =
-      absl::get<RTPVideoHeaderVP8>(video_header.video_type_header);
-
-  if (codec_header.pictureId == kNoPictureId ||
-      codec_header.temporalIdx == kNoTemporalIdx ||
-      codec_header.tl0PicIdx == kNoTl0PicIdx) {
-    return ManageFramePidOrSeqNum(frame, codec_header.pictureId);
-  }
-
-  // Protect against corrupted packets with arbitrary large temporal idx.
-  if (codec_header.temporalIdx >= kMaxTemporalLayers)
-    return kDrop;
-
-  frame->id.picture_id = codec_header.pictureId & 0x7FFF;
-
-  if (last_picture_id_ == -1)
-    last_picture_id_ = frame->id.picture_id;
-
-  // Clean up info about not yet received frames that are too old.
-  uint16_t old_picture_id =
-      Subtract<kPicIdLength>(frame->id.picture_id, kMaxNotYetReceivedFrames);
-  auto clean_frames_to = not_yet_received_frames_.lower_bound(old_picture_id);
-  not_yet_received_frames_.erase(not_yet_received_frames_.begin(),
-                                 clean_frames_to);
-  // Avoid re-adding picture ids that were just erased.
-  if (AheadOf<uint16_t, kPicIdLength>(old_picture_id, last_picture_id_)) {
-    last_picture_id_ = old_picture_id;
-  }
-  // Find if there has been a gap in fully received frames and save the picture
-  // id of those frames in |not_yet_received_frames_|.
-  if (AheadOf<uint16_t, kPicIdLength>(frame->id.picture_id, last_picture_id_)) {
-    do {
-      last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
-      not_yet_received_frames_.insert(last_picture_id_);
-    } while (last_picture_id_ != frame->id.picture_id);
-  }
-
-  int64_t unwrapped_tl0 = tl0_unwrapper_.Unwrap(codec_header.tl0PicIdx & 0xFF);
-
-  // Clean up info for base layers that are too old.
-  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxLayerInfo;
-  auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
-  layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
-
-  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
-    if (codec_header.temporalIdx != 0) {
-      return kDrop;
-    }
-    frame->num_references = 0;
-    layer_info_[unwrapped_tl0].fill(-1);
-    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
-    return kHandOff;
-  }
-
-  auto layer_info_it = layer_info_.find(
-      codec_header.temporalIdx == 0 ? unwrapped_tl0 - 1 : unwrapped_tl0);
-
-  // If we don't have the base layer frame yet, stash this frame.
-  if (layer_info_it == layer_info_.end())
-    return kStash;
-
-  // A non keyframe base layer frame has been received, copy the layer info
-  // from the previous base layer frame and set a reference to the previous
-  // base layer frame.
-  if (codec_header.temporalIdx == 0) {
-    layer_info_it =
-        layer_info_.emplace(unwrapped_tl0, layer_info_it->second).first;
-    frame->num_references = 1;
-    int64_t last_pid_on_layer = layer_info_it->second[0];
-
-    // Is this an old frame that has already been used to update the state? If
-    // so, drop it.
-    if (AheadOrAt<uint16_t, kPicIdLength>(last_pid_on_layer,
-                                          frame->id.picture_id)) {
-      return kDrop;
-    }
-
-    frame->references[0] = last_pid_on_layer;
-    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
-    return kHandOff;
-  }
-
-  // Layer sync frame, this frame only references its base layer frame.
-  if (codec_header.layerSync) {
-    frame->num_references = 1;
-    int64_t last_pid_on_layer = layer_info_it->second[codec_header.temporalIdx];
-
-    // Is this an old frame that has already been used to update the state? If
-    // so, drop it.
-    if (last_pid_on_layer != -1 &&
-        AheadOrAt<uint16_t, kPicIdLength>(last_pid_on_layer,
-                                          frame->id.picture_id)) {
-      return kDrop;
-    }
-
-    frame->references[0] = layer_info_it->second[0];
-    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
-    return kHandOff;
-  }
-
-  // Find all references for this frame.
-  frame->num_references = 0;
-  for (uint8_t layer = 0; layer <= codec_header.temporalIdx; ++layer) {
-    // If we have not yet received a previous frame on this temporal layer,
-    // stash this frame.
-    if (layer_info_it->second[layer] == -1)
-      return kStash;
-
-    // If the last frame on this layer is ahead of this frame it means that
-    // a layer sync frame has been received after this frame for the same
-    // base layer frame, drop this frame.
-    if (AheadOf<uint16_t, kPicIdLength>(layer_info_it->second[layer],
-                                        frame->id.picture_id)) {
-      return kDrop;
-    }
-
-    // If we have not yet received a frame between this frame and the referenced
-    // frame then we have to wait for that frame to be completed first.
-    auto not_received_frame_it =
-        not_yet_received_frames_.upper_bound(layer_info_it->second[layer]);
-    if (not_received_frame_it != not_yet_received_frames_.end() &&
-        AheadOf<uint16_t, kPicIdLength>(frame->id.picture_id,
-                                        *not_received_frame_it)) {
-      return kStash;
-    }
-
-    if (!(AheadOf<uint16_t, kPicIdLength>(frame->id.picture_id,
-                                          layer_info_it->second[layer]))) {
-      RTC_LOG(LS_WARNING) << "Frame with picture id " << frame->id.picture_id
-                          << " and packet range [" << frame->first_seq_num()
-                          << ", " << frame->last_seq_num()
-                          << "] already received, "
-                             " dropping frame.";
-      return kDrop;
-    }
-
-    ++frame->num_references;
-    frame->references[layer] = layer_info_it->second[layer];
-  }
-
-  UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
-  return kHandOff;
-}
-
-void RtpFrameReferenceFinder::UpdateLayerInfoVp8(RtpFrameObject* frame,
-                                                 int64_t unwrapped_tl0,
-                                                 uint8_t temporal_idx) {
-  auto layer_info_it = layer_info_.find(unwrapped_tl0);
-
-  // Update this layer info and newer.
-  while (layer_info_it != layer_info_.end()) {
-    if (layer_info_it->second[temporal_idx] != -1 &&
-        AheadOf<uint16_t, kPicIdLength>(layer_info_it->second[temporal_idx],
-                                        frame->id.picture_id)) {
-      // The frame was not newer, then no subsequent layer info have to be
-      // update.
-      break;
-    }
-
-    layer_info_it->second[temporal_idx] = frame->id.picture_id;
-    ++unwrapped_tl0;
-    layer_info_it = layer_info_.find(unwrapped_tl0);
-  }
-  not_yet_received_frames_.erase(frame->id.picture_id);
-
-  UnwrapPictureIds(frame);
-}
-
-RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp9(
-    RtpFrameObject* frame) {
-  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
-  const RTPVideoHeaderVP9& codec_header =
-      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
-
-  if (codec_header.picture_id == kNoPictureId ||
-      codec_header.temporal_idx == kNoTemporalIdx) {
-    return ManageFramePidOrSeqNum(frame, codec_header.picture_id);
-  }
-
-  // Protect against corrupted packets with arbitrary large temporal idx.
-  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
-      codec_header.spatial_idx >= kMaxSpatialLayers)
-    return kDrop;
-
-  frame->id.spatial_layer = codec_header.spatial_idx;
-  frame->inter_layer_predicted = codec_header.inter_layer_predicted;
-  frame->id.picture_id = codec_header.picture_id & 0x7FFF;
-
-  if (last_picture_id_ == -1)
-    last_picture_id_ = frame->id.picture_id;
-
-  if (codec_header.flexible_mode) {
-    if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
-      return kDrop;
-    }
-    frame->num_references = codec_header.num_ref_pics;
+void RtpFrameReferenceFinder::HandOffFrames(ReturnVector frames) {
+  for (auto& frame : frames) {
+    frame->id.picture_id += picture_id_offset_;
     for (size_t i = 0; i < frame->num_references; ++i) {
-      frame->references[i] = Subtract<kPicIdLength>(frame->id.picture_id,
-                                                    codec_header.pid_diff[i]);
+      frame->references[i] += picture_id_offset_;
     }
 
-    UnwrapPictureIds(frame);
-    return kHandOff;
+    frame_callback_->OnCompleteFrame(std::move(frame));
   }
-
-  if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
-    RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
-                           "non-flexible mode.";
-    return kDrop;
-  }
-
-  GofInfo* info;
-  int64_t unwrapped_tl0 =
-      tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
-  if (codec_header.ss_data_available) {
-    if (codec_header.temporal_idx != 0) {
-      RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
-                             "layer frame. Scalability structure ignored.";
-    } else {
-      if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
-        return kDrop;
-      }
-
-      for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
-        if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
-          return kDrop;
-        }
-      }
-
-      GofInfoVP9 gof = codec_header.gof;
-      if (gof.num_frames_in_gof == 0) {
-        RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
-                               "that stream has only one temporal layer.";
-        gof.SetGofInfoVP9(kTemporalStructureMode1);
-      }
-
-      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
-      scalability_structures_[current_ss_idx_] = gof;
-      scalability_structures_[current_ss_idx_].pid_start = frame->id.picture_id;
-      gof_info_.emplace(unwrapped_tl0,
-                        GofInfo(&scalability_structures_[current_ss_idx_],
-                                frame->id.picture_id));
-    }
-
-    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
-    if (gof_info_it == gof_info_.end())
-      return kStash;
-
-    info = &gof_info_it->second;
-
-    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
-      frame->num_references = 0;
-      FrameReceivedVp9(frame->id.picture_id, info);
-      UnwrapPictureIds(frame);
-      return kHandOff;
-    }
-  } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
-    if (frame->id.spatial_layer == 0) {
-      RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
-      return kDrop;
-    }
-    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
-    if (gof_info_it == gof_info_.end())
-      return kStash;
-
-    info = &gof_info_it->second;
-
-    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
-      frame->num_references = 0;
-      FrameReceivedVp9(frame->id.picture_id, info);
-      UnwrapPictureIds(frame);
-      return kHandOff;
-    }
-  } else {
-    auto gof_info_it = gof_info_.find(
-        (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
-
-    // Gof info for this frame is not available yet, stash this frame.
-    if (gof_info_it == gof_info_.end())
-      return kStash;
-
-    if (codec_header.temporal_idx == 0) {
-      gof_info_it = gof_info_
-                        .emplace(unwrapped_tl0, GofInfo(gof_info_it->second.gof,
-                                                        frame->id.picture_id))
-                        .first;
-    }
-
-    info = &gof_info_it->second;
-  }
-
-  // Clean up info for base layers that are too old.
-  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
-  auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
-  gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
-
-  FrameReceivedVp9(frame->id.picture_id, info);
-
-  // Make sure we don't miss any frame that could potentially have the
-  // up switch flag set.
-  if (MissingRequiredFrameVp9(frame->id.picture_id, *info))
-    return kStash;
-
-  if (codec_header.temporal_up_switch)
-    up_switch_.emplace(frame->id.picture_id, codec_header.temporal_idx);
-
-  // Clean out old info about up switch frames.
-  uint16_t old_picture_id = Subtract<kPicIdLength>(frame->id.picture_id, 50);
-  auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
-  up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
-
-  size_t diff = ForwardDiff<uint16_t, kPicIdLength>(info->gof->pid_start,
-                                                    frame->id.picture_id);
-  size_t gof_idx = diff % info->gof->num_frames_in_gof;
-
-  if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
-    return kDrop;
-  }
-  // Populate references according to the scalability structure.
-  frame->num_references = info->gof->num_ref_pics[gof_idx];
-  for (size_t i = 0; i < frame->num_references; ++i) {
-    frame->references[i] = Subtract<kPicIdLength>(
-        frame->id.picture_id, info->gof->pid_diff[gof_idx][i]);
-
-    // If this is a reference to a frame earlier than the last up switch point,
-    // then ignore this reference.
-    if (UpSwitchInIntervalVp9(frame->id.picture_id, codec_header.temporal_idx,
-                              frame->references[i])) {
-      --frame->num_references;
-    }
-  }
-
-  // Override GOF references.
-  if (!codec_header.inter_pic_predicted) {
-    frame->num_references = 0;
-  }
-
-  UnwrapPictureIds(frame);
-  return kHandOff;
-}
-
-bool RtpFrameReferenceFinder::MissingRequiredFrameVp9(uint16_t picture_id,
-                                                      const GofInfo& info) {
-  size_t diff =
-      ForwardDiff<uint16_t, kPicIdLength>(info.gof->pid_start, picture_id);
-  size_t gof_idx = diff % info.gof->num_frames_in_gof;
-  size_t temporal_idx = info.gof->temporal_idx[gof_idx];
-
-  if (temporal_idx >= kMaxTemporalLayers) {
-    RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
-                        << " temporal "
-                           "layers are supported.";
-    return true;
-  }
-
-  // For every reference this frame has, check if there is a frame missing in
-  // the interval (|ref_pid|, |picture_id|) in any of the lower temporal
-  // layers. If so, we are missing a required frame.
-  uint8_t num_references = info.gof->num_ref_pics[gof_idx];
-  for (size_t i = 0; i < num_references; ++i) {
-    uint16_t ref_pid =
-        Subtract<kPicIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
-    for (size_t l = 0; l < temporal_idx; ++l) {
-      auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
-      if (missing_frame_it != missing_frames_for_layer_[l].end() &&
-          AheadOf<uint16_t, kPicIdLength>(picture_id, *missing_frame_it)) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-void RtpFrameReferenceFinder::FrameReceivedVp9(uint16_t picture_id,
-                                               GofInfo* info) {
-  int last_picture_id = info->last_picture_id;
-  size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);
-
-  // If there is a gap, find which temporal layer the missing frames
-  // belong to and add the frame as missing for that temporal layer.
-  // Otherwise, remove this frame from the set of missing frames.
-  if (AheadOf<uint16_t, kPicIdLength>(picture_id, last_picture_id)) {
-    size_t diff = ForwardDiff<uint16_t, kPicIdLength>(info->gof->pid_start,
-                                                      last_picture_id);
-    size_t gof_idx = diff % gof_size;
-
-    last_picture_id = Add<kPicIdLength>(last_picture_id, 1);
-    while (last_picture_id != picture_id) {
-      gof_idx = (gof_idx + 1) % gof_size;
-      RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
-
-      size_t temporal_idx = info->gof->temporal_idx[gof_idx];
-      if (temporal_idx >= kMaxTemporalLayers) {
-        RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
-                            << " temporal "
-                               "layers are supported.";
-        return;
-      }
-
-      missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
-      last_picture_id = Add<kPicIdLength>(last_picture_id, 1);
-    }
-
-    info->last_picture_id = last_picture_id;
-  } else {
-    size_t diff =
-        ForwardDiff<uint16_t, kPicIdLength>(info->gof->pid_start, picture_id);
-    size_t gof_idx = diff % gof_size;
-    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
-
-    size_t temporal_idx = info->gof->temporal_idx[gof_idx];
-    if (temporal_idx >= kMaxTemporalLayers) {
-      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
-                          << " temporal "
-                             "layers are supported.";
-      return;
-    }
-
-    missing_frames_for_layer_[temporal_idx].erase(picture_id);
-  }
-}
-
-bool RtpFrameReferenceFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
-                                                    uint8_t temporal_idx,
-                                                    uint16_t pid_ref) {
-  for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
-       up_switch_it != up_switch_.end() &&
-       AheadOf<uint16_t, kPicIdLength>(picture_id, up_switch_it->first);
-       ++up_switch_it) {
-    if (up_switch_it->second < temporal_idx)
-      return true;
-  }
-
-  return false;
-}
-
-void RtpFrameReferenceFinder::UnwrapPictureIds(RtpFrameObject* frame) {
-  for (size_t i = 0; i < frame->num_references; ++i)
-    frame->references[i] = unwrapper_.Unwrap(frame->references[i]);
-  frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);
 }
 
 }  // namespace video_coding
diff --git a/modules/video_coding/rtp_frame_reference_finder.h b/modules/video_coding/rtp_frame_reference_finder.h
index 8be051c8bc..c7ee07e215 100644
--- a/modules/video_coding/rtp_frame_reference_finder.h
+++ b/modules/video_coding/rtp_frame_reference_finder.h
@@ -11,24 +11,15 @@
 #ifndef MODULES_VIDEO_CODING_RTP_FRAME_REFERENCE_FINDER_H_
 #define MODULES_VIDEO_CODING_RTP_FRAME_REFERENCE_FINDER_H_
 
-#include <array>
-#include <deque>
-#include <map>
 #include <memory>
-#include <set>
-#include <utility>
 
-#include "modules/include/module_common_types_public.h"
-#include "modules/rtp_rtcp/source/rtp_video_header.h"
-#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
-#include "rtc_base/numerics/sequence_number_util.h"
-#include "rtc_base/thread_annotations.h"
+#include "modules/video_coding/frame_object.h"
 
 namespace webrtc {
 namespace video_coding {
-
-class EncodedFrame;
-class RtpFrameObject;
+namespace internal {
+class RtpFrameReferenceFinderImpl;
+}  // namespace internal
 
 // A complete frame is a frame which has received all its packets and all its
 // references are known.
@@ -40,6 +31,8 @@ class OnCompleteFrameCallback {
 
 class RtpFrameReferenceFinder {
  public:
+  using ReturnVector = absl::InlinedVector<std::unique_ptr<RtpFrameObject>, 3>;
+
   explicit RtpFrameReferenceFinder(OnCompleteFrameCallback* frame_callback);
   explicit RtpFrameReferenceFinder(OnCompleteFrameCallback* frame_callback,
                                    int64_t picture_id_offset);
@@ -61,145 +54,15 @@ class RtpFrameReferenceFinder {
   void ClearTo(uint16_t seq_num);
 
  private:
-  static const uint16_t kPicIdLength = 1 << 15;
-  static const uint8_t kMaxTemporalLayers = 5;
-  static const int kMaxLayerInfo = 50;
-  static const int kMaxStashedFrames = 100;
-  static const int kMaxNotYetReceivedFrames = 100;
-  static const int kMaxGofSaved = 50;
-  static const int kMaxPaddingAge = 100;
-
-  enum FrameDecision { kStash, kHandOff, kDrop };
-
-  struct GofInfo {
-    GofInfo(GofInfoVP9* gof, uint16_t last_picture_id)
-        : gof(gof), last_picture_id(last_picture_id) {}
-    GofInfoVP9* gof;
-    uint16_t last_picture_id;
-  };
-
-  // Find the relevant group of pictures and update its "last-picture-id-with
-  // padding" sequence number.
-  void UpdateLastPictureIdWithPadding(uint16_t seq_num);
-
-  // Retry stashed frames until no more complete frames are found.
-  void RetryStashedFrames();
-
-  void HandOffFrame(std::unique_ptr<RtpFrameObject> frame);
-
-  FrameDecision ManageFrameInternal(RtpFrameObject* frame);
-
-  FrameDecision ManageFrameGeneric(
-      RtpFrameObject* frame,
-      const RTPVideoHeader::GenericDescriptorInfo& descriptor);
-
-  // Find references for frames with no or very limited information in the
-  // descriptor. If |picture_id| is unspecified then packet sequence numbers
-  // will be used to determine the references of the frames.
-  FrameDecision ManageFramePidOrSeqNum(RtpFrameObject* frame, int picture_id);
-
-  // Find references for Vp8 frames
-  FrameDecision ManageFrameVp8(RtpFrameObject* frame);
-
-  // Updates necessary layer info state used to determine frame references for
-  // Vp8.
-  void UpdateLayerInfoVp8(RtpFrameObject* frame,
-                          int64_t unwrapped_tl0,
-                          uint8_t temporal_idx);
-
-  // Find references for Vp9 frames
-  FrameDecision ManageFrameVp9(RtpFrameObject* frame);
-
-  // Check if we are missing a frame necessary to determine the references
-  // for this frame.
-  bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info);
-
-  // Updates which frames that have been received. If there is a gap,
-  // missing frames will be added to |missing_frames_for_layer_| or
-  // if this is an already missing frame then it will be removed.
-  void FrameReceivedVp9(uint16_t picture_id, GofInfo* info);
-
-  // Check if there is a frame with the up-switch flag set in the interval
-  // (|pid_ref|, |picture_id|) with temporal layer smaller than |temporal_idx|.
-  bool UpSwitchInIntervalVp9(uint16_t picture_id,
-                             uint8_t temporal_idx,
-                             uint16_t pid_ref);
-
-  // Unwrap |frame|s picture id and its references to 16 bits.
-  void UnwrapPictureIds(RtpFrameObject* frame);
-
-  // For every group of pictures, hold two sequence numbers. The first being
-  // the sequence number of the last packet of the last completed frame, and
-  // the second being the sequence number of the last packet of the last
-  // completed frame advanced by any potential continuous packets of padding.
-  std::map<uint16_t,
-           std::pair<uint16_t, uint16_t>,
-           DescendingSeqNumComp<uint16_t>>
-      last_seq_num_gop_;
-
-  // Save the last picture id in order to detect when there is a gap in frames
-  // that have not yet been fully received.
-  int last_picture_id_;
-
-  // Padding packets that have been received but that are not yet continuous
-  // with any group of pictures.
-  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> stashed_padding_;
-
-  // Frames earlier than the last received frame that have not yet been
-  // fully received.
-  std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>
-      not_yet_received_frames_;
-
-  // Sequence numbers of frames earlier than the last received frame that
-  // have not yet been fully received.
-  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> not_yet_received_seq_num_;
-
-  // Frames that have been fully received but didn't have all the information
-  // needed to determine their references.
-  std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
-
-  // Holds the information about the last completed frame for a given temporal
-  // layer given an unwrapped Tl0 picture index.
-  std::map<int64_t, std::array<int64_t, kMaxTemporalLayers>> layer_info_;
-
-  // Where the current scalability structure is in the
-  // |scalability_structures_| array.
-  uint8_t current_ss_idx_;
-
-  // Holds received scalability structures.
-  std::array<GofInfoVP9, kMaxGofSaved> scalability_structures_;
-
-  // Holds the the Gof information for a given unwrapped TL0 picture index.
-  std::map<int64_t, GofInfo> gof_info_;
-
-  // Keep track of which picture id and which temporal layer that had the
-  // up switch flag set.
-  std::map<uint16_t, uint8_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>
-      up_switch_;
-
-  // For every temporal layer, keep a set of which frames that are missing.
-  std::array<std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>,
-             kMaxTemporalLayers>
-      missing_frames_for_layer_;
-
-  // How far frames have been cleared by sequence number. A frame will be
-  // cleared if it contains a packet with a sequence number older than
-  // |cleared_to_seq_num_|.
-  int cleared_to_seq_num_;
-
-  OnCompleteFrameCallback* frame_callback_;
-
-  // Unwrapper used to unwrap generic RTP streams. In a generic stream we derive
-  // a picture id from the packet sequence number.
-  SeqNumUnwrapper<uint16_t> rtp_seq_num_unwrapper_;
-
-  // Unwrapper used to unwrap VP8/VP9 streams which have their picture id
-  // specified.
-  SeqNumUnwrapper<uint16_t, kPicIdLength> unwrapper_;
-
-  SeqNumUnwrapper<uint8_t> tl0_unwrapper_;
+  void HandOffFrames(ReturnVector frames);
 
+  // How far frames have been cleared out of the buffer by RTP sequence number.
+  // A frame will be cleared if it contains a packet with a sequence number
+  // older than |cleared_to_seq_num_|.
+  int cleared_to_seq_num_ = -1;
   const int64_t picture_id_offset_;
+  OnCompleteFrameCallback* frame_callback_;
+  std::unique_ptr<internal::RtpFrameReferenceFinderImpl> impl_;
 };
 
 }  // namespace video_coding
diff --git a/modules/video_coding/rtp_generic_ref_finder.cc b/modules/video_coding/rtp_generic_ref_finder.cc
new file mode 100644
index 0000000000..f5603e3ca9
--- /dev/null
+++ b/modules/video_coding/rtp_generic_ref_finder.cc
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_generic_ref_finder.h"
+
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace video_coding {
+
+RtpFrameReferenceFinder::ReturnVector RtpGenericFrameRefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame,
+    const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
+  // Frame IDs are unwrapped in the RtpVideoStreamReceiver, no need to unwrap
+  // them here.
+  frame->id.picture_id = descriptor.frame_id;
+  frame->SetSpatialIndex(descriptor.spatial_index);
+
+  RtpFrameReferenceFinder::ReturnVector res;
+  if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
+    RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
+    return res;
+  }
+
+  frame->num_references = descriptor.dependencies.size();
+  for (size_t i = 0; i < descriptor.dependencies.size(); ++i) {
+    frame->references[i] = descriptor.dependencies[i];
+  }
+
+  res.push_back(std::move(frame));
+  return res;
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_generic_ref_finder.h b/modules/video_coding/rtp_generic_ref_finder.h
new file mode 100644
index 0000000000..278de2635e
--- /dev/null
+++ b/modules/video_coding/rtp_generic_ref_finder.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_RTP_GENERIC_REF_FINDER_H_
+#define MODULES_VIDEO_CODING_RTP_GENERIC_REF_FINDER_H_
+
+#include <memory>
+
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+
+namespace webrtc {
+namespace video_coding {
+
+class RtpGenericFrameRefFinder {
+ public:
+  RtpGenericFrameRefFinder() = default;
+
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr<RtpFrameObject> frame,
+      const RTPVideoHeader::GenericDescriptorInfo& descriptor);
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_RTP_GENERIC_REF_FINDER_H_
diff --git a/modules/video_coding/rtp_seq_num_only_ref_finder.cc b/modules/video_coding/rtp_seq_num_only_ref_finder.cc
new file mode 100644
index 0000000000..7177a14be3
--- /dev/null
+++ b/modules/video_coding/rtp_seq_num_only_ref_finder.cc
@@ -0,0 +1,187 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_seq_num_only_ref_finder.h"
+
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace video_coding {
+
+RtpFrameReferenceFinder::ReturnVector RtpSeqNumOnlyRefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  FrameDecision decision = ManageFrameInternal(frame.get());
+
+  RtpFrameReferenceFinder::ReturnVector res;
+  switch (decision) {
+    case kStash:
+      if (stashed_frames_.size() > kMaxStashedFrames)
+        stashed_frames_.pop_back();
+      stashed_frames_.push_front(std::move(frame));
+      return res;
+    case kHandOff:
+      res.push_back(std::move(frame));
+      RetryStashedFrames(res);
+      return res;
+    case kDrop:
+      return res;
+  }
+
+  return res;
+}
+
+RtpSeqNumOnlyRefFinder::FrameDecision
+RtpSeqNumOnlyRefFinder::ManageFrameInternal(RtpFrameObject* frame) {
+  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+    last_seq_num_gop_.insert(std::make_pair(
+        frame->last_seq_num(),
+        std::make_pair(frame->last_seq_num(), frame->last_seq_num())));
+  }
+
+  // We have received a frame but not yet a keyframe, stash this frame.
+  if (last_seq_num_gop_.empty())
+    return kStash;
+
+  // Clean up info for old keyframes but make sure to keep info
+  // for the last keyframe.
+  auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100);
+  for (auto it = last_seq_num_gop_.begin();
+       it != clean_to && last_seq_num_gop_.size() > 1;) {
+    it = last_seq_num_gop_.erase(it);
+  }
+
+  // Find the last sequence number of the last frame for the keyframe
+  // that this frame indirectly references.
+  auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
+  if (seq_num_it == last_seq_num_gop_.begin()) {
+    RTC_LOG(LS_WARNING) << "Generic frame with packet range ["
+                        << frame->first_seq_num() << ", "
+                        << frame->last_seq_num()
+                        << "] has no GoP, dropping frame.";
+    return kDrop;
+  }
+  seq_num_it--;
+
+  // Make sure the packet sequence numbers are continuous, otherwise stash
+  // this frame.
+  uint16_t last_picture_id_gop = seq_num_it->second.first;
+  uint16_t last_picture_id_with_padding_gop = seq_num_it->second.second;
+  if (frame->frame_type() == VideoFrameType::kVideoFrameDelta) {
+    uint16_t prev_seq_num = frame->first_seq_num() - 1;
+
+    if (prev_seq_num != last_picture_id_with_padding_gop)
+      return kStash;
+  }
+
+  RTC_DCHECK(AheadOrAt(frame->last_seq_num(), seq_num_it->first));
+
+  // Since keyframes can cause reordering we can't simply assign the
+  // picture id according to some incrementing counter.
+  frame->id.picture_id = frame->last_seq_num();
+  frame->num_references =
+      frame->frame_type() == VideoFrameType::kVideoFrameDelta;
+  frame->references[0] = rtp_seq_num_unwrapper_.Unwrap(last_picture_id_gop);
+  if (AheadOf<uint16_t>(frame->id.picture_id, last_picture_id_gop)) {
+    seq_num_it->second.first = frame->id.picture_id;
+    seq_num_it->second.second = frame->id.picture_id;
+  }
+
+  UpdateLastPictureIdWithPadding(frame->id.picture_id);
+  frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id);
+  return kHandOff;
+}
+
+void RtpSeqNumOnlyRefFinder::RetryStashedFrames(
+    RtpFrameReferenceFinder::ReturnVector& res) {
+  bool complete_frame = false;
+  do {
+    complete_frame = false;
+    for (auto frame_it = stashed_frames_.begin();
+         frame_it != stashed_frames_.end();) {
+      FrameDecision decision = ManageFrameInternal(frame_it->get());
+
+      switch (decision) {
+        case kStash:
+          ++frame_it;
+          break;
+        case kHandOff:
+          complete_frame = true;
+          res.push_back(std::move(*frame_it));
+          ABSL_FALLTHROUGH_INTENDED;
+        case kDrop:
+          frame_it = stashed_frames_.erase(frame_it);
+      }
+    }
+  } while (complete_frame);
+}
+
+void RtpSeqNumOnlyRefFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) {
+  auto gop_seq_num_it = last_seq_num_gop_.upper_bound(seq_num);
+
+  // If this padding packet "belongs" to a group of pictures that we don't track
+  // anymore, do nothing.
+  if (gop_seq_num_it == last_seq_num_gop_.begin())
+    return;
+  --gop_seq_num_it;
+
+  // Calculate the next continuous sequence number and search for it in
+  // the padding packets we have stashed.
+  uint16_t next_seq_num_with_padding = gop_seq_num_it->second.second + 1;
+  auto padding_seq_num_it =
+      stashed_padding_.lower_bound(next_seq_num_with_padding);
+
+  // While there still are padding packets and those padding packets are
+  // continuous, then advance the "last-picture-id-with-padding" and remove
+  // the stashed padding packet.
+  while (padding_seq_num_it != stashed_padding_.end() &&
+         *padding_seq_num_it == next_seq_num_with_padding) {
+    gop_seq_num_it->second.second = next_seq_num_with_padding;
+    ++next_seq_num_with_padding;
+    padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it);
+  }
+
+  // In the case where the stream has been continuous without any new keyframes
+  // for a while there is a risk that new frames will appear to be older than
+  // the keyframe they belong to due to sequence number wrap-around. In order
+  // to prevent this we advance the picture id of the keyframe every so often.
+  if (ForwardDiff(gop_seq_num_it->first, seq_num) > 10000) {
+    auto save = gop_seq_num_it->second;
+    last_seq_num_gop_.clear();
+    last_seq_num_gop_[seq_num] = save;
+  }
+}
+
+RtpFrameReferenceFinder::ReturnVector RtpSeqNumOnlyRefFinder::PaddingReceived(
+    uint16_t seq_num) {
+  auto clean_padding_to =
+      stashed_padding_.lower_bound(seq_num - kMaxPaddingAge);
+  stashed_padding_.erase(stashed_padding_.begin(), clean_padding_to);
+  stashed_padding_.insert(seq_num);
+  UpdateLastPictureIdWithPadding(seq_num);
+  RtpFrameReferenceFinder::ReturnVector res;
+  RetryStashedFrames(res);
+  return res;
+}
+
+void RtpSeqNumOnlyRefFinder::ClearTo(uint16_t seq_num) {
+  auto it = stashed_frames_.begin();
+  while (it != stashed_frames_.end()) {
+    if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
+      it = stashed_frames_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_seq_num_only_ref_finder.h b/modules/video_coding/rtp_seq_num_only_ref_finder.h
new file mode 100644
index 0000000000..1b0cc7722a
--- /dev/null
+++ b/modules/video_coding/rtp_seq_num_only_ref_finder.h
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_RTP_SEQ_NUM_ONLY_REF_FINDER_H_
+#define MODULES_VIDEO_CODING_RTP_SEQ_NUM_ONLY_REF_FINDER_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+#include <utility>
+
+#include "absl/container/inlined_vector.h"
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+#include "rtc_base/numerics/sequence_number_util.h"
+
+namespace webrtc {
+namespace video_coding {
+
+class RtpSeqNumOnlyRefFinder {
+ public:
+  RtpSeqNumOnlyRefFinder() = default;
+
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr<RtpFrameObject> frame);
+  RtpFrameReferenceFinder::ReturnVector PaddingReceived(uint16_t seq_num);
+  void ClearTo(uint16_t seq_num);
+
+ private:
+  static constexpr int kMaxStashedFrames = 100;
+  static constexpr int kMaxPaddingAge = 100;
+
+  enum FrameDecision { kStash, kHandOff, kDrop };
+
+  FrameDecision ManageFrameInternal(RtpFrameObject* frame);
+  void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res);
+  void UpdateLastPictureIdWithPadding(uint16_t seq_num);
+
+  // For every group of pictures, hold two sequence numbers. The first being
+  // the sequence number of the last packet of the last completed frame, and
+  // the second being the sequence number of the last packet of the last
+  // completed frame advanced by any potential continuous packets of padding.
+  std::map<uint16_t,
+           std::pair<uint16_t, uint16_t>,
+           DescendingSeqNumComp<uint16_t>>
+      last_seq_num_gop_;
+
+  // Padding packets that have been received but that are not yet continuous
+  // with any group of pictures.
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> stashed_padding_;
+
+  // Frames that have been fully received but didn't have all the information
+  // needed to determine their references.
+  std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
+
+  // Unwrapper used to unwrap generic RTP streams. In a generic stream we derive
+  // a picture id from the packet sequence number.
+  SeqNumUnwrapper<uint16_t> rtp_seq_num_unwrapper_;
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_RTP_SEQ_NUM_ONLY_REF_FINDER_H_
diff --git a/modules/video_coding/rtp_vp8_ref_finder.cc b/modules/video_coding/rtp_vp8_ref_finder.cc
new file mode 100644
index 0000000000..341bba90a4
--- /dev/null
+++ b/modules/video_coding/rtp_vp8_ref_finder.cc
@@ -0,0 +1,250 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_vp8_ref_finder.h"
+
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace video_coding {
+
+RtpFrameReferenceFinder::ReturnVector RtpVp8RefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  FrameDecision decision = ManageFrameInternal(frame.get());
+
+  RtpFrameReferenceFinder::ReturnVector res;
+  switch (decision) {
+    case kStash:
+      if (stashed_frames_.size() > kMaxStashedFrames)
+        stashed_frames_.pop_back();
+      stashed_frames_.push_front(std::move(frame));
+      return res;
+    case kHandOff:
+      res.push_back(std::move(frame));
+      RetryStashedFrames(res);
+      return res;
+    case kDrop:
+      return res;
+  }
+
+  return res;
+}
+
+RtpVp8RefFinder::FrameDecision RtpVp8RefFinder::ManageFrameInternal(
+    RtpFrameObject* frame) {
+  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
+  const RTPVideoHeaderVP8& codec_header =
+      absl::get<RTPVideoHeaderVP8>(video_header.video_type_header);
+
+  // Protect against corrupted packets with arbitrary large temporal idx.
+  if (codec_header.temporalIdx >= kMaxTemporalLayers)
+    return kDrop;
+
+  frame->id.picture_id = codec_header.pictureId & 0x7FFF;
+
+  if (last_picture_id_ == -1)
+    last_picture_id_ = frame->id.picture_id;
+
+  // Clean up info about not yet received frames that are too old.
+  uint16_t old_picture_id =
+      Subtract<kFrameIdLength>(frame->id.picture_id, kMaxNotYetReceivedFrames);
+  auto clean_frames_to = not_yet_received_frames_.lower_bound(old_picture_id);
+  not_yet_received_frames_.erase(not_yet_received_frames_.begin(),
+                                 clean_frames_to);
+  // Avoid re-adding picture ids that were just erased.
+  if (AheadOf<uint16_t, kFrameIdLength>(old_picture_id, last_picture_id_)) {
+    last_picture_id_ = old_picture_id;
+  }
+  // Find if there has been a gap in fully received frames and save the picture
+  // id of those frames in |not_yet_received_frames_|.
+  if (AheadOf<uint16_t, kFrameIdLength>(frame->id.picture_id,
+                                        last_picture_id_)) {
+    do {
+      last_picture_id_ = Add<kFrameIdLength>(last_picture_id_, 1);
+      not_yet_received_frames_.insert(last_picture_id_);
+    } while (last_picture_id_ != frame->id.picture_id);
+  }
+
+  int64_t unwrapped_tl0 = tl0_unwrapper_.Unwrap(codec_header.tl0PicIdx & 0xFF);
+
+  // Clean up info for base layers that are too old.
+  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxLayerInfo;
+  auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
+  layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
+
+  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+    if (codec_header.temporalIdx != 0) {
+      return kDrop;
+    }
+    frame->num_references = 0;
+    layer_info_[unwrapped_tl0].fill(-1);
+    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
+    return kHandOff;
+  }
+
+  auto layer_info_it = layer_info_.find(
+      codec_header.temporalIdx == 0 ? unwrapped_tl0 - 1 : unwrapped_tl0);
+
+  // If we don't have the base layer frame yet, stash this frame.
+  if (layer_info_it == layer_info_.end())
+    return kStash;
+
+  // A non keyframe base layer frame has been received, copy the layer info
+  // from the previous base layer frame and set a reference to the previous
+  // base layer frame.
+  if (codec_header.temporalIdx == 0) {
+    layer_info_it =
+        layer_info_.emplace(unwrapped_tl0, layer_info_it->second).first;
+    frame->num_references = 1;
+    int64_t last_pid_on_layer = layer_info_it->second[0];
+
+    // Is this an old frame that has already been used to update the state? If
+    // so, drop it.
+    if (AheadOrAt<uint16_t, kFrameIdLength>(last_pid_on_layer,
+                                            frame->id.picture_id)) {
+      return kDrop;
+    }
+
+    frame->references[0] = last_pid_on_layer;
+    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
+    return kHandOff;
+  }
+
+  // Layer sync frame, this frame only references its base layer frame.
+  if (codec_header.layerSync) {
+    frame->num_references = 1;
+    int64_t last_pid_on_layer = layer_info_it->second[codec_header.temporalIdx];
+
+    // Is this an old frame that has already been used to update the state? If
+    // so, drop it.
+    if (last_pid_on_layer != -1 &&
+        AheadOrAt<uint16_t, kFrameIdLength>(last_pid_on_layer,
+                                            frame->id.picture_id)) {
+      return kDrop;
+    }
+
+    frame->references[0] = layer_info_it->second[0];
+    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
+    return kHandOff;
+  }
+
+  // Find all references for this frame.
+  frame->num_references = 0;
+  for (uint8_t layer = 0; layer <= codec_header.temporalIdx; ++layer) {
+    // If we have not yet received a previous frame on this temporal layer,
+    // stash this frame.
+    if (layer_info_it->second[layer] == -1)
+      return kStash;
+
+    // If the last frame on this layer is ahead of this frame it means that
+    // a layer sync frame has been received after this frame for the same
+    // base layer frame, drop this frame.
+    if (AheadOf<uint16_t, kFrameIdLength>(layer_info_it->second[layer],
+                                          frame->id.picture_id)) {
+      return kDrop;
+    }
+
+    // If we have not yet received a frame between this frame and the referenced
+    // frame then we have to wait for that frame to be completed first.
+    auto not_received_frame_it =
+        not_yet_received_frames_.upper_bound(layer_info_it->second[layer]);
+    if (not_received_frame_it != not_yet_received_frames_.end() &&
+        AheadOf<uint16_t, kFrameIdLength>(frame->id.picture_id,
+                                          *not_received_frame_it)) {
+      return kStash;
+    }
+
+    if (!(AheadOf<uint16_t, kFrameIdLength>(frame->id.picture_id,
+                                            layer_info_it->second[layer]))) {
+      RTC_LOG(LS_WARNING) << "Frame with picture id " << frame->id.picture_id
+                          << " and packet range [" << frame->first_seq_num()
+                          << ", " << frame->last_seq_num()
+                          << "] already received, "
+                             " dropping frame.";
+      return kDrop;
+    }
+
+    ++frame->num_references;
+    frame->references[layer] = layer_info_it->second[layer];
+  }
+
+  UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
+  return kHandOff;
+}
+
+void RtpVp8RefFinder::UpdateLayerInfoVp8(RtpFrameObject* frame,
+                                         int64_t unwrapped_tl0,
+                                         uint8_t temporal_idx) {
+  auto layer_info_it = layer_info_.find(unwrapped_tl0);
+
+  // Update this layer info and newer.
+  while (layer_info_it != layer_info_.end()) {
+    if (layer_info_it->second[temporal_idx] != -1 &&
+        AheadOf<uint16_t, kFrameIdLength>(layer_info_it->second[temporal_idx],
+                                          frame->id.picture_id)) {
+      // The frame was not newer, so no subsequent layer info needs to be
+      // updated.
+      break;
+    }
+
+    layer_info_it->second[temporal_idx] = frame->id.picture_id;
+    ++unwrapped_tl0;
+    layer_info_it = layer_info_.find(unwrapped_tl0);
+  }
+  not_yet_received_frames_.erase(frame->id.picture_id);
+
+  UnwrapPictureIds(frame);
+}
+
+void RtpVp8RefFinder::RetryStashedFrames(
+    RtpFrameReferenceFinder::ReturnVector& res) {
+  bool complete_frame = false;
+  do {
+    complete_frame = false;
+    for (auto frame_it = stashed_frames_.begin();
+         frame_it != stashed_frames_.end();) {
+      FrameDecision decision = ManageFrameInternal(frame_it->get());
+
+      switch (decision) {
+        case kStash:
+          ++frame_it;
+          break;
+        case kHandOff:
+          complete_frame = true;
+          res.push_back(std::move(*frame_it));
+          ABSL_FALLTHROUGH_INTENDED;
+        case kDrop:
+          frame_it = stashed_frames_.erase(frame_it);
+      }
+    }
+  } while (complete_frame);
+}
+
+void RtpVp8RefFinder::UnwrapPictureIds(RtpFrameObject* frame) {
+  for (size_t i = 0; i < frame->num_references; ++i)
+    frame->references[i] = unwrapper_.Unwrap(frame->references[i]);
+  frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);
+}
+
+void RtpVp8RefFinder::ClearTo(uint16_t seq_num) {
+  auto it = stashed_frames_.begin();
+  while (it != stashed_frames_.end()) {
+    if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
+      it = stashed_frames_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_vp8_ref_finder.h b/modules/video_coding/rtp_vp8_ref_finder.h
new file mode 100644
index 0000000000..55d2de921e
--- /dev/null
+++ b/modules/video_coding/rtp_vp8_ref_finder.h
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_RTP_VP8_REF_FINDER_H_
+#define MODULES_VIDEO_CODING_RTP_VP8_REF_FINDER_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+
+#include "absl/container/inlined_vector.h"
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+#include "rtc_base/numerics/sequence_number_util.h"
+
+namespace webrtc {
+namespace video_coding {
+
+class RtpVp8RefFinder {
+ public:
+  RtpVp8RefFinder() = default;
+
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr<RtpFrameObject> frame);
+  void ClearTo(uint16_t seq_num);
+
+ private:
+  static constexpr int kFrameIdLength = 1 << 15;
+  static constexpr int kMaxLayerInfo = 50;
+  static constexpr int kMaxNotYetReceivedFrames = 100;
+  static constexpr int kMaxStashedFrames = 100;
+  static constexpr int kMaxTemporalLayers = 5;
+
+  enum FrameDecision { kStash, kHandOff, kDrop };
+
+  FrameDecision ManageFrameInternal(RtpFrameObject* frame);
+  void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res);
+  void UpdateLayerInfoVp8(RtpFrameObject* frame,
+                          int64_t unwrapped_tl0,
+                          uint8_t temporal_idx);
+  void UnwrapPictureIds(RtpFrameObject* frame);
+
+  // Save the last picture id in order to detect when there is a gap in frames
+  // that have not yet been fully received.
+  int last_picture_id_ = -1;
+
+  // Frames earlier than the last received frame that have not yet been
+  // fully received.
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t, kFrameIdLength>>
+      not_yet_received_frames_;
+
+  // Frames that have been fully received but didn't have all the information
+  // needed to determine their references.
+  std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
+
+  // Holds the information about the last completed frame for a given temporal
+  // layer given an unwrapped Tl0 picture index.
+  std::map<int64_t, std::array<int64_t, kMaxTemporalLayers>> layer_info_;
+
+  // Unwrapper used to unwrap the picture ids of VP8 frames and of their
+  // references.
+  SeqNumUnwrapper<uint16_t, kFrameIdLength> unwrapper_;
+
+  SeqNumUnwrapper<uint8_t> tl0_unwrapper_;
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_RTP_VP8_REF_FINDER_H_
diff --git a/modules/video_coding/rtp_vp9_ref_finder.cc b/modules/video_coding/rtp_vp9_ref_finder.cc
new file mode 100644
index 0000000000..a725a269f0
--- /dev/null
+++ b/modules/video_coding/rtp_vp9_ref_finder.cc
@@ -0,0 +1,347 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_vp9_ref_finder.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace video_coding {
+
+RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  // Frames ready to be handed off; empty if |frame| is stashed or dropped.
+  RtpFrameReferenceFinder::ReturnVector ready_frames;
+
+  const FrameDecision verdict = ManageFrameInternal(frame.get());
+  if (verdict == kHandOff) {
+    // A resolved frame may unblock frames that were stashed earlier.
+    ready_frames.push_back(std::move(frame));
+    RetryStashedFrames(ready_frames);
+  } else if (verdict == kStash) {
+    // Bound the stash by evicting from the back before adding at the front.
+    if (stashed_frames_.size() > kMaxStashedFrames) {
+      stashed_frames_.pop_back();
+    }
+    stashed_frames_.push_front(std::move(frame));
+  }
+
+  // On kDrop, |frame| is simply destroyed when this function returns.
+  return ready_frames;
+}
+
+// Determines the references of |frame| from its VP9 RTP header. Returns
+// kHandOff once the references are set, kStash when information needed to
+// resolve them has not been received yet, and kDrop for unusable headers.
+RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal(
+    RtpFrameObject* frame) {
+  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
+  const RTPVideoHeaderVP9& codec_header =
+      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
+
+  // Protect against corrupted packets with arbitrary large temporal idx.
+  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
+      codec_header.spatial_idx >= kMaxSpatialLayers)
+    return kDrop;
+
+  frame->id.spatial_layer = codec_header.spatial_idx;
+  frame->inter_layer_predicted = codec_header.inter_layer_predicted;
+  frame->id.picture_id = codec_header.picture_id & (kFrameIdLength - 1);
+
+  if (last_picture_id_ == -1)
+    last_picture_id_ = frame->id.picture_id;
+
+  if (codec_header.flexible_mode) {
+    if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
+      return kDrop;
+    }
+    frame->num_references = codec_header.num_ref_pics;
+    for (size_t i = 0; i < frame->num_references; ++i) {
+      frame->references[i] = Subtract<kFrameIdLength>(frame->id.picture_id,
+                                                      codec_header.pid_diff[i]);
+    }
+
+    UnwrapPictureIds(frame);
+    return kHandOff;
+  }
+
+  if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
+    RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
+                           "non-flexible mode.";
+    return kDrop;
+  }
+
+  GofInfo* info;
+  int64_t unwrapped_tl0 =
+      tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
+  if (codec_header.ss_data_available) {
+    if (codec_header.temporal_idx != 0) {
+      RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
+                             "layer frame. Scalability structure ignored.";
+    } else {
+      if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
+        return kDrop;
+      }
+
+      for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
+        if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
+          return kDrop;
+        }
+      }
+
+      GofInfoVP9 gof = codec_header.gof;
+      if (gof.num_frames_in_gof == 0) {
+        RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
+                               "that stream has only one temporal layer.";
+        gof.SetGofInfoVP9(kTemporalStructureMode1);
+      }
+
+      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
+      scalability_structures_[current_ss_idx_] = gof;
+      scalability_structures_[current_ss_idx_].pid_start = frame->id.picture_id;
+      gof_info_.emplace(unwrapped_tl0,
+                        GofInfo(&scalability_structures_[current_ss_idx_],
+                                frame->id.picture_id));
+    }
+
+    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
+    if (gof_info_it == gof_info_.end())
+      return kStash;
+
+    info = &gof_info_it->second;
+
+    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+      frame->num_references = 0;
+      FrameReceivedVp9(frame->id.picture_id, info);
+      UnwrapPictureIds(frame);
+      return kHandOff;
+    }
+  } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+    if (frame->id.spatial_layer == 0) {
+      RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
+      return kDrop;
+    }
+    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
+    if (gof_info_it == gof_info_.end())
+      return kStash;
+
+    info = &gof_info_it->second;
+
+    // Only key frames reach this branch, so no further type check is needed.
+    frame->num_references = 0;
+    FrameReceivedVp9(frame->id.picture_id, info);
+    UnwrapPictureIds(frame);
+    return kHandOff;
+  } else {
+    auto gof_info_it = gof_info_.find(
+        (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
+
+    // Gof info for this frame is not available yet, stash this frame.
+    if (gof_info_it == gof_info_.end())
+      return kStash;
+
+    if (codec_header.temporal_idx == 0) {
+      gof_info_it = gof_info_
+                        .emplace(unwrapped_tl0, GofInfo(gof_info_it->second.gof,
+                                                        frame->id.picture_id))
+                        .first;
+    }
+
+    info = &gof_info_it->second;
+  }
+
+  // Clean up info for base layers that are too old.
+  gof_info_.erase(gof_info_.begin(),
+                  gof_info_.lower_bound(unwrapped_tl0 - kMaxGofSaved));
+
+  FrameReceivedVp9(frame->id.picture_id, info);
+
+  // Make sure we don't miss any frame that could potentially have the
+  // up switch flag set.
+  if (MissingRequiredFrameVp9(frame->id.picture_id, *info))
+    return kStash;
+
+  if (codec_header.temporal_up_switch)
+    up_switch_.emplace(frame->id.picture_id, codec_header.temporal_idx);
+
+  // Clean out old info about up switch frames.
+  uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->id.picture_id, 50);
+  up_switch_.erase(up_switch_.begin(), up_switch_.lower_bound(old_picture_id));
+
+  size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
+                                                      frame->id.picture_id);
+  size_t gof_idx = diff % info->gof->num_frames_in_gof;
+
+  if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
+    return kDrop;
+  }
+  // Populate references according to the scalability structure. References
+  // to a frame earlier than the last up switch point are ignored; the kept
+  // ones are compacted so that every entry of the structure is examined.
+  const size_t num_gof_refs = info->gof->num_ref_pics[gof_idx];
+  frame->num_references = 0;
+  for (size_t i = 0; i < num_gof_refs; ++i) {
+    const auto ref_pid = Subtract<kFrameIdLength>(
+        frame->id.picture_id, info->gof->pid_diff[gof_idx][i]);
+    if (!UpSwitchInIntervalVp9(frame->id.picture_id, codec_header.temporal_idx,
+                               ref_pid)) {
+      frame->references[frame->num_references++] = ref_pid;
+    }
+  }
+
+  // Override GOF references.
+  if (!codec_header.inter_pic_predicted) {
+    frame->num_references = 0;
+  }
+
+  UnwrapPictureIds(frame);
+  return kHandOff;
+}
+
+// Returns true if the GOF layer of |picture_id| is unsupported or if, for any
+// of its references, a lower layer lacks a frame in (|ref_pid|, |picture_id|).
+bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
+                                              const GofInfo& info) {
+  const size_t pid_offset =
+      ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
+  const size_t gof_pos = pid_offset % info.gof->num_frames_in_gof;
+  const size_t frame_layer = info.gof->temporal_idx[gof_pos];
+
+  if (frame_layer >= kMaxTemporalLayers) {
+    RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                        << " temporal "
+                           "layers are supported.";
+    return true;
+  }
+
+  const uint8_t ref_count = info.gof->num_ref_pics[gof_pos];
+  for (size_t ref = 0; ref < ref_count; ++ref) {
+    const uint16_t ref_pid =
+        Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_pos][ref]);
+    for (size_t layer = 0; layer < frame_layer; ++layer) {
+      const auto& missing = missing_frames_for_layer_[layer];
+      const auto candidate = missing.lower_bound(ref_pid);
+      if (candidate != missing.end() &&
+          AheadOf<uint16_t, kFrameIdLength>(picture_id, *candidate)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Records the arrival of |picture_id| for the GOF described by |info|:
+// picture ids skipped since |info->last_picture_id| are marked as missing in
+// their temporal layer, while a picture id that is not ahead is unmarked.
+void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
+  const GofInfoVP9& gof = *info->gof;
+  const size_t gof_size = std::min(gof.num_frames_in_gof, kMaxVp9FramesInGof);
+  int expected_pid = info->last_picture_id;
+
+  if (!AheadOf<uint16_t, kFrameIdLength>(picture_id, expected_pid)) {
+    // No gap was opened by this frame; it just stops being missing.
+    const size_t gof_idx =
+        ForwardDiff<uint16_t, kFrameIdLength>(gof.pid_start, picture_id) %
+        gof_size;
+    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
+    const size_t temporal_idx = gof.temporal_idx[gof_idx];
+    if (temporal_idx >= kMaxTemporalLayers) {
+      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                          << " temporal "
+                             "layers are supported.";
+      return;
+    }
+    missing_frames_for_layer_[temporal_idx].erase(picture_id);
+    return;
+  }
+
+  // Walk the GOF pattern from the last seen frame up to |picture_id| and mark
+  // every skipped picture id as missing in its temporal layer.
+  size_t gof_idx = ForwardDiff<uint16_t, kFrameIdLength>(gof.pid_start,
+                                                         expected_pid) %
+                   gof_size;
+  for (expected_pid = Add<kFrameIdLength>(expected_pid, 1);
+       expected_pid != picture_id;
+       expected_pid = Add<kFrameIdLength>(expected_pid, 1)) {
+    gof_idx = (gof_idx + 1) % gof_size;
+    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
+    const size_t temporal_idx = gof.temporal_idx[gof_idx];
+    if (temporal_idx >= kMaxTemporalLayers) {
+      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                          << " temporal "
+                             "layers are supported.";
+      return;
+    }
+    missing_frames_for_layer_[temporal_idx].insert(expected_pid);
+  }
+
+  info->last_picture_id = expected_pid;
+}
+
+// True if an up switch between |pid_ref| and |picture_id| is in a lower layer.
+bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
+                                            uint8_t temporal_idx,
+                                            uint16_t pid_ref) {
+  auto candidate = up_switch_.upper_bound(pid_ref);
+  while (candidate != up_switch_.end() &&
+         AheadOf<uint16_t, kFrameIdLength>(picture_id, candidate->first)) {
+    if (candidate->second < temporal_idx)
+      return true;
+    ++candidate;
+  }
+  return false;
+}
+
+// Re-evaluates the stashed frames until a full pass hands off nothing new.
+// Handed off frames are appended to |res|; dropped frames are discarded.
+void RtpVp9RefFinder::RetryStashedFrames(
+    RtpFrameReferenceFinder::ReturnVector& res) {
+  bool handed_off_any = true;
+  while (handed_off_any) {
+    handed_off_any = false;
+    auto stash_it = stashed_frames_.begin();
+    while (stash_it != stashed_frames_.end()) {
+      const FrameDecision verdict = ManageFrameInternal(stash_it->get());
+      if (verdict == kStash) {
+        ++stash_it;
+        continue;
+      }
+      if (verdict == kHandOff) {
+        handed_off_any = true;
+        res.push_back(std::move(*stash_it));
+      }
+      // Both handed off and dropped frames leave the stash.
+      stash_it = stashed_frames_.erase(stash_it);
+    }
+  }
+}
+
+void RtpVp9RefFinder::UnwrapPictureIds(RtpFrameObject* frame) {
+  for (size_t r = 0; r != frame->num_references; ++r)  // References first.
+    frame->references[r] = unwrapper_.Unwrap(frame->references[r]);
+  frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);  // Id last.
+}
+
+// Discards every stashed frame whose first sequence number is older than
+// |seq_num|.
+void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
+  const auto is_stale = [seq_num](const std::unique_ptr<RtpFrameObject>& f) {
+    return AheadOf<uint16_t>(seq_num, f->first_seq_num());
+  };
+  stashed_frames_.erase(
+      std::remove_if(stashed_frames_.begin(), stashed_frames_.end(), is_stale),
+      stashed_frames_.end());
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_vp9_ref_finder.h b/modules/video_coding/rtp_vp9_ref_finder.h
new file mode 100644
index 0000000000..9990c5d684
--- /dev/null
+++ b/modules/video_coding/rtp_vp9_ref_finder.h
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_RTP_VP9_REF_FINDER_H_
+#define MODULES_VIDEO_CODING_RTP_VP9_REF_FINDER_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+
+#include "absl/container/inlined_vector.h"
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+#include "rtc_base/numerics/sequence_number_util.h"
+
+namespace webrtc {
+namespace video_coding {
+
+// Finds the references of VP9 frames from their RTP headers. Frames whose
+// references cannot be determined yet are stashed and retried later.
+class RtpVp9RefFinder {
+ public:
+  RtpVp9RefFinder() = default;
+
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr<RtpFrameObject> frame);
+  void ClearTo(uint16_t seq_num);
+
+ private:
+  static constexpr int kFrameIdLength = 1 << 15;
+  static constexpr int kMaxGofSaved = 50;
+  static constexpr int kMaxStashedFrames = 100;
+  static constexpr int kMaxTemporalLayers = 5;
+
+  enum FrameDecision { kStash, kHandOff, kDrop };
+
+  struct GofInfo {
+    GofInfo(GofInfoVP9* gof, uint16_t last_picture_id)
+        : gof(gof), last_picture_id(last_picture_id) {}
+    GofInfoVP9* gof;
+    uint16_t last_picture_id;
+  };
+
+  FrameDecision ManageFrameInternal(RtpFrameObject* frame);
+  void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res);
+
+  bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info);
+
+  void FrameReceivedVp9(uint16_t picture_id, GofInfo* info);
+  bool UpSwitchInIntervalVp9(uint16_t picture_id,
+                             uint8_t temporal_idx,
+                             uint16_t pid_ref);
+
+  void UnwrapPictureIds(RtpFrameObject* frame);
+
+  // Picture id of the first frame that passed the layer index check; -1 until
+  // then. The current implementation never reads it after that.
+  int last_picture_id_ = -1;
+
+  // Frames that have been fully received but didn't have all the information
+  // needed to determine their references.
+  std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
+
+  // Where the current scalability structure is in the
+  // |scalability_structures_| array.
+  uint8_t current_ss_idx_ = 0;
+
+  // Holds received scalability structures.
+  std::array<GofInfoVP9, kMaxGofSaved> scalability_structures_;
+
+  // Holds the Gof information for a given unwrapped TL0 picture index.
+  std::map<int64_t, GofInfo> gof_info_;
+
+  // Keep track of which picture id and which temporal layer that had the
+  // up switch flag set.
+  std::map<uint16_t, uint8_t, DescendingSeqNumComp<uint16_t, kFrameIdLength>>
+      up_switch_;
+
+  // For every temporal layer, keep a set of which frames that are missing.
+  std::array<std::set<uint16_t, DescendingSeqNumComp<uint16_t, kFrameIdLength>>,
+             kMaxTemporalLayers>
+      missing_frames_for_layer_;
+
+  // Unwraps picture ids, which wrap around at |kFrameIdLength|.
+  SeqNumUnwrapper<uint16_t, kFrameIdLength> unwrapper_;
+
+  // Unwraps the 8 bit TL0 picture index.
+  SeqNumUnwrapper<uint8_t> tl0_unwrapper_;
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_RTP_VP9_REF_FINDER_H_