Fix encoded image data injectors.

In the encoded image data injectors, use the last bytes of the payload instead of the first. Rewriting the first bytes of a video frame's payload caused problems, because elsewhere in the pipeline the first bytes are used to check whether the frame is a key frame and to parse QP values. Bug: webrtc:10138 Change-Id: I59b7313ee54a33b31f842ec28ef8d831fe24eea5 Reviewed-on: https://webrtc-review.googlesource.com/c/124490 Commit-Queue: Artem Titov <titovartem@webrtc.org> Reviewed-by: Peter Slatala <psla@webrtc.org> Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org> Cr-Commit-Position: refs/heads/master@{#26875}
2019-02-27 13:41:20 +01:00 · 2019-02-27 13:41:20 +01:00 · 1a7a4afd13
commit 1a7a4afd13
parent aec663ed0d
4 changed files with 130 additions and 73 deletions
--- a/test/pc/e2e/analyzer/video/default_encoded_image_data_injector.cc
+++ b/test/pc/e2e/analyzer/video/default_encoded_image_data_injector.cc
@ -10,6 +10,7 @@

 #include "test/pc/e2e/analyzer/video/default_encoded_image_data_injector.h"

+#include <algorithm>
 #include <cstddef>

 #include "absl/memory/memory.h"
@ -30,6 +31,11 @@ constexpr size_t kInitialBufferSize = 2 * 1024;
 constexpr int kPreInitCodingEntitiesCount = 2;
 constexpr size_t kBuffersPoolPerCodingEntity = 256;

+struct ExtractionInfo {
+  size_t length;
+  bool discard;
+};
+
 }  // namespace

 DefaultEncodedImageDataInjector::DefaultEncodedImageDataInjector() {
@ -49,24 +55,26 @@ EncodedImage DefaultEncodedImageDataInjector::InjectData(
  ExtendIfRequired(coding_entity_id);

  EncodedImage out = source;
+  out.Retain();
  std::vector<uint8_t>* buffer = NextBuffer();
  if (buffer->size() < source.size() + kEncodedImageBufferExpansion) {
    buffer->resize(source.size() + kEncodedImageBufferExpansion);
  }
  out.set_buffer(buffer->data(), buffer->size());
  out.set_size(source.size() + kEncodedImageBufferExpansion);
-  memcpy(&out.data()[kEncodedImageBufferExpansion], source.data(),
-         source.size());
-  out.data()[0] = id & 0x00ff;
-  out.data()[1] = (id & 0xff00) >> 8;
-  out.data()[2] = source.size() & 0x000000ff;
-  out.data()[3] = (source.size() & 0x0000ff00) >> 8;
-  out.data()[4] = (source.size() & 0x00ff0000) >> 16;
-  out.data()[5] = (source.size() & 0xff000000) >> 24;
+  memcpy(out.data(), source.data(), source.size());
+  size_t insertion_pos = source.size();
+  out.data()[insertion_pos] = id & 0x00ff;
+  out.data()[insertion_pos + 1] = (id & 0xff00) >> 8;
+  out.data()[insertion_pos + 2] = source.size() & 0x000000ff;
+  out.data()[insertion_pos + 3] = (source.size() & 0x0000ff00) >> 8;
+  out.data()[insertion_pos + 4] = (source.size() & 0x00ff0000) >> 16;
+  out.data()[insertion_pos + 5] = (source.size() & 0xff000000) >> 24;

  // We will store discard flag in the high bit of high byte of the size.
  RTC_CHECK_LT(source.size(), 1U << 31) << "High bit is already in use";
-  out.data()[5] = out.data()[5] | ((discard ? 1 : 0) << 7);
+  out.data()[insertion_pos + 5] =
+      out.data()[insertion_pos + 5] | ((discard ? 1 : 0) << 7);
  return out;
 }

@ -82,36 +90,60 @@ EncodedImageExtractionResult DefaultEncodedImageDataInjector::ExtractData(
  }
  out.set_buffer(buffer->data(), buffer->size());

-  size_t source_pos = 0;
-  size_t out_pos = 0;
+  size_t source_pos = source.size() - 1;
  absl::optional<uint16_t> id = absl::nullopt;
  bool discard = true;
-  while (source_pos < source.size()) {
-    RTC_CHECK_LE(source_pos + kEncodedImageBufferExpansion, source.size());
+  std::vector<ExtractionInfo> extraction_infos;
+  // First make a reverse pass through the whole buffer to collect the frame
+  // id, the discard flags and the lengths of the concatenated encoded images.
+  while (true) {
+    size_t insertion_pos = source_pos - kEncodedImageBufferExpansion + 1;
+    RTC_CHECK_GE(insertion_pos, 0);
+    RTC_CHECK_LE(insertion_pos + kEncodedImageBufferExpansion, source.size());
    uint16_t next_id =
-        source.data()[source_pos] + (source.data()[source_pos + 1] << 8);
+        source.data()[insertion_pos] + (source.data()[insertion_pos + 1] << 8);
    RTC_CHECK(!id || id.value() == next_id)
        << "Different frames encoded into single encoded image: " << id.value()
        << " vs " << next_id;
    id = next_id;
-    uint32_t length = source.data()[source_pos + 2] +
-                      (source.data()[source_pos + 3] << 8) +
-                      (source.data()[source_pos + 4] << 16) +
-                      ((source.data()[source_pos + 5] << 24) & 0b01111111);
-    bool current_discard = (source.data()[source_pos + 5] & 0b10000000) != 0;
-    RTC_CHECK_LE(source_pos + kEncodedImageBufferExpansion + length,
-                 source.size());
-    if (!current_discard) {
-      // Copy next encoded image payload from concatenated buffer only if it is
-      // not discarded.
-      memcpy(&out.data()[out_pos],
-             &source.data()[source_pos + kEncodedImageBufferExpansion], length);
-      out_pos += length;
-    }
-    source_pos += length + kEncodedImageBufferExpansion;
+    uint32_t length = source.data()[insertion_pos + 2] +
+                      (source.data()[insertion_pos + 3] << 8) +
+                      (source.data()[insertion_pos + 4] << 16) +
+                      ((source.data()[insertion_pos + 5] << 24) & 0b01111111);
+    bool current_discard = (source.data()[insertion_pos + 5] & 0b10000000) != 0;
+    extraction_infos.push_back({length, current_discard});
    // Extraction result is discarded only if all encoded partitions are
    // discarded.
    discard = discard && current_discard;
+    if (source_pos < length + kEncodedImageBufferExpansion) {
+      break;
+    }
+    source_pos -= length + kEncodedImageBufferExpansion;
+  }
+  RTC_CHECK(id);
+  std::reverse(extraction_infos.begin(), extraction_infos.end());
+  if (discard) {
+    out.set_size(0);
+    return EncodedImageExtractionResult{*id, out, true};
+  }
+
+  // Now, based on the collected data, make a forward pass to copy the required
+  // pieces of data to the output buffer.
+  source_pos = 0;
+  size_t out_pos = 0;
+  auto extraction_infos_it = extraction_infos.begin();
+  while (source_pos < source.size()) {
+    const ExtractionInfo& info = *extraction_infos_it;
+    RTC_CHECK_LE(source_pos + kEncodedImageBufferExpansion + info.length,
+                 source.size());
+    if (!info.discard) {
+      // Copy next encoded image payload from concatenated buffer only if it is
+      // not discarded.
+      memcpy(&out.data()[out_pos], &source.data()[source_pos], info.length);
+      out_pos += info.length;
+    }
+    source_pos += info.length + kEncodedImageBufferExpansion;
+    ++extraction_infos_it;
  }
  out.set_size(out_pos);

--- a/test/pc/e2e/analyzer/video/default_encoded_image_data_injector.h
+++ b/test/pc/e2e/analyzer/video/default_encoded_image_data_injector.h
@ -26,34 +26,33 @@ namespace webrtc {
 namespace test {

 // Injects frame id and discard flag into EncodedImage payload buffer. The
-// payload buffer will be prepended in the injector with 2 bytes frame id and 4
+// payload buffer will be appended in the injector with 2 bytes frame id and 4
 // bytes original buffer length. Discarded flag will be put into the highest bit
 // of the length. It is assumed, that frame's data can't be more then 2^31
 // bytes. In the decoder, frame id and discard flag will be extracted and the
-// length will be used to restore original buffer.
+// length will be used to restore original buffer. We can't put this data at the
+// beginning of the payload, because the first bytes are used by other parts of
+// the WebRTC pipeline.
 //
 // The data in the EncodedImage on encoder side after injection will look like
 // this:
-//        4 bytes frame length + discard flag
-//   _ _ _↓_ _ _ _________________
-//  |   |       | original buffer |
-//   ¯↑¯ ¯ ¯ ¯ ¯ ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
-//    2 bytes frame id
+//                         4 bytes frame length + discard flag
+//  _________________ _ _ _↓_ _ _
+// | original buffer |   |       |
+//  ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ ¯↑¯ ¯ ¯ ¯ ¯
+//                     2 bytes frame id
 //
 // But on decoder side multiple payloads can be concatenated into single
 // EncodedImage in jitter buffer and its payload will look like this:
-//        _ _ _ _ _ _ _________ _ _ _ _ _ _ _________ _ _ _ _ _ _ _________
-//  buf: |   |       | payload |   |       | payload |   |       | payload |
-//        ¯ ¯ ¯ ¯ ¯ ¯ ¯¯¯¯¯¯¯¯¯ ¯ ¯ ¯ ¯ ¯ ¯ ¯¯¯¯¯¯¯¯¯ ¯ ¯ ¯ ¯ ¯ ¯ ¯¯¯¯¯¯¯¯¯
+//        _________ _ _ _ _ _ _ _________ _ _ _ _ _ _ _________ _ _ _ _ _ _
+//  buf: | payload |   |       | payload |   |       | payload |   |       |
+//        ¯¯¯¯¯¯¯¯¯ ¯ ¯ ¯ ¯ ¯ ¯ ¯¯¯¯¯¯¯¯¯ ¯ ¯ ¯ ¯ ¯ ¯ ¯¯¯¯¯¯¯¯¯ ¯ ¯ ¯ ¯ ¯ ¯
 // To correctly restore such images we will extract id by this algorithm:
-//   1. pos = 0
-//   2. Extract id from buf[pos] and buf[pos + 1]
-//   3. Extract length from buf[pos + 2]..buf[pos + 5]
-//   4. Extract discard flag from length highest bit.
-//   5. If discard flag is False, copy |length| bytes starting from buf[pos + 6]
-//   to output buffer.
-//   6. pos = pos + length + 6
-//   7. If pos < buf.length - go to the step 2.
+//   1. Make a pass from the end to the beginning of the buffer to restore the
+//      original lengths, frame ids and discard flags (high bit of the length).
+//   2. If all discard flags are true, discard this encoded image.
+//   3. Make a pass from the beginning to the end, copying data to the output
+//      based on the previously extracted lengths.
 // Also it will check, that all extracted ids are equals.
 //
 // Because EncodedImage doesn't take ownership of its buffer, injector will keep
--- a/test/pc/e2e/analyzer/video/single_process_encoded_image_data_injector.cc
+++ b/test/pc/e2e/analyzer/video/single_process_encoded_image_data_injector.cc
@ -10,6 +10,7 @@

 #include "test/pc/e2e/analyzer/video/single_process_encoded_image_data_injector.h"

+#include <algorithm>
 #include <cstddef>

 #include "absl/memory/memory.h"
@ -41,7 +42,8 @@ EncodedImage SingleProcessEncodedImageDataInjector::InjectData(
  ExtractionInfo info;
  info.length = source.size();
  info.discard = discard;
-  memcpy(info.origin_data, source.data(), kUsedBufferSize);
+  size_t insertion_pos = source.size() - kUsedBufferSize;
+  memcpy(info.origin_data, &source.data()[insertion_pos], kUsedBufferSize);
  {
    rtc::CritScope crit(&lock_);
    // Will create new one if missed.
@ -51,9 +53,9 @@ EncodedImage SingleProcessEncodedImageDataInjector::InjectData(
  }

  EncodedImage out = source;
-  out.data()[0] = id & 0x00ff;
-  out.data()[1] = (id & 0xff00) >> 8;
-  out.data()[2] = info.sub_id;
+  out.data()[insertion_pos] = id & 0x00ff;
+  out.data()[insertion_pos + 1] = (id & 0xff00) >> 8;
+  out.data()[insertion_pos + 2] = info.sub_id;
  return out;
 }

@ -67,34 +69,59 @@ EncodedImageExtractionResult SingleProcessEncodedImageDataInjector::ExtractData(
  uint8_t* buffer = out.data();
  size_t size = out.size();

-  size_t pos = 0;
+  // |pos| is pointing to end of current encoded image.
+  size_t pos = size - 1;
  absl::optional<uint16_t> id = absl::nullopt;
  bool discard = true;
-  while (pos < size) {
-    // Extract frame id from first 2 bytes of the payload.
-    uint16_t next_id = buffer[pos] + (buffer[pos + 1] << 8);
-    // Extract frame sub id from second 2 byte of the payload.
-    uint16_t sub_id = buffer[pos + 2];
-
-    RTC_CHECK(!id || id.value() == next_id)
-        << "Different frames encoded into single encoded image: " << id.value()
+  std::vector<ExtractionInfo> extraction_infos;
+  // Walk the buffer from the last encoded image to the first and collect the
+  // related extraction infos; they are reversed into first-to-last order below.
+  while (true) {
+    size_t insertion_pos = pos - kUsedBufferSize + 1;
+    // Extract frame id from first 2 bytes starting from insertion pos.
+    uint16_t next_id = buffer[insertion_pos] + (buffer[insertion_pos + 1] << 8);
+    // Extract frame sub id from the 3rd byte starting from insertion pos.
+    uint16_t sub_id = buffer[insertion_pos + 2];
+    RTC_CHECK(!id || *id == next_id)
+        << "Different frames encoded into single encoded image: " << *id
        << " vs " << next_id;
    id = next_id;
-
    ExtractionInfo info;
    {
      rtc::CritScope crit(&lock_);
      auto ext_vector_it = extraction_cache_.find(next_id);
      RTC_CHECK(ext_vector_it != extraction_cache_.end())
-          << "Unknown frame id " << next_id;
+          << "Unknown frame_id=" << next_id;

      auto info_it = ext_vector_it->second.infos.find(sub_id);
      RTC_CHECK(info_it != ext_vector_it->second.infos.end())
-          << "Unknown sub id " << sub_id << " for frame " << next_id;
+          << "Unknown sub_id=" << sub_id << " for frame_id=" << next_id;
      info = info_it->second;
      ext_vector_it->second.infos.erase(info_it);
    }
+    extraction_infos.push_back(info);
+    // We need to discard encoded image only if all concatenated encoded images
+    // have to be discarded.
+    discard = discard & info.discard;
+    if (pos < info.length) {
+      break;
+    }
+    pos -= info.length;
+  }
+  RTC_CHECK(id);
+  std::reverse(extraction_infos.begin(), extraction_infos.end());
+  if (discard) {
+    out.set_size(0);
+    return EncodedImageExtractionResult{*id, out, true};
+  }

+  // Make a pass from beginning to end to restore the original payload and
+  // erase discarded encoded images.
+  pos = 0;
+  auto extraction_infos_it = extraction_infos.begin();
+  while (pos < size) {
+    RTC_DCHECK(extraction_infos_it != extraction_infos.end());
+    const ExtractionInfo& info = *extraction_infos_it;
    if (info.discard) {
      // If this encoded image is marked to be discarded - erase it's payload
      // from the buffer.
@ -102,16 +129,15 @@ EncodedImageExtractionResult SingleProcessEncodedImageDataInjector::ExtractData(
              size - pos - info.length);
      size -= info.length;
    } else {
-      memmove(&buffer[pos], info.origin_data, kUsedBufferSize);
+      memcpy(&buffer[pos + info.length - kUsedBufferSize], info.origin_data,
+             kUsedBufferSize);
      pos += info.length;
    }
-    // We need to discard encoded image only if all concatenated encoded images
-    // have to be discarded.
-    discard = discard & info.discard;
+    ++extraction_infos_it;
  }
  out.set_size(pos);

-  return EncodedImageExtractionResult{id.value(), out, discard};
+  return EncodedImageExtractionResult{*id, out, discard};
 }

 SingleProcessEncodedImageDataInjector::ExtractionInfoVector::
--- a/test/pc/e2e/analyzer/video/single_process_encoded_image_data_injector.h
+++ b/test/pc/e2e/analyzer/video/single_process_encoded_image_data_injector.h
@ -28,11 +28,11 @@ namespace test {
 // and uses same QualityAnalyzingVideoContext to obtain
 // EncodedImageDataInjector.
 //
-// To inject frame id and discard flag into EncodedImage injector uses first 2
-// bytes of EncodedImage payload. Then it uses 3rd byte for frame sub id, that
-// is required to distinguish different spatial layers. The origin data from
-// these 3 bytes will be stored inside injector's internal storage and then will
-// be restored during extraction phase.
+// To inject frame id and discard flag into EncodedImage, the injector uses the
+// 3rd-to-last and 2nd-to-last bytes of the payload for the frame id and the
+// last byte for the frame sub id, which is required to distinguish different
+// spatial layers. The original data from these 3 bytes will be stored inside
+// the injector's internal storage and restored during the extraction phase.
 //
 // This injector won't add any extra overhead into EncodedImage payload and
 // support frames with any size of payload. Also assumes that every EncodedImage