From be9b7b6881e5b0e0b54e7d2fb79c5af5f68c015b Mon Sep 17 00:00:00 2001
From: sprang <sprang@webrtc.org>
Date: Fri, 4 Sep 2015 01:06:57 -0700
Subject: [PATCH] Make sure ByteReader and ByteWriter classes (and their
 specializations) don't perform operations that have implementation-specific
 or undefined behavior.

Pitfalls:

* Left shift of signed integer has undefined behavior
* Right-shift of signed integer has platform-specific behavior is value is negative
* Cast from unsigned to signed has undefined behavior if value is negative

BUG=webrtc:4824

Review URL: https://codereview.webrtc.org/1226993003

Cr-Commit-Position: refs/heads/master@{#9854}
---
 webrtc/modules/rtp_rtcp/source/byte_io.h | 246 +++++++++++++++++++----
 1 file changed, 206 insertions(+), 40 deletions(-)
diff --git a/webrtc/modules/rtp_rtcp/source/byte_io.h b/webrtc/modules/rtp_rtcp/source/byte_io.h
index 2617806dd9..d8903b8483 100644
--- a/webrtc/modules/rtp_rtcp/source/byte_io.h
+++ b/webrtc/modules/rtp_rtcp/source/byte_io.h
@@ -42,40 +42,110 @@
 
 namespace webrtc {
 
+// According to ISO C standard ISO/IEC 9899, section 6.2.6.2 (2), the three
+// representations of signed integers allowed are two's complement, one's
+// complement and sign/magnitude. We can detect which is used by looking at
+// the two last bits of -1, which will be 11 in two's complement, 10 in one's
+// complement and 01 in sign/magnitude.
+// TODO(sprang): In the unlikely event that we actually need to support a
+// platform that doesn't use two's complement, implement conversion to/from
+// wire format.
+
+namespace {
+inline void AssertTwosComplement() {
+  // Assume the if any one signed integer type is two's complement, then all
+  // other will be too.
+  static_assert(
+      (-1 & 0x03) == 0x03,
+      "Only two's complement representation of signed integers supported.");
+}
+// Plain const char* won't work for static_assert, use #define instead.
+#define kSizeErrorMsg "Byte size must be less than or equal to data type size."
+}
+
+// Utility class for getting the unsigned equivalent of a signed type.
+template <typename T>
+struct UnsignedOf;
+
 // Class for reading integers from a sequence of bytes.
-// T = type of integer, B = bytes to read, is_signed = true if signed integer
-// If is_signed is true and B < sizeof(T), sign extension might be needed
-template<typename T, unsigned int B = sizeof(T),
-    bool is_signed = std::numeric_limits<T>::is_signed>
-class ByteReader {
+// T = type of integer, B = bytes to read, is_signed = true if signed integer.
+// If is_signed is true and B < sizeof(T), sign extension might be needed.
+template <typename T,
+          unsigned int B = sizeof(T),
+          bool is_signed = std::numeric_limits<T>::is_signed>
+class ByteReader;
+
+// Specialization of ByteReader for unsigned types.
+template <typename T, unsigned int B>
+class ByteReader<T, B, false> {
  public:
   static T ReadBigEndian(const uint8_t* data) {
-    if (is_signed && B < sizeof(T)) {
-      return SignExtend(InternalReadBigEndian(data));
-    }
+    static_assert(B <= sizeof(T), kSizeErrorMsg);
     return InternalReadBigEndian(data);
   }
 
   static T ReadLittleEndian(const uint8_t* data) {
-    if (is_signed && B < sizeof(T)) {
-      return SignExtend(InternalReadLittleEndian(data));
-    }
+    static_assert(B <= sizeof(T), kSizeErrorMsg);
     return InternalReadLittleEndian(data);
   }
 
  private:
   static T InternalReadBigEndian(const uint8_t* data) {
     T val(0);
-    for (unsigned int i = 0; i < B; ++i) {
+    for (unsigned int i = 0; i < B; ++i)
       val |= static_cast<T>(data[i]) << ((B - 1 - i) * 8);
-    }
     return val;
   }
 
   static T InternalReadLittleEndian(const uint8_t* data) {
     T val(0);
-    for (unsigned int i = 0; i < B; ++i) {
+    for (unsigned int i = 0; i < B; ++i)
       val |= static_cast<T>(data[i]) << (i * 8);
+    return val;
+  }
+};
+
+// Specialization of ByteReader for signed types.
+template <typename T, unsigned int B>
+class ByteReader<T, B, true> {
+ public:
+  typedef typename UnsignedOf<T>::Type U;
+
+  static T ReadBigEndian(const uint8_t* data) {
+    U unsigned_val = ByteReader<T, B, false>::ReadBigEndian(data);
+    if (B < sizeof(T))
+      unsigned_val = SignExtend(unsigned_val);
+    return ReinterpretAsSigned(unsigned_val);
+  }
+
+  static T ReadLittleEndian(const uint8_t* data) {
+    U unsigned_val = ByteReader<T, B, false>::ReadLittleEndian(data);
+    if (B < sizeof(T))
+      unsigned_val = SignExtend(unsigned_val);
+    return ReinterpretAsSigned(unsigned_val);
+  }
+
+ private:
+  // As a hack to avoid implementation-specific or undefined behavior when
+  // bit-shifting or casting signed integers, read as a signed equivalent
+  // instead and convert to signed. This is safe since we have asserted that
+  // two's complement for is used.
+  static T ReinterpretAsSigned(U unsigned_val) {
+    // An unsigned value with only the highest order bit set (ex 0x80).
+    const U kUnsignedHighestBitMask =
+        static_cast<U>(1) << ((sizeof(U) * 8) - 1);
+    // A signed value with only the highest bit set. Since this is two's
+    // complement form, we can use the min value from std::numeric_limits.
+    const T kSignedHighestBitMask = std::numeric_limits<T>::min();
+
+    T val;
+    if ((unsigned_val & kUnsignedHighestBitMask) != 0) {
+      // Casting is only safe when unsigned value can be represented in the
+      // signed target type, so mask out highest bit and mask it back manually.
+      val = static_cast<T>(unsigned_val & ~kUnsignedHighestBitMask);
+      val |= kSignedHighestBitMask;
+    } else {
+      val = static_cast<T>(unsigned_val);
     }
     return val;
   }
@@ -85,16 +155,16 @@ class ByteReader {
   // extend the remaining byte(s) with ones so that the correct negative
   // number is retained.
   // Ex: 0x810A0B -> 0xFF810A0B, but 0x710A0B -> 0x00710A0B
-  static T SignExtend(const T val) {
-    uint8_t msb = static_cast<uint8_t>(val >> ((B - 1) * 8));
-    if (msb & 0x80) {
-      // Sign extension is -1 (all ones) shifted left B bytes.
-      // The "B % sizeof(T)"-part is there to avoid compiler warning for
-      // shifting the whole size of the data type.
-      T sign_extend = (sizeof(T) == B ? 0 :
-          (static_cast<T>(-1L) << ((B % sizeof(T)) * 8)));
-
-      return val | sign_extend;
+  static U SignExtend(const U val) {
+    const uint8_t kMsb = static_cast<uint8_t>(val >> ((B - 1) * 8));
+    if ((kMsb & 0x80) != 0) {
+      // Create a mask where all bits used by the B bytes are set to one,
+      // for instance 0x00FFFFFF for B = 3. Bit-wise invert that mask (to
+      // (0xFF000000 in the example above) and add it to the input value.
+      // The "B % sizeof(T)" is a workaround to undefined values warnings for
+      // B == sizeof(T), in which case this code won't be called anyway.
+      const U kUsedBitsMask = (1 << ((B % sizeof(T)) * 8)) - 1;
+      return ~kUsedBitsMask | val;
     }
     return val;
   }
@@ -102,71 +172,162 @@ class ByteReader {
 
 // Class for writing integers to a sequence of bytes
 // T = type of integer, B = bytes to write
-template<typename T, unsigned int B = sizeof(T)>
-class ByteWriter {
+template <typename T,
+          unsigned int B = sizeof(T),
+          bool is_signed = std::numeric_limits<T>::is_signed>
+class ByteWriter;
+
+// Specialization of ByteWriter for unsigned types.
+template <typename T, unsigned int B>
+class ByteWriter<T, B, false> {
  public:
   static void WriteBigEndian(uint8_t* data, T val) {
+    static_assert(B <= sizeof(T), kSizeErrorMsg);
     for (unsigned int i = 0; i < B; ++i) {
       data[i] = val >> ((B - 1 - i) * 8);
     }
   }
 
   static void WriteLittleEndian(uint8_t* data, T val) {
+    static_assert(B <= sizeof(T), kSizeErrorMsg);
     for (unsigned int i = 0; i < B; ++i) {
       data[i] = val >> (i * 8);
     }
   }
 };
 
+// Specialization of ByteWriter for signed types.
+template <typename T, unsigned int B>
+class ByteWriter<T, B, true> {
+ public:
+  typedef typename UnsignedOf<T>::Type U;
 
-// -------- Below follows specializations for B in { 2, 4, 8 } --------
+  static void WriteBigEndian(uint8_t* data, T val) {
+    ByteWriter<U, B, false>::WriteBigEndian(data, ReinterpretAsUnsigned(val));
+  }
 
+  static void WriteLittleEndian(uint8_t* data, T val) {
+    ByteWriter<U, B, false>::WriteLittleEndian(data,
+                                               ReinterpretAsUnsigned(val));
+  }
 
-// Specializations for two byte words
-template<typename T, bool is_signed>
-class ByteReader<T, 2, is_signed> {
+ private:
+  static U ReinterpretAsUnsigned(T val) {
+    // According to ISO C standard ISO/IEC 9899, section 6.3.1.3 (1, 2) a
+    // conversion from signed to unsigned keeps the value if the new type can
+    // represent it, and otherwise adds one more than the max value of T until
+    // the value is in range. For two's complement, this fortunately means
+    // that the bit-wise value will be intact. Thus, since we have asserted that
+    // two's complement form is actually used, a simple cast is sufficient.
+    return static_cast<U>(val);
+  }
+};
+
+// ----- Below follows specializations of UnsignedOf utility class -----
+
+template <>
+struct UnsignedOf<int8_t> {
+  typedef uint8_t Type;
+};
+template <>
+struct UnsignedOf<int16_t> {
+  typedef uint16_t Type;
+};
+template <>
+struct UnsignedOf<int32_t> {
+  typedef uint32_t Type;
+};
+template <>
+struct UnsignedOf<int64_t> {
+  typedef uint64_t Type;
+};
+
+// ----- Below follows specializations for unsigned, B in { 1, 2, 4, 8 } -----
+
+// TODO(sprang): Check if these actually help or if generic cases will be
+// unrolled to and optimized to similar performance.
+
+// Specializations for single bytes
+template <typename T>
+class ByteReader<T, 1, false> {
  public:
   static T ReadBigEndian(const uint8_t* data) {
+    static_assert(sizeof(T) == 1, kSizeErrorMsg);
+    return data[0];
+  }
+
+  static T ReadLittleEndian(const uint8_t* data) {
+    static_assert(sizeof(T) == 1, kSizeErrorMsg);
+    return data[0];
+  }
+};
+
+template <typename T>
+class ByteWriter<T, 1, false> {
+ public:
+  static void WriteBigEndian(uint8_t* data, T val) {
+    static_assert(sizeof(T) == 1, kSizeErrorMsg);
+    data[0] = val;
+  }
+
+  static void WriteLittleEndian(uint8_t* data, T val) {
+    static_assert(sizeof(T) == 1, kSizeErrorMsg);
+    data[0] = val;
+  }
+};
+
+// Specializations for two byte words
+template <typename T>
+class ByteReader<T, 2, false> {
+ public:
+  static T ReadBigEndian(const uint8_t* data) {
+    static_assert(sizeof(T) >= 2, kSizeErrorMsg);
     return (data[0] << 8) | data[1];
   }
 
   static T ReadLittleEndian(const uint8_t* data) {
+    static_assert(sizeof(T) >= 2, kSizeErrorMsg);
     return data[0] | (data[1] << 8);
   }
 };
 
-template<typename T>
-class ByteWriter<T, 2> {
+template <typename T>
+class ByteWriter<T, 2, false> {
  public:
   static void WriteBigEndian(uint8_t* data, T val) {
+    static_assert(sizeof(T) >= 2, kSizeErrorMsg);
     data[0] = val >> 8;
     data[1] = val;
   }
 
   static void WriteLittleEndian(uint8_t* data, T val) {
+    static_assert(sizeof(T) >= 2, kSizeErrorMsg);
     data[0] = val;
     data[1] = val >> 8;
   }
 };
 
 // Specializations for four byte words.
-template<typename T, bool is_signed>
-class ByteReader<T, 4, is_signed> {
+template <typename T>
+class ByteReader<T, 4, false> {
  public:
   static T ReadBigEndian(const uint8_t* data) {
+    static_assert(sizeof(T) >= 4, kSizeErrorMsg);
     return (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3];
   }
 
   static T ReadLittleEndian(const uint8_t* data) {
+    static_assert(sizeof(T) >= 4, kSizeErrorMsg);
     return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
   }
 };
 
 // Specializations for four byte words.
-template<typename T>
-class ByteWriter<T, 4> {
+template <typename T>
+class ByteWriter<T, 4, false> {
  public:
   static void WriteBigEndian(uint8_t* data, T val) {
+    static_assert(sizeof(T) >= 4, kSizeErrorMsg);
     data[0] = val >> 24;
     data[1] = val >> 16;
     data[2] = val >> 8;
@@ -174,6 +335,7 @@ class ByteWriter<T, 4> {
   }
 
   static void WriteLittleEndian(uint8_t* data, T val) {
+    static_assert(sizeof(T) >= 4, kSizeErrorMsg);
     data[0] = val;
     data[1] = val >> 8;
     data[2] = val >> 16;
@@ -182,10 +344,11 @@ class ByteWriter<T, 4> {
 };
 
 // Specializations for eight byte words.
-template<typename T, bool is_signed>
-class ByteReader<T, 8, is_signed> {
+template <typename T>
+class ByteReader<T, 8, false> {
  public:
   static T ReadBigEndian(const uint8_t* data) {
+    static_assert(sizeof(T) >= 8, kSizeErrorMsg);
     return
         (Get(data, 0) << 56) | (Get(data, 1) << 48) |
         (Get(data, 2) << 40) | (Get(data, 3) << 32) |
@@ -194,6 +357,7 @@ class ByteReader<T, 8, is_signed> {
   }
 
   static T ReadLittleEndian(const uint8_t* data) {
+    static_assert(sizeof(T) >= 8, kSizeErrorMsg);
     return
          Get(data, 0)        | (Get(data, 1) << 8)  |
         (Get(data, 2) << 16) | (Get(data, 3) << 24) |
@@ -207,10 +371,11 @@ class ByteReader<T, 8, is_signed> {
   }
 };
 
-template<typename T>
-class ByteWriter<T, 8> {
+template <typename T>
+class ByteWriter<T, 8, false> {
  public:
   static void WriteBigEndian(uint8_t* data, T val) {
+    static_assert(sizeof(T) >= 8, kSizeErrorMsg);
     data[0] = val >> 56;
     data[1] = val >> 48;
     data[2] = val >> 40;
@@ -222,6 +387,7 @@ class ByteWriter<T, 8> {
   }
 
   static void WriteLittleEndian(uint8_t* data, T val) {
+    static_assert(sizeof(T) >= 8, kSizeErrorMsg);
     data[0] = val;
     data[1] = val >> 8;
     data[2] = val >> 16;