github · MathiasVP · Jan 30, 2025 · Jan 29, 2025 · Jan 29, 2025 · Jan 29, 2025
@@ -0,0 +1,4 @@
+---
+category: feature
+---
+* A new predicate `getOffsetInClass` was added to the `Field` class, which computes the byte offset of a field relative to a given `Class`.
@@ -5,6 +5,30 @@
 import semmle.code.cpp.Variable
 import semmle.code.cpp.Enum
 
+private predicate hasAFieldWithOffset(Class c, Field f, int offset) {
+  // Base case: `f` is a field in `c`.
+  f = c.getAField() and
+  offset = f.getByteOffset() and
+  not f.getUnspecifiedType().(Class).hasDefinition()
+  or
+  // Otherwise, we find the struct that is a field of `c` which then has
+  // the field `f` as a member.
+  exists(Field g |
+    g = c.getAField() and
+    // Find the field with the largest offset that's less than or equal to
+    // offset. That's the struct we need to search recursively.
+    g =
+      max(Field cand, int candOffset |
+        cand = c.getAField() and
+        candOffset = cand.getByteOffset() and
+        offset >= candOffset
+      |
+        cand order by candOffset
+      ) and
+    hasAFieldWithOffset(g.getUnspecifiedType(), f, offset - g.getByteOffset())
+  )
+}
+
 /**
  * A C structure member or C++ non-static member variable. For example the
  * member variable `m` in the following code (but not `s`):
@@ -76,6 +100,27 @@ class Field extends MemberVariable {
         rank[result + 1](int index | cls.getCanonicalMember(index).(Field).isInitializable())
     )
   }
+
+  /**
+   * Gets the offset (in bytes) of this field starting at `c`.
+   *
+   * For example, consider:
+   * ```cpp
+   * struct S1 {
+   *   int a;
+   *   void* b;
+   * };
+   *
+   * struct S2 {
+   *   S1 s1;
+   *   char c;
+   * };
+   * ```
+   * If `f` represents the field `s1` and `c` represents the class `S2` then
+   * `f.getOffsetInClass(S2) = 0` holds. Likewise, if `f` represents the
+   * field `a`, then `f.getOffsetInClass(c) = 0` holds.
+   */
+  int getOffsetInClass(Class c) { hasAFieldWithOffset(c, this, result) }
 }
 
 /**

@@ -24,6 +24,74 @@ predicate memberMayBeVarSize(Class c, MemberVariable v) {
   exists(ArrayType t | t = v.getUnspecifiedType() | not t.getArraySize() > 1)
 }
 
+/**
+ * Given a chain of accesses of the form `x.f1.f2...fn` this
+ * predicate gives the type of `x`. Note that `x` may be an implicit
+ * `this` expression.
+ */
+private Class getRootType(FieldAccess fa) {
+  // If the object is accessed inside a member function then the root will
+  // be a(n implicit) `this`. And the root type will be the type of `this`.
+  exists(VariableAccess root |
+    root = fa.getQualifier*() and
+    result =
+      root.getQualifier()
+          .(ThisExpr)
+          .getUnspecifiedType()
+          .(PointerType)
+          .getBaseType()
+          .getUnspecifiedType()
+  )
+  or
+  // Otherwise, if this is not inside a member function there will not be
+  // a(n implicit) `this`. And the root type is the type of the outermost
+  // access.
+  exists(VariableAccess root |
+    root = fa.getQualifier+() and
+    not exists(root.getQualifier()) and
+    result = root.getUnspecifiedType()
+  )
+}
+
+/**
+ * Gets the size of the buffer access at `va`.
+ */
+private int getSize(VariableAccess va) {
+  exists(Variable v | va.getTarget() = v |
+    // If `v` is not a field then the size of the buffer is just
+    // the size of the type of `v`.
+    exists(Type t |
+      t = v.getUnspecifiedType() and
+      not v instanceof Field and
+      not t instanceof ReferenceType and
+      result = t.getSize()
+    )
+    or
+    exists(Class c |
+      // Otherwise, we find the "outermost" object and compute the size
+      // as the difference between the size of the type of the "outermost
+      // object" and the offset of the field relative to that type.
+      // For example, consider the following structs:
+      // ```
+      // struct S {
+      //   uint32_t x;
+      //   uint32_t y;
+      // };
+      // struct S2 {
+      //   S s;
+      //   uint32_t z;
+      // };
+      // ```
+      // Given an object `S2 s2` the size of the buffer `&s2.s.y`
+      // is the size of the base object type (i.e., `S2`) minutes the offset
+      // of `y` relative to the type `S2` (i.e., `4`). So the size of the
+      // buffer is `12 - 4 = 8`.
+      c = getRootType(va) and
+      result = c.getSize() - v.(Field).getOffsetInClass(c)
+    )
+  )
+}
+
 /**
  * Holds if `bufferExpr` is an allocation-like expression.
  *
@@ -54,37 +122,11 @@ private int isSource(Expr bufferExpr, Element why) {
   result = bufferExpr.(AllocationExpr).getSizeBytes() and
   why = bufferExpr
   or
-  exists(Type bufferType, Variable v |
+  exists(Variable v |
     v = why and
     // buffer is the address of a variable
     why = bufferExpr.(AddressOfExpr).getAddressable() and
-    bufferType = v.getUnspecifiedType() and
-    not bufferType instanceof ReferenceType and
-    not any(Union u).getAMemberVariable() = why
-  |
-    not v instanceof Field and
-    result = bufferType.getSize()
-    or
-    // If it's an address of a field (i.e., a non-static member variable)
-    // then it's okay to use that address to access the other member variables.
-    // For example, this is okay:
-    // ```
-    // struct S { uint8_t a, b, c; };
-    // S s;
-    // memset(&s.a, 0, sizeof(S) - offsetof(S, a));
-    exists(Field f |
-      v = f and
-      result = f.getDeclaringType().getSize() - f.getByteOffset()
-    )
-  )
-  or
-  exists(Union bufferType |
-    // buffer is the address of a union member; in this case, we
-    // take the size of the union itself rather the union member, since
-    // it's usually OK to access that amount (e.g. clearing with memset).
-    why = bufferExpr.(AddressOfExpr).getAddressable() and
-    bufferType.getAMemberVariable() = why and
-    result = bufferType.getSize()
+    result = getSize(bufferExpr.(AddressOfExpr).getOperand())
   )
 }
 

@@ -14,48 +14,6 @@ import cpp
 import semmle.code.cpp.dataflow.new.DataFlow
 import Flow::PathGraph
 
-/**
- * Holds if `f` is a field located at byte offset `offset` in `c`.
- *
- * Note that predicate is recursive, so that given the following:
- * ```cpp
- * struct S1 {
- *   int a;
- *   void* b;
- * };
- *
- * struct S2 {
- *   S1 s1;
- *   char c;
- * };
- * ```
- * both `hasAFieldWithOffset(S2, s1, 0)` and `hasAFieldWithOffset(S2, a, 0)`
- * holds.
- */
-predicate hasAFieldWithOffset(Class c, Field f, int offset) {
-  // Base case: `f` is a field in `c`.
-  f = c.getAField() and
-  offset = f.getByteOffset() and
-  not f.getUnspecifiedType().(Class).hasDefinition()
-  or
-  // Otherwise, we find the struct that is a field of `c` which then has
-  // the field `f` as a member.
-  exists(Field g |
-    g = c.getAField() and
-    // Find the field with the largest offset that's less than or equal to
-    // offset. That's the struct we need to search recursively.
-    g =
-      max(Field cand, int candOffset |
-        cand = c.getAField() and
-        candOffset = cand.getByteOffset() and
-        offset >= candOffset
-      |
-        cand order by candOffset
-      ) and
-    hasAFieldWithOffset(g.getUnspecifiedType(), f, offset - g.getByteOffset())
-  )
-}
-
 /** Holds if `f` is the last field of its declaring class. */
 predicate lastField(Field f) {
   exists(Class c | c = f.getDeclaringType() |
@@ -75,7 +33,7 @@ predicate lastField(Field f) {
 bindingset[f1, offset, c2]
 pragma[inline_late]
 predicate hasCompatibleFieldAtOffset(Field f1, int offset, Class c2) {
-  exists(Field f2 | hasAFieldWithOffset(c2, f2, offset) |
+  exists(Field f2 | offset = f2.getOffsetInClass(c2) |
     // Let's not deal with bit-fields for now.
     f2 instanceof BitField
     or
@@ -100,15 +58,15 @@ predicate prefix(Class c1, Class c2) {
     exists(Field f1, int offset |
       // Let's not deal with bit-fields for now.
       not f1 instanceof BitField and
-      hasAFieldWithOffset(c1, f1, offset)
+      offset = f1.getOffsetInClass(c1)
     |
       hasCompatibleFieldAtOffset(f1, offset, c2)
     )
   else
     forall(Field f1, int offset |
       // Let's not deal with bit-fields for now.
       not f1 instanceof BitField and
-      hasAFieldWithOffset(c1, f1, offset)
+      offset = f1.getOffsetInClass(c1)
     |
       hasCompatibleFieldAtOffset(f1, offset, c2)
     )

@@ -53,6 +53,33 @@
 | tests.cpp:712:3:712:8 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:693:16:693:16 | c | destination buffer |
 | tests.cpp:716:3:716:8 | call to memset | This 'memset' operation accesses 24 bytes but the $@ is only 16 bytes. | tests.cpp:692:16:692:16 | b | destination buffer |
 | tests.cpp:727:2:727:7 | call to memset | This 'memset' operation accesses 24 bytes but the $@ is only 8 bytes. | tests.cpp:693:16:693:16 | c | destination buffer |
+| tests.cpp:753:5:753:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:735:20:735:22 | b_1 | destination buffer |
+| tests.cpp:756:5:756:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:735:20:735:22 | b_1 | destination buffer |
+| tests.cpp:760:5:760:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
+| tests.cpp:761:5:761:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
+| tests.cpp:763:5:763:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
+| tests.cpp:764:5:764:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
+| tests.cpp:774:5:774:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:740:20:740:22 | b_2 | destination buffer |
+| tests.cpp:777:5:777:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:740:20:740:22 | b_2 | destination buffer |
+| tests.cpp:795:5:795:10 | call to memset | This 'memset' operation accesses 8 bytes but the $@ is only 4 bytes. | tests.cpp:790:16:790:16 | b | destination buffer |
+| tests.cpp:822:5:822:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
+| tests.cpp:825:5:825:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
+| tests.cpp:827:5:827:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
+| tests.cpp:830:5:830:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
+| tests.cpp:831:5:831:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
+| tests.cpp:833:5:833:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
+| tests.cpp:835:5:835:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
+| tests.cpp:846:5:846:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
+| tests.cpp:847:5:847:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
+| tests.cpp:848:5:848:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
+| tests.cpp:849:5:849:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
+| tests.cpp:851:5:851:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
+| tests.cpp:862:5:862:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
+| tests.cpp:863:5:863:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
+| tests.cpp:864:5:864:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
+| tests.cpp:865:5:865:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
+| tests.cpp:866:5:866:10 | call to memset | This 'memset' operation accesses 8 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
+| tests.cpp:867:5:867:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
 | tests_restrict.c:12:2:12:7 | call to memcpy | This 'memcpy' operation accesses 2 bytes but the $@ is only 1 byte. | tests_restrict.c:7:6:7:13 | smallbuf | source buffer |
 | unions.cpp:26:2:26:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:21:10:21:11 | mu | destination buffer |
 | unions.cpp:30:2:30:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:15:7:15:11 | small | destination buffer |

@@ -27,8 +27,8 @@ edges
 | main.cpp:9:29:9:32 | *argv | tests_restrict.c:15:41:15:44 | *argv | provenance |  |
 | main.cpp:9:29:9:32 | tests_restrict_main output argument | main.cpp:10:20:10:23 | **argv | provenance |  |
 | main.cpp:9:29:9:32 | tests_restrict_main output argument | main.cpp:10:20:10:23 | *argv | provenance |  |
-| main.cpp:10:20:10:23 | **argv | tests.cpp:730:32:730:35 | **argv | provenance |  |
-| main.cpp:10:20:10:23 | *argv | tests.cpp:730:32:730:35 | *argv | provenance |  |
+| main.cpp:10:20:10:23 | **argv | tests.cpp:872:32:872:35 | **argv | provenance |  |
+| main.cpp:10:20:10:23 | *argv | tests.cpp:872:32:872:35 | *argv | provenance |  |
 | overflowdestination.cpp:23:45:23:48 | **argv | overflowdestination.cpp:23:45:23:48 | **argv | provenance |  |
 | overflowdestination.cpp:23:45:23:48 | **argv | overflowdestination.cpp:23:45:23:48 | *argv | provenance |  |
 | test_buffer_overrun.cpp:32:46:32:49 | **argv | test_buffer_overrun.cpp:32:46:32:49 | **argv | provenance |  |
@@ -41,12 +41,12 @@ edges
 | tests.cpp:628:14:628:14 | *s [*home] | tests.cpp:628:14:628:19 | *home | provenance |  |
 | tests.cpp:628:14:628:14 | *s [*home] | tests.cpp:628:16:628:19 | *home | provenance |  |
 | tests.cpp:628:16:628:19 | *home | tests.cpp:628:14:628:19 | *home | provenance |  |
-| tests.cpp:730:32:730:35 | **argv | tests.cpp:755:9:755:15 | *access to array | provenance |  |
-| tests.cpp:730:32:730:35 | **argv | tests.cpp:756:9:756:15 | *access to array | provenance |  |
-| tests.cpp:730:32:730:35 | *argv | tests.cpp:755:9:755:15 | *access to array | provenance |  |
-| tests.cpp:730:32:730:35 | *argv | tests.cpp:756:9:756:15 | *access to array | provenance |  |
-| tests.cpp:755:9:755:15 | *access to array | tests.cpp:613:19:613:24 | *source | provenance |  |
-| tests.cpp:756:9:756:15 | *access to array | tests.cpp:622:19:622:24 | *source | provenance |  |
+| tests.cpp:872:32:872:35 | **argv | tests.cpp:897:9:897:15 | *access to array | provenance |  |
+| tests.cpp:872:32:872:35 | **argv | tests.cpp:898:9:898:15 | *access to array | provenance |  |
+| tests.cpp:872:32:872:35 | *argv | tests.cpp:897:9:897:15 | *access to array | provenance |  |
+| tests.cpp:872:32:872:35 | *argv | tests.cpp:898:9:898:15 | *access to array | provenance |  |
+| tests.cpp:897:9:897:15 | *access to array | tests.cpp:613:19:613:24 | *source | provenance |  |
+| tests.cpp:898:9:898:15 | *access to array | tests.cpp:622:19:622:24 | *source | provenance |  |
 | tests_restrict.c:15:41:15:44 | **argv | tests_restrict.c:15:41:15:44 | **argv | provenance |  |
 | tests_restrict.c:15:41:15:44 | *argv | tests_restrict.c:15:41:15:44 | *argv | provenance |  |
 nodes
@@ -80,10 +80,10 @@ nodes
 | tests.cpp:628:14:628:14 | *s [*home] | semmle.label | *s [*home] |
 | tests.cpp:628:14:628:19 | *home | semmle.label | *home |
 | tests.cpp:628:16:628:19 | *home | semmle.label | *home |
-| tests.cpp:730:32:730:35 | **argv | semmle.label | **argv |
-| tests.cpp:730:32:730:35 | *argv | semmle.label | *argv |
-| tests.cpp:755:9:755:15 | *access to array | semmle.label | *access to array |
-| tests.cpp:756:9:756:15 | *access to array | semmle.label | *access to array |
+| tests.cpp:872:32:872:35 | **argv | semmle.label | **argv |
+| tests.cpp:872:32:872:35 | *argv | semmle.label | *argv |
+| tests.cpp:897:9:897:15 | *access to array | semmle.label | *access to array |
+| tests.cpp:898:9:898:15 | *access to array | semmle.label | *access to array |
 | tests_restrict.c:15:41:15:44 | **argv | semmle.label | **argv |
 | tests_restrict.c:15:41:15:44 | **argv | semmle.label | **argv |
 | tests_restrict.c:15:41:15:44 | *argv | semmle.label | *argv |