Skip to content

Commit f35fea3

Browse files
authored
Merge pull request #18629 from MathiasVP/fix-more-fps-in-buffer-overflow
C++: Fix more FPs in `cpp/overflow-buffer`
2 parents 0a8b76c + 02cf458 commit f35fea3

File tree

7 files changed

+303
-85
lines changed

7 files changed

+303
-85
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: feature
3+
---
4+
* A new predicate `getOffsetInClass` was added to the `Field` class, which computes the byte offset of a field relative to a given `Class`.

cpp/ql/lib/semmle/code/cpp/Field.qll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,30 @@
55
import semmle.code.cpp.Variable
66
import semmle.code.cpp.Enum
77

8+
private predicate hasAFieldWithOffset(Class c, Field f, int offset) {
9+
// Base case: `f` is a field in `c`.
10+
f = c.getAField() and
11+
offset = f.getByteOffset() and
12+
not f.getUnspecifiedType().(Class).hasDefinition()
13+
or
14+
// Otherwise, we find the struct that is a field of `c` which then has
15+
// the field `f` as a member.
16+
exists(Field g |
17+
g = c.getAField() and
18+
// Find the field with the largest offset that's less than or equal to
19+
// offset. That's the struct we need to search recursively.
20+
g =
21+
max(Field cand, int candOffset |
22+
cand = c.getAField() and
23+
candOffset = cand.getByteOffset() and
24+
offset >= candOffset
25+
|
26+
cand order by candOffset
27+
) and
28+
hasAFieldWithOffset(g.getUnspecifiedType(), f, offset - g.getByteOffset())
29+
)
30+
}
31+
832
/**
933
* A C structure member or C++ non-static member variable. For example the
1034
* member variable `m` in the following code (but not `s`):
@@ -76,6 +100,27 @@ class Field extends MemberVariable {
76100
rank[result + 1](int index | cls.getCanonicalMember(index).(Field).isInitializable())
77101
)
78102
}
103+
104+
/**
105+
* Gets the offset (in bytes) of this field starting at `c`.
106+
*
107+
* For example, consider:
108+
* ```cpp
109+
* struct S1 {
110+
* int a;
111+
* void* b;
112+
* };
113+
*
114+
* struct S2 {
115+
* S1 s1;
116+
* char c;
117+
* };
118+
* ```
119+
* If `f` represents the field `s1` and `c` represents the class `S2` then
120+
* `f.getOffsetInClass(S2) = 0` holds. Likewise, if `f` represents the
121+
* field `a`, then `f.getOffsetInClass(c) = 0` holds.
122+
*/
123+
int getOffsetInClass(Class c) { hasAFieldWithOffset(c, this, result) }
79124
}
80125

81126
/**

cpp/ql/lib/semmle/code/cpp/commons/Buffer.qll

Lines changed: 70 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,74 @@ predicate memberMayBeVarSize(Class c, MemberVariable v) {
2424
exists(ArrayType t | t = v.getUnspecifiedType() | not t.getArraySize() > 1)
2525
}
2626

27+
/**
28+
* Given a chain of accesses of the form `x.f1.f2...fn` this
29+
* predicate gives the type of `x`. Note that `x` may be an implicit
30+
* `this` expression.
31+
*/
32+
private Class getRootType(FieldAccess fa) {
33+
// If the object is accessed inside a member function then the root will
34+
// be a(n implicit) `this`. And the root type will be the type of `this`.
35+
exists(VariableAccess root |
36+
root = fa.getQualifier*() and
37+
result =
38+
root.getQualifier()
39+
.(ThisExpr)
40+
.getUnspecifiedType()
41+
.(PointerType)
42+
.getBaseType()
43+
.getUnspecifiedType()
44+
)
45+
or
46+
// Otherwise, if this is not inside a member function there will not be
47+
// a(n implicit) `this`. And the root type is the type of the outermost
48+
// access.
49+
exists(VariableAccess root |
50+
root = fa.getQualifier+() and
51+
not exists(root.getQualifier()) and
52+
result = root.getUnspecifiedType()
53+
)
54+
}
55+
56+
/**
57+
* Gets the size of the buffer access at `va`.
58+
*/
59+
private int getSize(VariableAccess va) {
60+
exists(Variable v | va.getTarget() = v |
61+
// If `v` is not a field then the size of the buffer is just
62+
// the size of the type of `v`.
63+
exists(Type t |
64+
t = v.getUnspecifiedType() and
65+
not v instanceof Field and
66+
not t instanceof ReferenceType and
67+
result = t.getSize()
68+
)
69+
or
70+
exists(Class c |
71+
// Otherwise, we find the "outermost" object and compute the size
72+
// as the difference between the size of the type of the "outermost
73+
// object" and the offset of the field relative to that type.
74+
// For example, consider the following structs:
75+
// ```
76+
// struct S {
77+
// uint32_t x;
78+
// uint32_t y;
79+
// };
80+
// struct S2 {
81+
// S s;
82+
// uint32_t z;
83+
// };
84+
// ```
85+
// Given an object `S2 s2` the size of the buffer `&s2.s.y`
86+
// is the size of the base object type (i.e., `S2`) minutes the offset
87+
// of `y` relative to the type `S2` (i.e., `4`). So the size of the
88+
// buffer is `12 - 4 = 8`.
89+
c = getRootType(va) and
90+
result = c.getSize() - v.(Field).getOffsetInClass(c)
91+
)
92+
)
93+
}
94+
2795
/**
2896
* Holds if `bufferExpr` is an allocation-like expression.
2997
*
@@ -54,37 +122,11 @@ private int isSource(Expr bufferExpr, Element why) {
54122
result = bufferExpr.(AllocationExpr).getSizeBytes() and
55123
why = bufferExpr
56124
or
57-
exists(Type bufferType, Variable v |
125+
exists(Variable v |
58126
v = why and
59127
// buffer is the address of a variable
60128
why = bufferExpr.(AddressOfExpr).getAddressable() and
61-
bufferType = v.getUnspecifiedType() and
62-
not bufferType instanceof ReferenceType and
63-
not any(Union u).getAMemberVariable() = why
64-
|
65-
not v instanceof Field and
66-
result = bufferType.getSize()
67-
or
68-
// If it's an address of a field (i.e., a non-static member variable)
69-
// then it's okay to use that address to access the other member variables.
70-
// For example, this is okay:
71-
// ```
72-
// struct S { uint8_t a, b, c; };
73-
// S s;
74-
// memset(&s.a, 0, sizeof(S) - offsetof(S, a));
75-
exists(Field f |
76-
v = f and
77-
result = f.getDeclaringType().getSize() - f.getByteOffset()
78-
)
79-
)
80-
or
81-
exists(Union bufferType |
82-
// buffer is the address of a union member; in this case, we
83-
// take the size of the union itself rather the union member, since
84-
// it's usually OK to access that amount (e.g. clearing with memset).
85-
why = bufferExpr.(AddressOfExpr).getAddressable() and
86-
bufferType.getAMemberVariable() = why and
87-
result = bufferType.getSize()
129+
result = getSize(bufferExpr.(AddressOfExpr).getOperand())
88130
)
89131
}
90132

cpp/ql/src/Security/CWE/CWE-843/TypeConfusion.ql

Lines changed: 3 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -14,48 +14,6 @@ import cpp
1414
import semmle.code.cpp.dataflow.new.DataFlow
1515
import Flow::PathGraph
1616

17-
/**
18-
* Holds if `f` is a field located at byte offset `offset` in `c`.
19-
*
20-
* Note that predicate is recursive, so that given the following:
21-
* ```cpp
22-
* struct S1 {
23-
* int a;
24-
* void* b;
25-
* };
26-
*
27-
* struct S2 {
28-
* S1 s1;
29-
* char c;
30-
* };
31-
* ```
32-
* both `hasAFieldWithOffset(S2, s1, 0)` and `hasAFieldWithOffset(S2, a, 0)`
33-
* holds.
34-
*/
35-
predicate hasAFieldWithOffset(Class c, Field f, int offset) {
36-
// Base case: `f` is a field in `c`.
37-
f = c.getAField() and
38-
offset = f.getByteOffset() and
39-
not f.getUnspecifiedType().(Class).hasDefinition()
40-
or
41-
// Otherwise, we find the struct that is a field of `c` which then has
42-
// the field `f` as a member.
43-
exists(Field g |
44-
g = c.getAField() and
45-
// Find the field with the largest offset that's less than or equal to
46-
// offset. That's the struct we need to search recursively.
47-
g =
48-
max(Field cand, int candOffset |
49-
cand = c.getAField() and
50-
candOffset = cand.getByteOffset() and
51-
offset >= candOffset
52-
|
53-
cand order by candOffset
54-
) and
55-
hasAFieldWithOffset(g.getUnspecifiedType(), f, offset - g.getByteOffset())
56-
)
57-
}
58-
5917
/** Holds if `f` is the last field of its declaring class. */
6018
predicate lastField(Field f) {
6119
exists(Class c | c = f.getDeclaringType() |
@@ -75,7 +33,7 @@ predicate lastField(Field f) {
7533
bindingset[f1, offset, c2]
7634
pragma[inline_late]
7735
predicate hasCompatibleFieldAtOffset(Field f1, int offset, Class c2) {
78-
exists(Field f2 | hasAFieldWithOffset(c2, f2, offset) |
36+
exists(Field f2 | offset = f2.getOffsetInClass(c2) |
7937
// Let's not deal with bit-fields for now.
8038
f2 instanceof BitField
8139
or
@@ -100,15 +58,15 @@ predicate prefix(Class c1, Class c2) {
10058
exists(Field f1, int offset |
10159
// Let's not deal with bit-fields for now.
10260
not f1 instanceof BitField and
103-
hasAFieldWithOffset(c1, f1, offset)
61+
offset = f1.getOffsetInClass(c1)
10462
|
10563
hasCompatibleFieldAtOffset(f1, offset, c2)
10664
)
10765
else
10866
forall(Field f1, int offset |
10967
// Let's not deal with bit-fields for now.
11068
not f1 instanceof BitField and
111-
hasAFieldWithOffset(c1, f1, offset)
69+
offset = f1.getOffsetInClass(c1)
11270
|
11371
hasCompatibleFieldAtOffset(f1, offset, c2)
11472
)

cpp/ql/test/query-tests/Security/CWE/CWE-119/semmle/tests/OverflowBuffer.expected

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,33 @@
5353
| tests.cpp:712:3:712:8 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:693:16:693:16 | c | destination buffer |
5454
| tests.cpp:716:3:716:8 | call to memset | This 'memset' operation accesses 24 bytes but the $@ is only 16 bytes. | tests.cpp:692:16:692:16 | b | destination buffer |
5555
| tests.cpp:727:2:727:7 | call to memset | This 'memset' operation accesses 24 bytes but the $@ is only 8 bytes. | tests.cpp:693:16:693:16 | c | destination buffer |
56+
| tests.cpp:753:5:753:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:735:20:735:22 | b_1 | destination buffer |
57+
| tests.cpp:756:5:756:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:735:20:735:22 | b_1 | destination buffer |
58+
| tests.cpp:760:5:760:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
59+
| tests.cpp:761:5:761:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
60+
| tests.cpp:763:5:763:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
61+
| tests.cpp:764:5:764:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
62+
| tests.cpp:774:5:774:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:740:20:740:22 | b_2 | destination buffer |
63+
| tests.cpp:777:5:777:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:740:20:740:22 | b_2 | destination buffer |
64+
| tests.cpp:795:5:795:10 | call to memset | This 'memset' operation accesses 8 bytes but the $@ is only 4 bytes. | tests.cpp:790:16:790:16 | b | destination buffer |
65+
| tests.cpp:822:5:822:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
66+
| tests.cpp:825:5:825:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
67+
| tests.cpp:827:5:827:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
68+
| tests.cpp:830:5:830:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
69+
| tests.cpp:831:5:831:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
70+
| tests.cpp:833:5:833:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
71+
| tests.cpp:835:5:835:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
72+
| tests.cpp:846:5:846:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
73+
| tests.cpp:847:5:847:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
74+
| tests.cpp:848:5:848:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
75+
| tests.cpp:849:5:849:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
76+
| tests.cpp:851:5:851:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
77+
| tests.cpp:862:5:862:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
78+
| tests.cpp:863:5:863:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
79+
| tests.cpp:864:5:864:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
80+
| tests.cpp:865:5:865:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
81+
| tests.cpp:866:5:866:10 | call to memset | This 'memset' operation accesses 8 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
82+
| tests.cpp:867:5:867:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
5683
| tests_restrict.c:12:2:12:7 | call to memcpy | This 'memcpy' operation accesses 2 bytes but the $@ is only 1 byte. | tests_restrict.c:7:6:7:13 | smallbuf | source buffer |
5784
| unions.cpp:26:2:26:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:21:10:21:11 | mu | destination buffer |
5885
| unions.cpp:30:2:30:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:15:7:15:11 | small | destination buffer |

cpp/ql/test/query-tests/Security/CWE/CWE-119/semmle/tests/UnboundedWrite.expected

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ edges
2727
| main.cpp:9:29:9:32 | *argv | tests_restrict.c:15:41:15:44 | *argv | provenance | |
2828
| main.cpp:9:29:9:32 | tests_restrict_main output argument | main.cpp:10:20:10:23 | **argv | provenance | |
2929
| main.cpp:9:29:9:32 | tests_restrict_main output argument | main.cpp:10:20:10:23 | *argv | provenance | |
30-
| main.cpp:10:20:10:23 | **argv | tests.cpp:730:32:730:35 | **argv | provenance | |
31-
| main.cpp:10:20:10:23 | *argv | tests.cpp:730:32:730:35 | *argv | provenance | |
30+
| main.cpp:10:20:10:23 | **argv | tests.cpp:872:32:872:35 | **argv | provenance | |
31+
| main.cpp:10:20:10:23 | *argv | tests.cpp:872:32:872:35 | *argv | provenance | |
3232
| overflowdestination.cpp:23:45:23:48 | **argv | overflowdestination.cpp:23:45:23:48 | **argv | provenance | |
3333
| overflowdestination.cpp:23:45:23:48 | **argv | overflowdestination.cpp:23:45:23:48 | *argv | provenance | |
3434
| test_buffer_overrun.cpp:32:46:32:49 | **argv | test_buffer_overrun.cpp:32:46:32:49 | **argv | provenance | |
@@ -41,12 +41,12 @@ edges
4141
| tests.cpp:628:14:628:14 | *s [*home] | tests.cpp:628:14:628:19 | *home | provenance | |
4242
| tests.cpp:628:14:628:14 | *s [*home] | tests.cpp:628:16:628:19 | *home | provenance | |
4343
| tests.cpp:628:16:628:19 | *home | tests.cpp:628:14:628:19 | *home | provenance | |
44-
| tests.cpp:730:32:730:35 | **argv | tests.cpp:755:9:755:15 | *access to array | provenance | |
45-
| tests.cpp:730:32:730:35 | **argv | tests.cpp:756:9:756:15 | *access to array | provenance | |
46-
| tests.cpp:730:32:730:35 | *argv | tests.cpp:755:9:755:15 | *access to array | provenance | |
47-
| tests.cpp:730:32:730:35 | *argv | tests.cpp:756:9:756:15 | *access to array | provenance | |
48-
| tests.cpp:755:9:755:15 | *access to array | tests.cpp:613:19:613:24 | *source | provenance | |
49-
| tests.cpp:756:9:756:15 | *access to array | tests.cpp:622:19:622:24 | *source | provenance | |
44+
| tests.cpp:872:32:872:35 | **argv | tests.cpp:897:9:897:15 | *access to array | provenance | |
45+
| tests.cpp:872:32:872:35 | **argv | tests.cpp:898:9:898:15 | *access to array | provenance | |
46+
| tests.cpp:872:32:872:35 | *argv | tests.cpp:897:9:897:15 | *access to array | provenance | |
47+
| tests.cpp:872:32:872:35 | *argv | tests.cpp:898:9:898:15 | *access to array | provenance | |
48+
| tests.cpp:897:9:897:15 | *access to array | tests.cpp:613:19:613:24 | *source | provenance | |
49+
| tests.cpp:898:9:898:15 | *access to array | tests.cpp:622:19:622:24 | *source | provenance | |
5050
| tests_restrict.c:15:41:15:44 | **argv | tests_restrict.c:15:41:15:44 | **argv | provenance | |
5151
| tests_restrict.c:15:41:15:44 | *argv | tests_restrict.c:15:41:15:44 | *argv | provenance | |
5252
nodes
@@ -80,10 +80,10 @@ nodes
8080
| tests.cpp:628:14:628:14 | *s [*home] | semmle.label | *s [*home] |
8181
| tests.cpp:628:14:628:19 | *home | semmle.label | *home |
8282
| tests.cpp:628:16:628:19 | *home | semmle.label | *home |
83-
| tests.cpp:730:32:730:35 | **argv | semmle.label | **argv |
84-
| tests.cpp:730:32:730:35 | *argv | semmle.label | *argv |
85-
| tests.cpp:755:9:755:15 | *access to array | semmle.label | *access to array |
86-
| tests.cpp:756:9:756:15 | *access to array | semmle.label | *access to array |
83+
| tests.cpp:872:32:872:35 | **argv | semmle.label | **argv |
84+
| tests.cpp:872:32:872:35 | *argv | semmle.label | *argv |
85+
| tests.cpp:897:9:897:15 | *access to array | semmle.label | *access to array |
86+
| tests.cpp:898:9:898:15 | *access to array | semmle.label | *access to array |
8787
| tests_restrict.c:15:41:15:44 | **argv | semmle.label | **argv |
8888
| tests_restrict.c:15:41:15:44 | **argv | semmle.label | **argv |
8989
| tests_restrict.c:15:41:15:44 | *argv | semmle.label | *argv |

0 commit comments

Comments
 (0)