Skip to content

Commit 060d0b4

Browse files
committed
Python: Remove imprecise container steps
- remove `tupleStoreStep` and `dictStoreStep` from `containerStep`. These steps are imprecise, whereas the corresponding content is tracked precisely.
- add implicit reads to recover taint at sinks.
- add implicit read steps for decoders, to supplement the `AdditionalTaintStep` that now only covers the case where the full container is tainted.
1 parent c7e3682 commit 060d0b4

File tree

16 files changed

+180
-55
lines changed

16 files changed

+180
-55
lines changed

python/ql/consistency-queries/DataFlowConsistency.ql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ private module Input implements InputSig<Location, PythonDataFlow> {
3434
// parameter, but dataflow-consistency queries should _not_ complain about there not
3535
// being a post-update node for the synthetic `**kwargs` parameter.
3636
n instanceof SynthDictSplatParameterNode
37+
or
38+
Private::Conversions::readStep(n, _, _)
3739
}
3840

3941
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,8 @@ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
928928
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
929929
or
930930
VariableCapture::readStep(nodeFrom, c, nodeTo)
931+
or
932+
Conversions::readStep(nodeFrom, c, nodeTo)
931933
}
932934

933935
/** Data flows from a sequence to a subscript of the sequence. */
@@ -983,6 +985,40 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
983985
nodeTo.accesses(nodeFrom, c.getAttribute())
984986
}
985987

988+
module Conversions {
989+
private import semmle.python.Concepts
990+
991+
predicate decoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
992+
exists(Decoding decoding |
993+
nodeFrom = decoding.getAnInput() and
994+
nodeTo = decoding.getOutput()
995+
) and
996+
(
997+
c instanceof TupleElementContent
998+
or
999+
c instanceof DictionaryElementContent
1000+
)
1001+
}
1002+
1003+
predicate encoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
1004+
exists(Encoding encoding |
1005+
nodeFrom = encoding.getAnInput() and
1006+
nodeTo = encoding.getOutput()
1007+
) and
1008+
(
1009+
c instanceof TupleElementContent
1010+
or
1011+
c instanceof DictionaryElementContent
1012+
)
1013+
}
1014+
1015+
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
1016+
decoderReadStep(nodeFrom, c, nodeTo)
1017+
or
1018+
encoderReadStep(nodeFrom, c, nodeTo)
1019+
}
1020+
}
1021+
9861022
/**
9871023
* Holds if values stored inside content `c` are cleared at node `n`. For example,
9881024
* any value stored inside `f` is cleared at the pre-update node associated with `x`

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,16 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
1616
* of `c` at sinks and inputs to additional taint steps.
1717
*/
1818
bindingset[node]
19-
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { none() }
19+
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
20+
// We allow implicit reads of precise content;
21+
// imprecise content has already bubbled up.
22+
exists(node) and
23+
(
24+
c instanceof DataFlow::TupleElementContent
25+
or
26+
c instanceof DataFlow::DictionaryElementContent
27+
)
28+
}
2029

2130
private module Cached {
2231
/**
@@ -178,10 +187,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
178187
or
179188
DataFlowPrivate::setStoreStep(nodeFrom, _, nodeTo)
180189
or
181-
DataFlowPrivate::tupleStoreStep(nodeFrom, _, nodeTo)
182-
or
183-
DataFlowPrivate::dictStoreStep(nodeFrom, _, nodeTo)
184-
or
185190
// comprehension, so there is taint-flow from `x` in `[x for x in xs]` to the
186191
// resulting list of the list-comprehension.
187192
//

python/ql/test/library-tests/dataflow/sensitive-data/test.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,5 @@ def call_wrapper(func):
131131
print(password) # $ SensitiveUse=password
132132
_config = {"sleep_timer": 5, "mysql_password": password}
133133

134-
# since we have taint-step from store of `password`, we will consider any item in the
135-
# dictionary to be a password :(
136-
print(_config["sleep_timer"]) # $ SPURIOUS: SensitiveUse=password
134+
# since we have precise dictionary content, other items of the config are not tainted
135+
print(_config["sleep_timer"])

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_string.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def str_methods():
1717
ts.casefold(), # $ tainted
1818

1919
ts.format_map({}), # $ tainted
20-
"{unsafe}".format_map({"unsafe": ts}), # $ tainted
20+
"{unsafe}".format_map({"unsafe": ts}), # $ MISSING: tainted
2121
)
2222

2323

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ def test_construction():
2929

3030
ensure_tainted(
3131
list(tainted_list), # $ tainted
32-
list(tainted_tuple), # $ tainted
32+
list(tainted_tuple), # $ MISSING: tainted
3333
list(tainted_set), # $ tainted
34-
list(tainted_dict.values()), # $ tainted
35-
list(tainted_dict.items()), # $ tainted
34+
list(tainted_dict.values()), # $ MISSING: tainted
35+
list(tainted_dict.items()), # $ MISSING: tainted
3636

3737
tuple(tainted_list), # $ tainted
3838
set(tainted_list), # $ tainted

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def percent_fmt():
115115
ensure_tainted(
116116
tainted_fmt % (1, 2), # $ tainted
117117
"%s foo bar" % ts, # $ tainted
118-
"%s %s %s" % (1, 2, ts), # $ tainted
118+
"%s %s %s" % (1, 2, ts), # $ MISSING: tainted
119119
)
120120

121121

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep/test_unpacking.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def contrived_1():
5353

5454
(a, b, c), (d, e, f) = tainted_list, no_taint_list
5555
ensure_tainted(a, b, c) # $ tainted
56-
ensure_not_tainted(d, e, f) # $ SPURIOUS: tainted
56+
ensure_not_tainted(d, e, f)
5757

5858

5959
def contrived_2():

python/ql/test/library-tests/frameworks/stdlib/test_re.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@
7474
)
7575

7676
ensure_not_tainted(
77-
re.subn(pat, repl="safe", string=ts),
7877
re.subn(pat, repl="safe", string=ts)[1], # // the number of substitutions made
7978
)
8079
ensure_tainted(
80+
re.subn(pat, repl="safe", string=ts), # $ tainted // implicit read at sink
8181
re.subn(pat, repl="safe", string=ts)[0], # $ tainted // the string
8282
)

python/ql/test/library-tests/frameworks/tornado/taint_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def get(self, name = "World!", number="0", foo="foo"): # $ requestHandler route
6363
request.headers["header-name"], # $ tainted
6464
request.headers.get_list("header-name"), # $ tainted
6565
request.headers.get_all(), # $ tainted
66-
[(k, v) for (k, v) in request.headers.get_all()], # $ tainted
66+
[(k, v) for (k, v) in request.headers.get_all()], # $ MISSING: tainted
6767

6868
# Dict[str, http.cookies.Morsel]
6969
request.cookies, # $ tainted

0 commit comments

Comments
 (0)