Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python: Remove imprecise container steps #17493

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/ql/consistency-queries/DataFlowConsistency.ql
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ private module Input implements InputSig<Location, PythonDataFlow> {
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
or
Private::Conversions::readStep(n, _, _)
}

predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,8 @@ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
or
Conversions::readStep(nodeFrom, c, nodeTo)
}

/** Data flows from a sequence to a subscript of the sequence. */
Expand Down Expand Up @@ -989,6 +991,51 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
nodeTo.accesses(nodeFrom, c.getAttribute())
}

module Conversions {
private import semmle.python.Concepts

predicate decoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
exists(Decoding decoding |
nodeFrom = decoding.getAnInput() and
nodeTo = decoding.getOutput()
) and
(
c instanceof TupleElementContent
or
c instanceof DictionaryElementContent
)
}

predicate encoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
exists(Encoding encoding |
nodeFrom = encoding.getAnInput() and
nodeTo = encoding.getOutput()
) and
(
c instanceof TupleElementContent
or
c instanceof DictionaryElementContent
)
}

predicate formatReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
// % formatting
exists(BinaryExprNode fmt | fmt = nodeTo.asCfgNode() |
fmt.getOp() instanceof Mod and
fmt.getRight() = nodeFrom.asCfgNode()
) and
c instanceof TupleElementContent
}

predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
decoderReadStep(nodeFrom, c, nodeTo)
or
encoderReadStep(nodeFrom, c, nodeTo)
or
formatReadStep(nodeFrom, c, nodeTo)
}
}

/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,16 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
* of `c` at sinks and inputs to additional taint steps.
*/
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { none() }
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
// We allow implicit reads of precise content
// imprecise content has already bubled up.
exists(node) and
(
c instanceof DataFlow::TupleElementContent
or
c instanceof DataFlow::DictionaryElementContent
)
}

private module Cached {
/**
Expand Down Expand Up @@ -176,10 +185,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
or
DataFlowPrivate::setStoreStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::tupleStoreStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::dictStoreStep(nodeFrom, _, nodeTo)
or
// comprehension, so there is taint-flow from `x` in `[x for x in xs]` to the
// resulting list of the list-comprehension.
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,5 @@ def call_wrapper(func):
print(password) # $ SensitiveUse=password
_config = {"sleep_timer": 5, "mysql_password": password}

# since we have taint-step from store of `password`, we will consider any item in the
# dictionary to be a password :(
print(_config["sleep_timer"]) # $ SPURIOUS: SensitiveUse=password
# since we have precise dictionary content, other items of the config are not tainted
print(_config["sleep_timer"])
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def str_methods():
ts.casefold(), # $ tainted

ts.format_map({}), # $ tainted
"{unsafe}".format_map({"unsafe": ts}), # $ tainted
"{unsafe}".format_map({"unsafe": ts}), # $ MISSING: tainted
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def test_construction():

ensure_tainted(
list(tainted_list), # $ tainted
list(tainted_tuple), # $ tainted
list(tainted_tuple), # $ MISSING: tainted
list(tainted_set), # $ tainted
list(tainted_dict.values()), # $ tainted
list(tainted_dict.items()), # $ tainted
list(tainted_dict.values()), # $ MISSING: tainted
list(tainted_dict.items()), # $ MISSING: tainted

tuple(tainted_list), # $ tainted
set(tainted_list), # $ tainted
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def contrived_1():

(a, b, c), (d, e, f) = tainted_list, no_taint_list
ensure_tainted(a, b, c) # $ tainted
ensure_not_tainted(d, e, f) # $ SPURIOUS: tainted
ensure_not_tainted(d, e, f)


def contrived_2():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@
)

ensure_not_tainted(
re.subn(pat, repl="safe", string=ts),
re.subn(pat, repl="safe", string=ts)[1], # // the number of substitutions made
)
ensure_tainted(
re.subn(pat, repl="safe", string=ts), # $ tainted // implicit read at sink
re.subn(pat, repl="safe", string=ts)[0], # $ tainted // the string
)
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def get(self, name = "World!", number="0", foo="foo"): # $ requestHandler route
request.headers["header-name"], # $ tainted
request.headers.get_list("header-name"), # $ tainted
request.headers.get_all(), # $ tainted
[(k, v) for (k, v) in request.headers.get_all()], # $ tainted
[(k, v) for (k, v) in request.headers.get_all()], # $ MISSING: tainted

# Dict[str, http.cookies.Morsel]
request.cookies, # $ tainted
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
| hmac.new [**] | 1 | 1 |
| hmac.new [keyword msg] | 1 | 1 |
| hmac.new [position 1] | 1 | 1 |
| unknown.lib.func [keyword kw] | 2 | 1 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ edges
| test.py:23:16:23:27 | ControlFlowNode for Attribute | test.py:23:16:23:39 | ControlFlowNode for Attribute() | provenance | dict.get |
| test.py:23:16:23:39 | ControlFlowNode for Attribute() | test.py:23:5:23:12 | ControlFlowNode for data_raw | provenance | |
| test.py:24:5:24:8 | ControlFlowNode for data | test.py:25:44:25:47 | ControlFlowNode for data | provenance | |
| test.py:24:5:24:8 | ControlFlowNode for data | test.py:25:44:25:47 | ControlFlowNode for data | provenance | |
| test.py:25:44:25:47 | ControlFlowNode for data | test.py:25:15:25:74 | SynthDictSplatArgumentNode | provenance | |
| test.py:34:5:34:8 | ControlFlowNode for data | test.py:35:10:35:13 | ControlFlowNode for data | provenance | |
| test.py:34:5:34:8 | ControlFlowNode for data | test.py:36:13:36:16 | ControlFlowNode for data | provenance | |
| test.py:34:12:34:18 | ControlFlowNode for request | test.py:34:12:34:23 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
Expand Down Expand Up @@ -45,6 +47,8 @@ nodes
| test.py:23:16:23:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:23:16:23:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:24:5:24:8 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:25:15:25:74 | SynthDictSplatArgumentNode | semmle.label | SynthDictSplatArgumentNode |
| test.py:25:44:25:47 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:25:44:25:47 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:34:5:34:8 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:34:12:34:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
Expand All @@ -68,6 +72,7 @@ nodes
subpaths
#select
| test.py:15:36:15:39 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:15:36:15:39 | ControlFlowNode for data | Call to hmac.new [position 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:25:15:25:74 | SynthDictSplatArgumentNode | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:25:15:25:74 | SynthDictSplatArgumentNode | Call to hmac.new [**] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:25:44:25:47 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:25:44:25:47 | ControlFlowNode for data | Call to hmac.new [keyword msg] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:35:10:35:13 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:35:10:35:13 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:36:13:36:16 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:36:13:36:16 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ edges
| test.py:50:29:50:31 | ControlFlowNode for err | test.py:50:16:50:32 | ControlFlowNode for format_error() | provenance | |
| test.py:50:29:50:31 | ControlFlowNode for err | test.py:52:18:52:20 | ControlFlowNode for msg | provenance | |
| test.py:52:18:52:20 | ControlFlowNode for msg | test.py:53:12:53:27 | ControlFlowNode for BinaryExpr | provenance | |
| test.py:65:25:65:25 | ControlFlowNode for e | test.py:66:24:66:40 | ControlFlowNode for Dict | provenance | |
| test.py:65:25:65:25 | ControlFlowNode for e | test.py:66:34:66:39 | ControlFlowNode for str() | provenance | |
| test.py:66:24:66:40 | ControlFlowNode for Dict [Dictionary element at key error] | test.py:66:24:66:40 | ControlFlowNode for Dict | provenance | |
| test.py:66:34:66:39 | ControlFlowNode for str() | test.py:66:24:66:40 | ControlFlowNode for Dict [Dictionary element at key error] | provenance | |
nodes
| test.py:16:16:16:37 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:23:25:23:25 | ControlFlowNode for e | semmle.label | ControlFlowNode for e |
Expand All @@ -23,6 +25,8 @@ nodes
| test.py:53:12:53:27 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| test.py:65:25:65:25 | ControlFlowNode for e | semmle.label | ControlFlowNode for e |
| test.py:66:24:66:40 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
| test.py:66:24:66:40 | ControlFlowNode for Dict [Dictionary element at key error] | semmle.label | ControlFlowNode for Dict [Dictionary element at key error] |
| test.py:66:34:66:39 | ControlFlowNode for str() | semmle.label | ControlFlowNode for str() |
subpaths
| test.py:50:29:50:31 | ControlFlowNode for err | test.py:52:18:52:20 | ControlFlowNode for msg | test.py:53:12:53:27 | ControlFlowNode for BinaryExpr | test.py:50:16:50:32 | ControlFlowNode for format_error() |
#select
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ edges
| test.py:67:38:67:48 | ControlFlowNode for bank_number | test.py:70:15:70:25 | ControlFlowNode for bank_number | provenance | |
| test.py:67:76:67:78 | ControlFlowNode for ccn | test.py:73:15:73:17 | ControlFlowNode for ccn | provenance | |
| test.py:67:81:67:88 | ControlFlowNode for user_ccn | test.py:74:15:74:22 | ControlFlowNode for user_ccn | provenance | |
| test.py:101:5:101:10 | ControlFlowNode for config | test.py:105:11:105:31 | ControlFlowNode for Subscript | provenance | |
| test.py:103:21:103:37 | ControlFlowNode for Attribute | test.py:101:5:101:10 | ControlFlowNode for config | provenance | |
nodes
| test.py:19:5:19:12 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
Expand Down Expand Up @@ -62,9 +60,6 @@ nodes
| test.py:70:15:70:25 | ControlFlowNode for bank_number | semmle.label | ControlFlowNode for bank_number |
| test.py:73:15:73:17 | ControlFlowNode for ccn | semmle.label | ControlFlowNode for ccn |
| test.py:74:15:74:22 | ControlFlowNode for user_ccn | semmle.label | ControlFlowNode for user_ccn |
| test.py:101:5:101:10 | ControlFlowNode for config | semmle.label | ControlFlowNode for config |
| test.py:103:21:103:37 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:105:11:105:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
subpaths
#select
| test.py:20:48:20:55 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password | This expression logs $@ as clear text. | test.py:19:16:19:29 | ControlFlowNode for get_password() | sensitive data (password) |
Expand All @@ -89,4 +84,3 @@ subpaths
| test.py:70:15:70:25 | ControlFlowNode for bank_number | test.py:67:38:67:48 | ControlFlowNode for bank_number | test.py:70:15:70:25 | ControlFlowNode for bank_number | This expression logs $@ as clear text. | test.py:67:38:67:48 | ControlFlowNode for bank_number | sensitive data (private) |
| test.py:73:15:73:17 | ControlFlowNode for ccn | test.py:67:76:67:78 | ControlFlowNode for ccn | test.py:73:15:73:17 | ControlFlowNode for ccn | This expression logs $@ as clear text. | test.py:67:76:67:78 | ControlFlowNode for ccn | sensitive data (private) |
| test.py:74:15:74:22 | ControlFlowNode for user_ccn | test.py:67:81:67:88 | ControlFlowNode for user_ccn | test.py:74:15:74:22 | ControlFlowNode for user_ccn | This expression logs $@ as clear text. | test.py:67:81:67:88 | ControlFlowNode for user_ccn | sensitive data (private) |
| test.py:105:11:105:31 | ControlFlowNode for Subscript | test.py:103:21:103:37 | ControlFlowNode for Attribute | test.py:105:11:105:31 | ControlFlowNode for Subscript | This expression logs $@ as clear text. | test.py:103:21:103:37 | ControlFlowNode for Attribute | sensitive data (password) |
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,19 @@ edges
| full_partial_test.py:53:5:53:7 | ControlFlowNode for url | full_partial_test.py:54:18:54:20 | ControlFlowNode for url | provenance | |
| full_partial_test.py:57:5:57:14 | ControlFlowNode for user_input | full_partial_test.py:61:5:61:7 | ControlFlowNode for url | provenance | |
| full_partial_test.py:57:5:57:14 | ControlFlowNode for user_input | full_partial_test.py:64:5:64:7 | ControlFlowNode for url | provenance | |
| full_partial_test.py:57:5:57:14 | ControlFlowNode for user_input | full_partial_test.py:67:5:67:7 | ControlFlowNode for url | provenance | |
| full_partial_test.py:57:5:57:14 | ControlFlowNode for user_input | full_partial_test.py:67:38:67:47 | ControlFlowNode for user_input | provenance | |
| full_partial_test.py:57:18:57:24 | ControlFlowNode for request | full_partial_test.py:57:5:57:14 | ControlFlowNode for user_input | provenance | AdditionalTaintStep |
| full_partial_test.py:57:18:57:24 | ControlFlowNode for request | full_partial_test.py:58:5:58:13 | ControlFlowNode for query_val | provenance | AdditionalTaintStep |
| full_partial_test.py:58:5:58:13 | ControlFlowNode for query_val | full_partial_test.py:67:5:67:7 | ControlFlowNode for url | provenance | |
| full_partial_test.py:58:5:58:13 | ControlFlowNode for query_val | full_partial_test.py:67:50:67:58 | ControlFlowNode for query_val | provenance | |
| full_partial_test.py:58:17:58:23 | ControlFlowNode for request | full_partial_test.py:58:5:58:13 | ControlFlowNode for query_val | provenance | AdditionalTaintStep |
| full_partial_test.py:61:5:61:7 | ControlFlowNode for url | full_partial_test.py:62:18:62:20 | ControlFlowNode for url | provenance | |
| full_partial_test.py:64:5:64:7 | ControlFlowNode for url | full_partial_test.py:65:18:65:20 | ControlFlowNode for url | provenance | |
| full_partial_test.py:67:5:67:7 | ControlFlowNode for url | full_partial_test.py:68:18:68:20 | ControlFlowNode for url | provenance | |
| full_partial_test.py:67:11:67:59 | ControlFlowNode for BinaryExpr | full_partial_test.py:67:5:67:7 | ControlFlowNode for url | provenance | |
| full_partial_test.py:67:38:67:47 | ControlFlowNode for user_input | full_partial_test.py:67:38:67:58 | ControlFlowNode for Tuple [Tuple element at index 0] | provenance | |
| full_partial_test.py:67:38:67:58 | ControlFlowNode for Tuple [Tuple element at index 0] | full_partial_test.py:67:11:67:59 | ControlFlowNode for BinaryExpr | provenance | |
| full_partial_test.py:67:38:67:58 | ControlFlowNode for Tuple [Tuple element at index 1] | full_partial_test.py:67:11:67:59 | ControlFlowNode for BinaryExpr | provenance | |
| full_partial_test.py:67:50:67:58 | ControlFlowNode for query_val | full_partial_test.py:67:38:67:58 | ControlFlowNode for Tuple [Tuple element at index 1] | provenance | |
| full_partial_test.py:71:5:71:14 | ControlFlowNode for user_input | full_partial_test.py:75:5:75:7 | ControlFlowNode for url | provenance | |
| full_partial_test.py:71:5:71:14 | ControlFlowNode for user_input | full_partial_test.py:78:5:78:7 | ControlFlowNode for url | provenance | |
| full_partial_test.py:71:5:71:14 | ControlFlowNode for user_input | full_partial_test.py:81:5:81:7 | ControlFlowNode for url | provenance | |
Expand Down Expand Up @@ -138,6 +143,11 @@ nodes
| full_partial_test.py:64:5:64:7 | ControlFlowNode for url | semmle.label | ControlFlowNode for url |
| full_partial_test.py:65:18:65:20 | ControlFlowNode for url | semmle.label | ControlFlowNode for url |
| full_partial_test.py:67:5:67:7 | ControlFlowNode for url | semmle.label | ControlFlowNode for url |
| full_partial_test.py:67:11:67:59 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| full_partial_test.py:67:38:67:47 | ControlFlowNode for user_input | semmle.label | ControlFlowNode for user_input |
| full_partial_test.py:67:38:67:58 | ControlFlowNode for Tuple [Tuple element at index 0] | semmle.label | ControlFlowNode for Tuple [Tuple element at index 0] |
| full_partial_test.py:67:38:67:58 | ControlFlowNode for Tuple [Tuple element at index 1] | semmle.label | ControlFlowNode for Tuple [Tuple element at index 1] |
| full_partial_test.py:67:50:67:58 | ControlFlowNode for query_val | semmle.label | ControlFlowNode for query_val |
| full_partial_test.py:68:18:68:20 | ControlFlowNode for url | semmle.label | ControlFlowNode for url |
| full_partial_test.py:71:5:71:14 | ControlFlowNode for user_input | semmle.label | ControlFlowNode for user_input |
| full_partial_test.py:71:18:71:24 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
Expand Down
Loading
Loading