Skip to content

Commit 91f1cf1

Browse files
authored
Merge pull request #17454 from yoff/stdlib-optparse
Python: Several standard library models
2 parents 97c2387 + 56d0aff commit 91f1cf1

File tree

9 files changed

+213
-20
lines changed

9 files changed

+213
-20
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added several models of standard library functions and classes, in anticipation of no longer extracting the standard library in a future release.

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ private module Cached {
4646
or
4747
containerStep(nodeFrom, nodeTo)
4848
or
49-
copyStep(nodeFrom, nodeTo)
50-
or
5149
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
5250
or
5351
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
@@ -191,18 +189,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
191189
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
192190
}
193191

194-
/**
195-
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
196-
*/
197-
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
198-
exists(DataFlow::CallCfgNode call | call = nodeTo |
199-
call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and
200-
call.getArg(0) = nodeFrom
201-
)
202-
or
203-
nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, "copy")
204-
}
205-
206192
/**
207193
* Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
208194
* such that the whole expression `await x` is tainted if `x` is tainted.

python/ql/lib/semmle/python/frameworks/Stdlib.model.yml

Lines changed: 154 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,161 @@ extensions:
2121
- ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs']
2222

2323
- ['os', 'Member[read].ReturnValue', 'file']
24+
25+
- addsTo:
26+
pack: codeql/python-all
27+
extensible: sinkModel
28+
data:
29+
- ["zipfile.ZipFile","Member[extractall].Argument[0,path:]", "path-injection"]
30+
2431
- addsTo:
2532
pack: codeql/python-all
2633
extensible: summaryModel
2734
data:
28-
- ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args]', 'Argument[0,args:]', 'ReturnValue', 'taint']
29-
# note: taint of attribute lookups is handled in QL
35+
# See https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser
36+
# note: taint flow for attribute lookups on `argparse.ArgumentParser` is handled in QL
37+
- ["argparse.ArgumentParser", "Member[_parse_known_args,_read_args_from_files]", "Argument[0,arg_strings:]", "ReturnValue", "taint"]
38+
- ["argparse.ArgumentParser", "Member[parse_args,parse_known_args]", "Argument[0,args:]", "ReturnValue", "taint"]
39+
# See https://docs.python.org/3/library/cgi.html#higher-level-interface
40+
- ["cgi.FieldStorage", "Member[getfirst,getlist,getvalue]", "Argument[self]", "ReturnValue", "taint"]
41+
# See
42+
# - https://docs.python.org/3/glossary.html#term-mapping
43+
# - https://docs.python.org/3/library/stdtypes.html#dict.get
44+
- ["collections.abc.Mapping", "Member[get]", "Argument[1,default:]", "ReturnValue", "taint"]
45+
# See https://docs.python.org/3/library/contextlib.html#contextlib.ExitStack
46+
- ["contextlib.ExitStack", "Member[enter_context]", "Argument[0,cm:]", "ReturnValue", "taint"]
47+
# See https://docs.python.org/3/library/copy.html#copy.deepcopy
48+
- ["copy", "Member[copy,deepcopy]", "Argument[0,x:]", "ReturnValue", "value"]
49+
# See
50+
# - https://docs.python.org/3/library/ctypes.html#ctypes.create_string_buffer
51+
# - https://docs.python.org/3/library/ctypes.html#ctypes.create_unicode_buffer
52+
- ["ctypes", "Member[create_string_buffer,create_unicode_buffer]", "Argument[0,init:,init_or_size:]", "ReturnValue", "taint"]
53+
# See https://docs.python.org/3.11/distutils/apiref.html#distutils.util.change_root
54+
- ["distutils", "Member[util].Member[change_root]", "Argument[0,new_root:,1,pathname:]", "ReturnValue", "taint"]
55+
# See https://docs.python.org/3/library/email.header.html#email.header.Header
56+
- ["email.header.Header!", "Subclass.Call", "Argument[0,s:]", "ReturnValue", "taint"]
57+
# See https://docs.python.org/3/library/email.utils.html#email.utils.parseaddr
58+
- ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue", "taint"]
59+
- ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue.TupleElement[0,1]", "taint"]
60+
# See https://docs.python.org/3/library/fnmatch.html#fnmatch.filter
61+
- ["fnmatch", "Member[filter]", "Argument[0,names:].ListElement", "ReturnValue.ListElement", "value"]
62+
- ["fnmatch", "Member[filter]", "Argument[0,names:]", "ReturnValue", "taint"]
63+
# See https://docs.python.org/3/library/getopt.html#getopt.getopt
64+
- ["getopt", "Member[getopt]", "Argument[0,args:]", "ReturnValue.TupleElement[1]", "taint"]
65+
- ["getopt", "Member[getopt]", "Argument[1,shortopts:,2,longopts:]", "ReturnValue.TupleElement[0].ListElement.TupleElement[0]", "taint"]
66+
# See https://docs.python.org/3/library/gettext.html#gettext.gettext
67+
- ["gettext", "Member[gettext]", "Argument[0,message:]", "ReturnValue", "taint"]
68+
# See https://docs.python.org/3/library/gzip.html#gzip.GzipFile
69+
- ["gzip.GzipFile!", "Subclass.Call", "Argument[0,filename:]", "ReturnValue", "taint"]
70+
# See
71+
# - https://docs.python.org/3/library/html.html#html.escape
72+
# - https://docs.python.org/3/library/html.html#html.unescape
73+
- ["html", "Member[escape,unescape]", "Argument[0,s:]", "ReturnValue", "taint"]
74+
# See https://docs.python.org/3/library/html.parser.html#html.parser.HTMLParser.feed
75+
- ["html.parser.HTMLParser", "Member[feed]", "Argument[0,data:]", "Argument[self]", "taint"]
76+
# See https://docs.python.org/3.11/library/imp.html#imp.find_module
77+
- ["imp", "Member[find_module]", "Argument[0,name:,1,path:]", "ReturnValue", "taint"]
78+
# See https://docs.python.org/3/library/logging.html#logging.getLevelName
79+
# specifically the case where `level` matches no known level: the argument is then formatted into the returned string
80+
- ["logging", "Member[getLevelName]", "Argument[0,level:]", "ReturnValue", "taint"]
81+
# See https://docs.python.org/3/library/logging.html#logging.LogRecord.getMessage
82+
- ["logging.LogRecord", "Member[getMessage]", "Argument[self]", "ReturnValue", "taint"]
83+
# See https://docs.python.org/3/library/mimetypes.html#mimetypes.guess_type
84+
- ["mimetypes", "Member[guess_type]", "Argument[0,url:]", "ReturnValue", "taint"]
85+
# See https://github.com/python/cpython/blob/main/Lib/nturl2path.py
86+
# No user-facing documentation, unfortunately.
87+
- ["nturl2path", "Member[pathname2url]", "Argument[0,p:]", "ReturnValue", "taint"]
88+
- ["nturl2path", "Member[url2pathname]", "Argument[0,url:]", "ReturnValue", "taint"]
89+
# See https://docs.python.org/3/library/optparse.html#optparse.OptionParser.parse_args
90+
- ["optparse.OptionParser", "Member[parse_args]", "Argument[0,args:,1,values:]", "ReturnValue.TupleElement[0,1]", "taint"]
91+
# See https://github.com/python/cpython/blob/3.10/Lib/pathlib.py#L972-L973
92+
- ["pathlib.Path", "Member[__enter__]", "Argument[self]", "ReturnValue", "taint"]
93+
# See https://docs.python.org/3/library/os.html#os.PathLike.__fspath__
94+
- ["pathlib.PurePath", "Member[__fspath__]", "Argument[self]", "ReturnValue", "taint"]
95+
# See
96+
# - https://docs.python.org/3/library/queue.html#queue.Queue.get
97+
# - https://docs.python.org/3/library/queue.html#queue.Queue.get_nowait
98+
- ["queue.Queue", "Member[get,get_nowait]", "Argument[self].ListElement", "ReturnValue", "value"]
99+
- ["queue.Queue", "Member[get,get_nowait]", "Argument[self]", "ReturnValue", "taint"]
100+
# See
101+
# - https://docs.python.org/3/library/queue.html#queue.Queue.put
102+
# - https://docs.python.org/3/library/queue.html#queue.Queue.put_nowait
103+
- ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self].ListElement", "value"]
104+
- ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self]", "taint"]
105+
# See
106+
# - https://docs.python.org/3/library/random.html#random.choice
107+
# - https://docs.python.org/3/library/random.html#module-random
108+
- ["random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"]
109+
- ["random", "Member[choice]", "Argument[0,seq:].SetElement", "ReturnValue", "value"]
110+
- ["random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"]
111+
- ["random.Random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"]
112+
- ["random.Random", "Member[choice]", "Argument[0,seq:].SetElement", "ReturnValue", "value"]
113+
- ["random.Random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"]
114+
# See https://docs.python.org/3/library/shlex.html#shlex.quote
115+
- ["shlex", "Member[quote]", "Argument[0,s:]", "ReturnValue", "taint"]
116+
# See https://docs.python.org/3/library/shutil.html#shutil.rmtree
117+
- ["shutil", "Member[rmtree]", "Argument[0,path:]", "Argument[2,onerror:,onexc:].Parameter[1]", "taint"]
118+
# See https://docs.python.org/3/library/shutil.html#shutil.which
119+
- ["shutil", "Member[which]", "Argument[0,cmd:,2,path:]", "ReturnValue", "taint"]
120+
# See https://docs.python.org/3/library/subprocess.html#subprocess.Popen
121+
- ["subprocess.Popen!", "Subclass.Call", "Argument[0,args:]", "ReturnValue", "taint"]
122+
# See
123+
# - https://docs.python.org/3/library/tarfile.html#tarfile.open
124+
# - https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.open
125+
- ["tarfile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"]
126+
- ["tarfile.TarFile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"]
127+
# See https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp
128+
- ["tempfile", "Member[mkdtemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue", "taint"]
129+
# See https://docs.python.org/3/library/tempfile.html#tempfile.mkstemp
130+
- ["tempfile", "Member[mkstemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue.TupleElement[0,1]", "taint"]
131+
# See https://docs.python.org/3/library/textwrap.html#textwrap.dedent
132+
- ["textwrap", "Member[dedent]", "Argument[0,text:]", "ReturnValue", "taint"]
133+
# See https://docs.python.org/3/library/traceback.html#traceback.StackSummary.from_list
134+
- ["traceback.StackSummary", "Member[from_list]", "Argument[0,a_list:]", "ReturnValue", "taint"]
135+
# See https://docs.python.org/3/library/typing.html#typing.cast
136+
- ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"]
137+
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote
138+
- ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"]
139+
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus
140+
- ["urllib", "Member[parse].Member[quote_plus]", "Argument[0,string:]", "ReturnValue", "taint"]
141+
# See https://epydoc.sourceforge.net/stdlib/urllib-module.html
142+
- ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[0,1]", "taint"]
143+
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote
144+
- ["urllib", "Member[parse].Member[unquote]", "Argument[0,string:]", "ReturnValue", "taint"]
145+
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote_plus
146+
- ["urllib", "Member[parse].Member[unquote_plus]", "Argument[0,string:]", "ReturnValue", "taint"]
147+
# We could consider a more precise source than the first argument, namely tuple or dict content.
148+
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
149+
- ["urllib", "Member[parse].Member[urlencode]", "Argument[0,query:]", "ReturnValue", "taint"]
150+
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
151+
- ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:,1,url:]", "ReturnValue", "taint"]
152+
# See the internal documentation
153+
# https://github.com/python/cpython/blob/3.12/Lib/zipfile/_path/__init__.py#L103-L105
154+
- ["zipfile.CompleteDirs", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"]
155+
# See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
156+
# it may be necessary to read the code to understand the taint propagation
157+
# Constructor: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1266
158+
- ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue", "taint"]
159+
- ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue.Attribute[filelist].ListElement.Attribute[filename]", "value"]
160+
# _extract_member: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1761
161+
- ["zipfile.ZipFile", "Member[_extract_member]", "Argument[1,targetpath:]", "ReturnValue", "taint"]
162+
# infolist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1498-L1501
163+
- ["zipfile.ZipFile", "Member[infolist]", "Argument[self]", "ReturnValue", "taint"]
164+
- ["zipfile.ZipFile", "Member[infolist]", "Argument[self].Attribute[filelist]", "ReturnValue", "value"]
165+
# namelist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1494-L1496
166+
- ["zipfile.ZipFile", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"]
167+
168+
- addsTo:
169+
pack: codeql/python-all
170+
extensible: neutralModel
171+
data: []
172+
173+
- addsTo:
174+
pack: codeql/python-all
175+
extensible: typeModel
176+
data: []
177+
178+
- addsTo:
179+
pack: codeql/python-all
180+
extensible: typeVariableModel
181+
data: []

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,10 +254,14 @@ module Stdlib {
254254
* See https://docs.python.org/3.9/library/logging.html#logging.Logger.
255255
*/
256256
module Logger {
257+
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DD
258+
257259
/** Gets a reference to the `logging.Logger` class or any subclass. */
258260
API::Node subclassRef() {
259261
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
260262
or
263+
result = API::moduleImport("logging").getMember("getLoggerClass").getReturn().getASubclass*()
264+
or
261265
result = ModelOutput::getATypeNode("logging.Logger~Subclass").getASubclass*()
262266
}
263267

@@ -277,6 +281,13 @@ module Stdlib {
277281
ClassInstantiation() {
278282
this = subclassRef().getACall()
279283
or
284+
this =
285+
DD::selfTracker(subclassRef()
286+
.getAValueReachableFromSource()
287+
.asExpr()
288+
.(ClassExpr)
289+
.getInnerScope())
290+
or
280291
this = API::moduleImport("logging").getMember("root").asSource()
281292
or
282293
this = API::moduleImport("logging").getMember("getLogger").getACall()
@@ -1492,6 +1503,9 @@ module StdlibPrivate {
14921503
or
14931504
// io.open is a special case, since it is an alias for the builtin `open`
14941505
result = API::moduleImport("io").getMember("open")
1506+
or
1507+
// similarly, codecs.open calls the builtin `open`: https://github.com/python/cpython/blob/3.12/Lib/codecs.py#L918
1508+
result = API::moduleImport("codecs").getMember("open")
14951509
}
14961510

14971511
/**
@@ -3260,8 +3274,13 @@ module StdlibPrivate {
32603274

32613275
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
32623276
input in ["Argument[0]", "Argument[pattern:]"] and
3263-
output = "ReturnValue.Attribute[pattern]" and
3264-
preservesValue = true
3277+
(
3278+
output = "ReturnValue.Attribute[pattern]" and
3279+
preservesValue = true
3280+
or
3281+
output = "ReturnValue" and
3282+
preservesValue = false
3283+
)
32653284
}
32663285
}
32673286

python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ module UnsafeShellCommandConstructionConfig implements DataFlow::ConfigSig {
4545
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
4646

4747
predicate isBarrier(DataFlow::Node node) {
48+
node instanceof Sanitizer or
4849
node instanceof CommandInjection::Sanitizer // using all sanitizers from `py/command-injection`
4950
}
5051

python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
private import python
1010
import semmle.python.dataflow.new.DataFlow
11-
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TTP
11+
private import semmle.python.ApiGraphs
1212

1313
/**
1414
* Provides a data-flow configuration for detecting modifications of a parameter's default value.
@@ -73,7 +73,13 @@ module ModificationOfParameterWithDefault {
7373
or
7474
// the target of a copy step is (presumably) a different object, and hence modifications of
7575
// this object no longer matter for the purposes of this query.
76-
TTP::copyStep(_, node) and state in [true, false]
76+
copyTarget(node) and state in [true, false]
77+
}
78+
79+
private predicate copyTarget(DataFlow::Node node) {
80+
node = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall()
81+
or
82+
node.(DataFlow::MethodCallNode).calls(_, "copy")
7783
}
7884
}
7985

python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ edges
7575
| UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | provenance | |
7676
| UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | provenance | |
7777
| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | Config |
78+
| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:67 |
7879
| UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | provenance | |
7980
| UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | provenance | |
8081
| UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | provenance | |

0 commit comments

Comments
 (0)