From df406b4fca88a73d345fc8d34ce7b59b3d5876dd Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 2 Feb 2024 10:14:17 +0100 Subject: [PATCH 01/26] python: Start modelling using MaD - empty models for now - `summaryModel` of `codeql/python-all` will be added to shortly. --- python/ql/lib/ext/StdLib.model.yml | 30 ++++++++++++++++++++++++++++++ python/ql/lib/qlpack.yml | 2 ++ 2 files changed, 32 insertions(+) create mode 100644 python/ql/lib/ext/StdLib.model.yml diff --git a/python/ql/lib/ext/StdLib.model.yml b/python/ql/lib/ext/StdLib.model.yml new file mode 100644 index 000000000000..6c1f0ec89908 --- /dev/null +++ b/python/ql/lib/ext/StdLib.model.yml @@ -0,0 +1,30 @@ +extensions: + - addsTo: + pack: codeql/python-all + extensible: sourceModel + data: [] + + - addsTo: + pack: codeql/python-all + extensible: sinkModel + data: [] + + - addsTo: + pack: codeql/python-all + extensible: summaryModel + data: [] + + - addsTo: + pack: codeql/python-all + extensible: neutralModel + data: [] + + - addsTo: + pack: codeql/python-all + extensible: typeModel + data: [] + + - addsTo: + pack: codeql/python-all + extensible: typeVariableModel + data: [] diff --git a/python/ql/lib/qlpack.yml b/python/ql/lib/qlpack.yml index 36d43473f2a6..765cdb11c2e1 100644 --- a/python/ql/lib/qlpack.yml +++ b/python/ql/lib/qlpack.yml @@ -15,4 +15,6 @@ dependencies: codeql/yaml: ${workspace} dataExtensions: - semmle/python/frameworks/**/*.model.yml + - ext/*.model.yml + - ext/generated/*.model.yml warnOnImplicitThis: true From 281ac05868d722e514ead85373da9de4c27168be Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 8 Mar 2024 11:12:33 +0100 Subject: [PATCH 02/26] python: add modelling for `urllib.parse` - `quote` together with `re.compile` recover regex injection alerts on haiwen/seahub - `quote_plus` recovers the URL redirection alert on DemocracyClub/EveryElection - `unquote` recovers path injection alerts on `cloudera/hue` - it was tedious finding justifications 
for the rest.. --- python/ql/lib/ext/StdLib.model.yml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/ext/StdLib.model.yml b/python/ql/lib/ext/StdLib.model.yml index 6c1f0ec89908..16f62bd77481 100644 --- a/python/ql/lib/ext/StdLib.model.yml +++ b/python/ql/lib/ext/StdLib.model.yml @@ -12,8 +12,26 @@ extensions: - addsTo: pack: codeql/python-all extensible: summaryModel - data: [] - + data: + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote + - ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus + - ["urllib", "Member[parse].Member[quote_plus]", "Argument[0,string:]", "ReturnValue", "taint"] + # See https://epydoc.sourceforge.net/stdlib/urllib-module.html + - ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[0]", "taint"] + - ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[1]", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote + - ["urllib", "Member[parse].Member[unquote]", "Argument[0,string:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote_plus + - ["urllib", "Member[parse].Member[unquote_plus]", "Argument[0,string:]", "ReturnValue", "taint"] + # We could consider a more precise source than the first argument, namely tuple or dict content. 
+ # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode + - ["urllib", "Member[parse].Member[urlencode]", "Argument[0,query:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin + - ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:]", "ReturnValue", "taint"] + - ["urllib", "Member[parse].Member[urljoin]", "Argument[1,url:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/re.html#re.compile + - ["re", "Member[compile]", "Argument[0,pattern:]", "ReturnValue", "taint"] - addsTo: pack: codeql/python-all extensible: neutralModel From c004ffaca8e721834f4f10ef0deb7bf6f18ff421 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 14 Mar 2024 09:22:08 +0100 Subject: [PATCH 03/26] python: move model to `Stdlib.yml` There is already a model there so we add to that one. We did observe that this existing model was blocked by the external MaD model. This is concerning and needs to be cleared up. 
--- python/ql/lib/ext/StdLib.model.yml | 2 -- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 9 +++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/ext/StdLib.model.yml b/python/ql/lib/ext/StdLib.model.yml index 16f62bd77481..df4feaf04249 100644 --- a/python/ql/lib/ext/StdLib.model.yml +++ b/python/ql/lib/ext/StdLib.model.yml @@ -30,8 +30,6 @@ extensions: # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin - ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:]", "ReturnValue", "taint"] - ["urllib", "Member[parse].Member[urljoin]", "Argument[1,url:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/re.html#re.compile - - ["re", "Member[compile]", "Argument[0,pattern:]", "ReturnValue", "taint"] - addsTo: pack: codeql/python-all extensible: neutralModel diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 3c23b3929911..7a373a523e47 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3251,8 +3251,13 @@ module StdlibPrivate { override predicate propagatesFlow(string input, string output, boolean preservesValue) { input in ["Argument[0]", "Argument[pattern:]"] and - output = "ReturnValue.Attribute[pattern]" and - preservesValue = true + ( + output = "ReturnValue.Attribute[pattern]" and + preservesValue = true + or + output = "ReturnValue" and + preservesValue = false + ) } } From d410136852410110010987d5d6476ff8f7562324 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 9 Apr 2024 13:14:42 +0200 Subject: [PATCH 04/26] python: compress models --- python/ql/lib/ext/StdLib.model.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/ext/StdLib.model.yml b/python/ql/lib/ext/StdLib.model.yml index df4feaf04249..e3cc9cd61c87 100644 --- a/python/ql/lib/ext/StdLib.model.yml +++ 
b/python/ql/lib/ext/StdLib.model.yml @@ -18,8 +18,7 @@ extensions: # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus - ["urllib", "Member[parse].Member[quote_plus]", "Argument[0,string:]", "ReturnValue", "taint"] # See https://epydoc.sourceforge.net/stdlib/urllib-module.html - - ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[0]", "taint"] - - ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[1]", "taint"] + - ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[0,1]", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote - ["urllib", "Member[parse].Member[unquote]", "Argument[0,string:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote_plus @@ -28,8 +27,7 @@ extensions: # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode - ["urllib", "Member[parse].Member[urlencode]", "Argument[0,query:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin - - ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:]", "ReturnValue", "taint"] - - ["urllib", "Member[parse].Member[urljoin]", "Argument[1,url:]", "ReturnValue", "taint"] + - ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:,1,url:]", "ReturnValue", "taint"] - addsTo: pack: codeql/python-all extensible: neutralModel From 1e97600c4aa9abf1ea7562b6367b06d9efb9334b Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 9 Apr 2024 13:31:26 +0200 Subject: [PATCH 05/26] Python: move models --- .../lib/{ext => semmle/python/frameworks/Stdlib}/StdLib.model.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/ql/lib/{ext => semmle/python/frameworks/Stdlib}/StdLib.model.yml (100%) diff --git a/python/ql/lib/ext/StdLib.model.yml 
b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml similarity index 100% rename from python/ql/lib/ext/StdLib.model.yml rename to python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml From b80a711b27033581261bd2af8c3bc8e56a889ff2 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 9 Apr 2024 13:33:05 +0200 Subject: [PATCH 06/26] python: undo changes to qlpack --- python/ql/lib/qlpack.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/ql/lib/qlpack.yml b/python/ql/lib/qlpack.yml index 765cdb11c2e1..36d43473f2a6 100644 --- a/python/ql/lib/qlpack.yml +++ b/python/ql/lib/qlpack.yml @@ -15,6 +15,4 @@ dependencies: codeql/yaml: ${workspace} dataExtensions: - semmle/python/frameworks/**/*.model.yml - - ext/*.model.yml - - ext/generated/*.model.yml warnOnImplicitThis: true From 2118f233b9d020a32126535282277644ac13d6e3 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 25 Jun 2024 14:40:23 +0200 Subject: [PATCH 07/26] Python: model optparse.OptionParser.parse_args --- python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml index e3cc9cd61c87..1b359ce05ea4 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml @@ -13,6 +13,8 @@ extensions: pack: codeql/python-all extensible: summaryModel data: + # See https://docs.python.org/3/library/optparse.html#optparse.OptionParser.parse_args + - ["optparse.OptionParser", "Member[parse_args]", "Argument[0,args:,1,values:]", "ReturnValue.TupleElement[0,1]", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote - ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"] # See 
https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus From 501cda4e8c1f5d4568740b31fe437ea21eea38e8 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 25 Jun 2024 14:44:39 +0200 Subject: [PATCH 08/26] Python: model `fnmatch.filter` --- python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml index 1b359ce05ea4..c2d8546b9dfb 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml @@ -13,6 +13,9 @@ extensions: pack: codeql/python-all extensible: summaryModel data: + # See See https://docs.python.org/3/library/fnmatch.html#fnmatch.filter + - ["fnmatch", "Member[filter]", "Argument[0,names:].ListElement", "ReturnValue.ListElement", "value"] + - ["fnmatch", "Member[filter]", "Argument[0,names:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/optparse.html#optparse.OptionParser.parse_args - ["optparse.OptionParser", "Member[parse_args]", "Argument[0,args:,1,values:]", "ReturnValue.TupleElement[0,1]", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote From bc551174f95b687acb78970ac855b56f5c5a6b7f Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 25 Jun 2024 14:53:06 +0200 Subject: [PATCH 09/26] Python: model `copy.deepcopy` as a value step --- .../dataflow/new/internal/TaintTrackingPrivate.qll | 14 -------------- .../python/frameworks/Stdlib/StdLib.model.yml | 2 ++ 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll index 572e67c28a9c..b268d6290603 100644 --- 
a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll @@ -46,8 +46,6 @@ private module Cached { or containerStep(nodeFrom, nodeTo) or - copyStep(nodeFrom, nodeTo) - or DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo) or DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo) @@ -191,18 +189,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo) } -/** - * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying. - */ -predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) { - exists(DataFlow::CallCfgNode call | call = nodeTo | - call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and - call.getArg(0) = nodeFrom - ) - or - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, "copy") -} - /** * Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step, * such that the whole expression `await x` is tainted if `x` is tainted. 
diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml index c2d8546b9dfb..3a320407d9c4 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml @@ -13,6 +13,8 @@ extensions: pack: codeql/python-all extensible: summaryModel data: + # See https://docs.python.org/3/library/copy.html#copy.deepcopy + - ["copy", "Member[copy,deepcopy]", "Argument[0,x:]", "ReturnValue", "value"] # See See https://docs.python.org/3/library/fnmatch.html#fnmatch.filter - ["fnmatch", "Member[filter]", "Argument[0,names:].ListElement", "ReturnValue.ListElement", "value"] - ["fnmatch", "Member[filter]", "Argument[0,names:]", "ReturnValue", "taint"] From bdc48088e6dcc0ded83f81c0fdc9d6b04a3a7da8 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 25 Jun 2024 15:21:29 +0200 Subject: [PATCH 10/26] Python: MaD summary models Two of the generated summaries have been excluded: - ["re", "Member[split]", "Argument[0,pattern:]", "ReturnValue", "taint"] From the documentation, it is not clear why pattern should figure in the return value, as that is the part denoting split point and thus all those instances are filtered out. From the implementation Split function: https://github.com/python/cpython/blob/3.12/Lib/re/__init__.py#L199 _compile function being called by split: https://github.com/python/cpython/blob/3.12/Lib/re/__init__.py#L280 We see that in case the pattern is already a compiled `Pattern`, it is returned directly from _compile and could thus be part of the return value from split. This is probably not possible to arrange for an attacker, and so an FP in practice. - ["urllib2", "Member[unquote]", "Argument[0,string:]", "ReturnValue", "taint"] urllib2 seems to be only in Python2 (e.g. https://docs.python.org/2.7/library/urllib2.html) and I cannot locate the function unquote. 
--- .../python/frameworks/Stdlib/StdLib.model.yml | 105 +++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml index 3a320407d9c4..1d7e0c070fe4 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml @@ -7,19 +7,107 @@ extensions: - addsTo: pack: codeql/python-all extensible: sinkModel - data: [] + data: + - ["subprocess.Popen!","Subclass.Call.Argument[0,args:]", "log-injection"] + - ["zipfile.ZipFile","Member[extractall].Argument[0,path:]", "path-injection"] - addsTo: pack: codeql/python-all extensible: summaryModel data: + # See + # - https://docs.python.org/3/glossary.html#term-mapping + # - https://docs.python.org/3/library/stdtypes.html#dict.get + - ["_collections_abc.Mapping", "Member[get]", "Argument[1,default:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser + - ["argparse.ArgumentParser", "Member[_parse_known_args,_read_args_from_files]", "Argument[0,arg_strings:]", "ReturnValue", "taint"] + - ["argparse.ArgumentParser", "Member[parse_args,parse_known_args]", "Argument[0,args:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/cgi.html#higher-level-interface + - ["cgi.FieldStorage", "Member[getfirst,getlist,getvalue]", "Argument[self]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/contextlib.html#contextlib.ExitStack + - ["contextlib.ExitStack", "Member[enter_context]", "Argument[0,cm:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/copy.html#copy.deepcopy - ["copy", "Member[copy,deepcopy]", "Argument[0,x:]", "ReturnValue", "value"] + # See + # - https://docs.python.org/3/library/ctypes.html#ctypes.create_string_buffer + # - https://docs.python.org/3/library/ctypes.html#ctypes.create_unicode_buffer + - 
["ctypes", "Member[create_string_buffer,create_unicode_buffer]", "Argument[0,init:,init_or_size:]", "ReturnValue", "taint"] + # See https://docs.python.org/3.11/distutils/apiref.html#distutils.util.change_root + - ["distutils", "Member[util].Member[change_root]", "Argument[0,new_root:,1,pathname:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/email.header.html#email.header.Header + - ["email.header.Header!", "Subclass.Call", "Argument[0,s:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/email.utils.html#email.utils.parseaddr + - ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue", "taint"] + - ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue.TupleElement[0,1]", "taint"] # See See https://docs.python.org/3/library/fnmatch.html#fnmatch.filter - ["fnmatch", "Member[filter]", "Argument[0,names:].ListElement", "ReturnValue.ListElement", "value"] - ["fnmatch", "Member[filter]", "Argument[0,names:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/getopt.html#getopt.getopt + - ["getopt", "Member[getopt]", "Argument[0,args:]", "ReturnValue.TupleElement[1]", "taint"] + - ["getopt", "Member[getopt]", "Argument[1,shortopts:,2,longopts:]", "ReturnValue.TupleElement[0].ListElement.TupleElement[0]", "taint"] + # See https://docs.python.org/3/library/gettext.html#gettext.gettext + - ["gettext", "Member[gettext]", "Argument[0,message:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/gzip.html#gzip.GzipFile + - ["gzip.GzipFile!", "Subclass.Call", "Argument[0,filename:]", "ReturnValue", "taint"] + # See + # - https://docs.python.org/3/library/html.html#html.escape + # - https://docs.python.org/3/library/html.html#html.unescape + - ["html", "Member[escape,unescape]", "Argument[0,s:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/html.parser.html#html.parser.HTMLParser.feed + - ["html.parser.HTMLParser", "Member[feed]", 
"Argument[0,data:]", "Argument[self]", "taint"] + # See https://docs.python.org/3.11/library/imp.html#imp.find_module + - ["imp", "Member[find_module]", "Argument[0,name:,1,path:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/logging.html#logging.getLevelName + # specifically the no matching case + - ["logging", "Member[getLevelName]", "Argument[0,level:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/logging.html#logging.LogRecord.getMessage + - ["logging.LogRecord", "Member[getMessage]", "Argument[self]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/mimetypes.html#mimetypes.guess_type + - ["mimetypes", "Member[guess_type]", "Argument[0,url:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.connection.Listener + - ["multiprocessing.connection.Listener!", "Subclass.Call", "Argument[3,authkey:]", "ReturnValue", "taint"] + # See https://github.com/python/cpython/blob/main/Lib/nturl2path.py + # No user-facing documentation, unfortunately. 
+ - ["nturl2path", "Member[pathname2url]", "Argument[0,p:]", "ReturnValue", "taint"] + - ["nturl2path", "Member[url2pathname]", "Argument[0,url:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/optparse.html#optparse.OptionParser.parse_args - ["optparse.OptionParser", "Member[parse_args]", "Argument[0,args:,1,values:]", "ReturnValue.TupleElement[0,1]", "taint"] + # See https://github.com/python/cpython/blob/3.10/Lib/pathlib.py#L972-L973 + - ["pathlib.Path", ".Member[__enter__]", "Argument[self]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/os.html#os.PathLike.__fspath__ + - ["pathlib.PurePath", "Member[__fspath__]", "Argument[self]", "ReturnValue", "taint"] + # See + # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.put + # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.put_nowait + - ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self]", "taint"] + # See + # - https://docs.python.org/3/library/random.html#random.choice + # - https://docs.python.org/3/library/random.html#module-random + - ["random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"] + - ["random.Random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/shlex.html#shlex.quote + - ["shlex", "Member[quote]", "Argument[0,s:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/shutil.html#shutil.rmtree + - ["shutil", "Member[rmtree]", "Argument[0,path:]", "Argument[2,onerror:,onexc:].Argument[1]", "taint"] + # See https://docs.python.org/3/library/shutil.html#shutil.which + - ["shutil", "Member[which]", "Argument[0,cmd:,2,path:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/subprocess.html#subprocess.Popen + - ["subprocess.Popen!", "Subclass.Call", "Argument[0,args:]", "ReturnValue", "taint"] + # See + # - https://docs.python.org/3/library/tarfile.html#tarfile.open + # - 
https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.open + - ["tarfile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"] + - ["tarfile.TarFile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp + - ["tempfile", "Member[mkdtemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/tempfile.html#tempfile.mkstemp + - ["tempfile", "Member[mkstemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue.TupleElement[0,1]", "taint"] + # See https://docs.python.org/3/library/textwrap.html#textwrap.dedent + - ["textwrap", "Member[dedent]", "Argument[0,text:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/traceback.html#traceback.StackSummary.from_list + - ["traceback.StackSummary", "Member[from_list]", "Argument[0,a_list:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/typing.html#typing.cast + - ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote - ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus @@ -35,6 +123,21 @@ extensions: - ["urllib", "Member[parse].Member[urlencode]", "Argument[0,query:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin - ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:,1,url:]", "ReturnValue", "taint"] + # See the internal documentation + # https://github.com/python/cpython/blob/3.12/Lib/zipfile/_path/__init__.py#L103-L105 + - ["zipfile.CompleteDirs", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile + # it may be necessary to read the code to understand the 
taint propagation + # Constructor: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1266 + - ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue", "taint"] + - ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue.Attribute[filelist].ListElement.Attribute[filename]", "value"] + # _extract_member: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1761 + - ["zipfile.ZipFile", "Member[_extract_member]", "Argument[1,targetpath:]", "ReturnValue", "taint"] + # infolist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1498-L1501 + - ["zipfile.ZipFile", "Member[infolist]", "Argument[self]", "ReturnValue", "taint"] + - ["zipfile.ZipFile", "Member[infolist]", "Argument[self].Attribute[filelist]", "ReturnValue", "value"] + # namelist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1494-L1496 + - ["zipfile.ZipFile", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"] - addsTo: pack: codeql/python-all extensible: neutralModel From eb32cbe8a5fa65d85894c62c01dd42a5a8cbec98 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 26 Jun 2024 00:57:59 +0200 Subject: [PATCH 11/26] Python: codecs.open --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 7a373a523e47..74e33429ed05 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -1492,6 +1492,9 @@ module StdlibPrivate { or // io.open is a special case, since it is an alias for the builtin `open` result = API::moduleImport("io").getMember("open") + or + // similarly, coecs.open calls the builtin `open`: https://github.com/python/cpython/blob/3.12/Lib/codecs.py#L918 + result = API::moduleImport("codecs").getMember("open") } /** From 
571be8be3e052737e13c50d1fcc4201891aafe4f Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 26 Jun 2024 01:00:38 +0200 Subject: [PATCH 12/26] Python: model more loggers --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 74e33429ed05..57bceeda79aa 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -254,10 +254,14 @@ module Stdlib { * See https://docs.python.org/3.9/library/logging.html#logging.Logger. */ module Logger { + private import semmle.python.dataflow.new.internal.DataFlowDispatch as DD + /** Gets a reference to the `logging.Logger` class or any subclass. */ API::Node subclassRef() { result = API::moduleImport("logging").getMember("Logger").getASubclass*() or + result = API::moduleImport("logging").getMember("getLoggerClass").getReturn().getASubclass*() + or result = ModelOutput::getATypeNode("logging.Logger~Subclass").getASubclass*() } @@ -277,6 +281,13 @@ module Stdlib { ClassInstantiation() { this = subclassRef().getACall() or + this = + DD::selfTracker(subclassRef() + .getAValueReachableFromSource() + .asExpr() + .(ClassExpr) + .getInnerScope()) + or this = API::moduleImport("logging").getMember("root").asSource() or this = API::moduleImport("logging").getMember("getLogger").getACall() From b261145f4346ee7ee666ba4445d199af076374f0 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 26 Jun 2024 10:46:38 +0200 Subject: [PATCH 13/26] Python: fix compilation --- .../functions/ModificationOfParameterWithDefault.qll | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll index 77dc4ccafcc0..68194309e1dd 100644 
--- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefault.qll @@ -8,7 +8,7 @@ private import python import semmle.python.dataflow.new.DataFlow -private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TTP +private import semmle.python.ApiGraphs /** * Provides a data-flow configuration for detecting modifications of a parameters default value. @@ -73,7 +73,13 @@ module ModificationOfParameterWithDefault { or // the target of a copy step is (presumably) a different object, and hence modifications of // this object no longer matter for the purposes of this query. - TTP::copyStep(_, node) and state in [true, false] + copyTarget(node) and state in [true, false] + } + + private predicate copyTarget(DataFlow::Node node) { + node = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() + or + node.(DataFlow::MethodCallNode).calls(_, "copy") } } From a3076f4f724884a71e41535f592ae8e717127c1c Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 26 Jun 2024 13:27:32 +0200 Subject: [PATCH 14/26] Python: fix test expectations, add missing sanitizer --- .../UnsafeShellCommandConstructionQuery.qll | 1 + .../CWE-022-UnsafeUnpacking/UnsafeUnpack.expected | 1 + .../Security/CWE-409/DecompressionBombs.expected | 15 +++++++++++++++ .../CommandInjection.expected | 2 +- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll b/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll index 73205fdeb28c..6d292a88b6c7 100644 --- a/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll +++ b/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll @@ -45,6 +45,7 @@ module UnsafeShellCommandConstructionConfig implements DataFlow::ConfigSig { predicate 
isSink(DataFlow::Node sink) { sink instanceof Sink } predicate isBarrier(DataFlow::Node node) { + node instanceof Sanitizer or node instanceof CommandInjection::Sanitizer // using all sanitizers from `py/command-injection` } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected index ca4d7ebafff0..e1710375df25 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected @@ -75,6 +75,7 @@ edges | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | provenance | | | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | provenance | | | UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | Config | +| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:49 | | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | provenance | | | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | provenance | | | UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | provenance | | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected index 5689deb01a03..92af7f59efe7 100644 --- 
a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected @@ -1,13 +1,23 @@ edges | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:21:11:29 | ControlFlowNode for file_path | provenance | | +| test.py:11:5:11:35 | ControlFlowNode for Attribute() | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | +| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:64 | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:12:21:12:29 | ControlFlowNode for file_path | provenance | | +| test.py:12:5:12:35 | ControlFlowNode for Attribute() | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | +| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:64 | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:14:26:14:34 | ControlFlowNode for file_path | provenance | | +| test.py:14:10:14:35 | ControlFlowNode for Attribute() | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | +| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:64 | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:18:26:18:34 | ControlFlowNode for file_path | provenance | | +| test.py:18:10:18:35 | ControlFlowNode for Attribute() | test.py:19:14:19:39 | 
ControlFlowNode for Attribute() | provenance | Config | +| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:64 | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:22:21:22:29 | ControlFlowNode for file_path | provenance | | +| test.py:22:5:22:30 | ControlFlowNode for Attribute() | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | +| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:64 | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:24:18:24:26 | ControlFlowNode for file_path | provenance | | | test.py:24:18:24:26 | ControlFlowNode for file_path | test.py:24:5:24:52 | ControlFlowNode for Attribute() | provenance | Config | @@ -37,14 +47,19 @@ edges | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:64:36:64:44 | ControlFlowNode for file_path | provenance | | nodes | test.py:10:16:10:24 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:11:5:11:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:11:5:11:52 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:11:21:11:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:12:5:12:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:12:5:12:48 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:12:21:12:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path 
| +| test.py:14:10:14:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:14:26:14:34 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:15:14:15:29 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test.py:18:10:18:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:18:26:18:34 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:19:14:19:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test.py:22:5:22:30 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:22:5:22:60 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | test.py:22:21:22:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:24:5:24:52 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | diff --git a/python/ql/test/query-tests/Security/CWE-078-CommandInjection/CommandInjection.expected b/python/ql/test/query-tests/Security/CWE-078-CommandInjection/CommandInjection.expected index 1e75c67db66b..4a1856ba98ad 100644 --- a/python/ql/test/query-tests/Security/CWE-078-CommandInjection/CommandInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-078-CommandInjection/CommandInjection.expected @@ -12,7 +12,7 @@ edges | command_injection.py:11:13:11:19 | ControlFlowNode for request | command_injection.py:11:13:11:24 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | | command_injection.py:11:13:11:24 | ControlFlowNode for Attribute | command_injection.py:11:13:11:41 | ControlFlowNode for Attribute() | provenance | dict.get | | command_injection.py:11:13:11:41 | ControlFlowNode for Attribute() | command_injection.py:11:5:11:9 | ControlFlowNode for files | provenance | | -| 
command_injection.py:18:5:18:9 | ControlFlowNode for files | command_injection.py:20:22:20:34 | ControlFlowNode for BinaryExpr | provenance | | +| command_injection.py:18:5:18:9 | ControlFlowNode for files | command_injection.py:20:22:20:34 | ControlFlowNode for BinaryExpr | provenance | Sink:MaD:11 | | command_injection.py:18:13:18:19 | ControlFlowNode for request | command_injection.py:18:13:18:24 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | | command_injection.py:18:13:18:24 | ControlFlowNode for Attribute | command_injection.py:18:13:18:41 | ControlFlowNode for Attribute() | provenance | dict.get | | command_injection.py:18:13:18:41 | ControlFlowNode for Attribute() | command_injection.py:18:5:18:9 | ControlFlowNode for files | provenance | | From bbc3ff2dfedf7cbf087209f7863d1049db7b4bca Mon Sep 17 00:00:00 2001 From: yoff Date: Fri, 28 Jun 2024 14:39:03 +0200 Subject: [PATCH 15/26] Apply suggestions from code review Co-authored-by: Rasmus Wriedt Larsen --- .../ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml index 1d7e0c070fe4..16bd96ffe252 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml @@ -18,7 +18,7 @@ extensions: # See # - https://docs.python.org/3/glossary.html#term-mapping # - https://docs.python.org/3/library/stdtypes.html#dict.get - - ["_collections_abc.Mapping", "Member[get]", "Argument[1,default:]", "ReturnValue", "taint"] + - ["collections.abc.Mapping", "Member[get]", "Argument[1,default:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser - ["argparse.ArgumentParser", "Member[_parse_known_args,_read_args_from_files]", "Argument[0,arg_strings:]", "ReturnValue", "taint"] - 
["argparse.ArgumentParser", "Member[parse_args,parse_known_args]", "Argument[0,args:]", "ReturnValue", "taint"] @@ -88,7 +88,7 @@ extensions: # See https://docs.python.org/3/library/shlex.html#shlex.quote - ["shlex", "Member[quote]", "Argument[0,s:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/shutil.html#shutil.rmtree - - ["shutil", "Member[rmtree]", "Argument[0,path:]", "Argument[2,onerror:,onexc:].Argument[1]", "taint"] + - ["shutil", "Member[rmtree]", "Argument[0,path:]", "Argument[2,onerror:,onexc:].Parameter[1]", "taint"] # See https://docs.python.org/3/library/shutil.html#shutil.which - ["shutil", "Member[which]", "Argument[0,cmd:,2,path:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/subprocess.html#subprocess.Popen From 59f953269a22300e011546a9ced4cc3a2c4c9234 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 28 Jun 2024 14:42:24 +0200 Subject: [PATCH 16/26] Python: remove strange sink It is not clear from the code how this could happen and I do not remember the path I saw, perhaps it was unreasonable. 
--- python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml index 16bd96ffe252..aee88e1d9419 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml @@ -8,7 +8,6 @@ extensions: pack: codeql/python-all extensible: sinkModel data: - - ["subprocess.Popen!","Subclass.Call.Argument[0,args:]", "log-injection"] - ["zipfile.ZipFile","Member[extractall].Argument[0,path:]", "path-injection"] - addsTo: From 5ddfe75a0da516249e7a7ff3c0a06bbbc2200b87 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 28 Jun 2024 15:10:08 +0200 Subject: [PATCH 17/26] Python: Add value steps for sequence elements It would be nice to simplify to a single sequence content type.. --- .../semmle/python/frameworks/Stdlib/StdLib.model.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml index aee88e1d9419..569ad9e48e7f 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml @@ -76,13 +76,23 @@ extensions: # See https://docs.python.org/3/library/os.html#os.PathLike.__fspath__ - ["pathlib.PurePath", "Member[__fspath__]", "Argument[self]", "ReturnValue", "taint"] # See + # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.get + # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.get_nowait + - ["queue.Queue", "Member[get,get_nowait]", "Argument[self].ListElement", "ReturnValue", "value"] + - ["queue.Queue", "Member[get,get_nowait]", "Argument[self]", "ReturnValue", "taint"] + # See # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.put # 
- https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.put_nowait + - ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self].ListElement", "value"] - ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self]", "taint"] # See # - https://docs.python.org/3/library/random.html#random.choice # - https://docs.python.org/3/library/random.html#module-random + - ["random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"] + - ["random", "Member[choice]", "Argument[0,seq:].SetElement", "ReturnValue", "value"] - ["random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"] + - ["random.Random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"] + - ["random.Random", "Member[choice]", "Argument[0,seq:].SetElement", "ReturnValue", "value"] - ["random.Random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/shlex.html#shlex.quote - ["shlex", "Member[quote]", "Argument[0,s:]", "ReturnValue", "taint"] From 77a00873a974c4ecec229f19a59f2cef7ede0c02 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 28 Jun 2024 15:25:17 +0200 Subject: [PATCH 18/26] Python: add tests for loggers --- .../ql/test/library-tests/frameworks/stdlib/Logging.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/stdlib/Logging.py b/python/ql/test/library-tests/frameworks/stdlib/Logging.py index cb2e3fddc902..72a5175fef85 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/Logging.py +++ b/python/ql/test/library-tests/frameworks/stdlib/Logging.py @@ -43,3 +43,12 @@ class MyLogger(logging.Logger): pass MyLogger("bar").info("hello") # $ loggingInput="hello" + +class CustomLogger(logging.getLoggerClass()): + pass + +CustomLogger("baz").info("hello") # $ loggingInput="hello" + +class LoggerSubClassUsingSelf(logging.Logger): + def foo(self): + self.info("hello") # $ 
loggingInput="hello" \ No newline at end of file From e40ae2e52d3a3d378aa00a0b9f5bd6c5f9a88bc3 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 28 Jun 2024 21:56:11 +0200 Subject: [PATCH 19/26] Python: adjust test expectations MaD row numbers in provenance column --- .../CWE-022-UnsafeUnpacking/UnsafeUnpack.expected | 2 +- .../Security/CWE-409/DecompressionBombs.expected | 10 +++++----- .../CWE-078-CommandInjection/CommandInjection.expected | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected index e1710375df25..ea08c95d7b64 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected @@ -75,7 +75,7 @@ edges | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | provenance | | | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | provenance | | | UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | Config | -| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:49 | +| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:55 | | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | provenance | | | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | provenance | 
| | UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | provenance | | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected index 92af7f59efe7..19d715d8a868 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected @@ -1,23 +1,23 @@ edges | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:21:11:29 | ControlFlowNode for file_path | provenance | | | test.py:11:5:11:35 | ControlFlowNode for Attribute() | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:64 | +| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:70 | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:12:21:12:29 | ControlFlowNode for file_path | provenance | | | test.py:12:5:12:35 | ControlFlowNode for Attribute() | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:64 | +| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:70 | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:14:26:14:34 | ControlFlowNode for 
file_path | provenance | | | test.py:14:10:14:35 | ControlFlowNode for Attribute() | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:64 | +| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:70 | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:18:26:18:34 | ControlFlowNode for file_path | provenance | | | test.py:18:10:18:35 | ControlFlowNode for Attribute() | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:64 | +| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:70 | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:22:21:22:29 | ControlFlowNode for file_path | provenance | | | test.py:22:5:22:30 | ControlFlowNode for Attribute() | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:64 | +| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:70 | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:24:18:24:26 | ControlFlowNode for file_path 
| provenance | | | test.py:24:18:24:26 | ControlFlowNode for file_path | test.py:24:5:24:52 | ControlFlowNode for Attribute() | provenance | Config | diff --git a/python/ql/test/query-tests/Security/CWE-078-CommandInjection/CommandInjection.expected b/python/ql/test/query-tests/Security/CWE-078-CommandInjection/CommandInjection.expected index 4a1856ba98ad..1e75c67db66b 100644 --- a/python/ql/test/query-tests/Security/CWE-078-CommandInjection/CommandInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-078-CommandInjection/CommandInjection.expected @@ -12,7 +12,7 @@ edges | command_injection.py:11:13:11:19 | ControlFlowNode for request | command_injection.py:11:13:11:24 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | | command_injection.py:11:13:11:24 | ControlFlowNode for Attribute | command_injection.py:11:13:11:41 | ControlFlowNode for Attribute() | provenance | dict.get | | command_injection.py:11:13:11:41 | ControlFlowNode for Attribute() | command_injection.py:11:5:11:9 | ControlFlowNode for files | provenance | | -| command_injection.py:18:5:18:9 | ControlFlowNode for files | command_injection.py:20:22:20:34 | ControlFlowNode for BinaryExpr | provenance | Sink:MaD:11 | +| command_injection.py:18:5:18:9 | ControlFlowNode for files | command_injection.py:20:22:20:34 | ControlFlowNode for BinaryExpr | provenance | | | command_injection.py:18:13:18:19 | ControlFlowNode for request | command_injection.py:18:13:18:24 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | | command_injection.py:18:13:18:24 | ControlFlowNode for Attribute | command_injection.py:18:13:18:41 | ControlFlowNode for Attribute() | provenance | dict.get | | command_injection.py:18:13:18:41 | ControlFlowNode for Attribute() | command_injection.py:18:5:18:9 | ControlFlowNode for files | provenance | | From e30f725e71e6637f037bd232ee1a5573ce1c2aa7 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 22 Jul 2024 15:43:06 +0200 
Subject: [PATCH 20/26] Python: Remove questionable model for `multiprocessing.connection.Listener` --- python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml index 569ad9e48e7f..19a97b16537e 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml @@ -63,8 +63,6 @@ extensions: - ["logging.LogRecord", "Member[getMessage]", "Argument[self]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/mimetypes.html#mimetypes.guess_type - ["mimetypes", "Member[guess_type]", "Argument[0,url:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.connection.Listener - - ["multiprocessing.connection.Listener!", "Subclass.Call", "Argument[3,authkey:]", "ReturnValue", "taint"] # See https://github.com/python/cpython/blob/main/Lib/nturl2path.py # No user-facing documentation, unfortunately. - ["nturl2path", "Member[pathname2url]", "Argument[0,p:]", "ReturnValue", "taint"] From 3434c38da78189324578285f405f0bd0bb180786 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 22 Jul 2024 17:03:29 +0200 Subject: [PATCH 21/26] Python: update test expectations This is MaD... 
--- .../CWE-022-UnsafeUnpacking/UnsafeUnpack.expected | 2 +- .../Security/CWE-409/DecompressionBombs.expected | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected index ea08c95d7b64..0c6c30857220 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected @@ -75,7 +75,7 @@ edges | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | provenance | | | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | provenance | | | UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | Config | -| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:55 | +| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:54 | | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | provenance | | | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | provenance | | | UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | provenance | | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected index 
19d715d8a868..073533bcc092 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected @@ -1,23 +1,23 @@ edges | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:21:11:29 | ControlFlowNode for file_path | provenance | | | test.py:11:5:11:35 | ControlFlowNode for Attribute() | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:70 | +| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:69 | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:12:21:12:29 | ControlFlowNode for file_path | provenance | | | test.py:12:5:12:35 | ControlFlowNode for Attribute() | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:70 | +| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:69 | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:14:26:14:34 | ControlFlowNode for file_path | provenance | | | test.py:14:10:14:35 | ControlFlowNode for Attribute() | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:70 | +| test.py:14:26:14:34 | ControlFlowNode 
for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:69 | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:18:26:18:34 | ControlFlowNode for file_path | provenance | | | test.py:18:10:18:35 | ControlFlowNode for Attribute() | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:70 | +| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:69 | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:22:21:22:29 | ControlFlowNode for file_path | provenance | | | test.py:22:5:22:30 | ControlFlowNode for Attribute() | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:70 | +| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:69 | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:24:18:24:26 | ControlFlowNode for file_path | provenance | | | test.py:24:18:24:26 | ControlFlowNode for file_path | test.py:24:5:24:52 | ControlFlowNode for Attribute() | provenance | Config | From f95926e1a810b677ec2b8ce45711ea1ea0e24518 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 24 Sep 2024 20:23:39 +0200 Subject: [PATCH 22/26] Python: add change note 
--- python/ql/lib/change-notes/2024-09-24-std-lib-models.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2024-09-24-std-lib-models.md diff --git a/python/ql/lib/change-notes/2024-09-24-std-lib-models.md b/python/ql/lib/change-notes/2024-09-24-std-lib-models.md new file mode 100644 index 000000000000..20b522576a30 --- /dev/null +++ b/python/ql/lib/change-notes/2024-09-24-std-lib-models.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Added several models of standard library functions and classes. This should make the analysis much more complete in cases where the standard library is not extracted. \ No newline at end of file From 2eac11edd604be61b40d392dd9a4a441be865e3b Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 1 Oct 2024 11:47:42 +0200 Subject: [PATCH 23/26] Update python/ql/lib/change-notes/2024-09-24-std-lib-models.md Co-authored-by: Rasmus Wriedt Larsen --- python/ql/lib/change-notes/2024-09-24-std-lib-models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/lib/change-notes/2024-09-24-std-lib-models.md b/python/ql/lib/change-notes/2024-09-24-std-lib-models.md index 20b522576a30..3166e0c8ff0f 100644 --- a/python/ql/lib/change-notes/2024-09-24-std-lib-models.md +++ b/python/ql/lib/change-notes/2024-09-24-std-lib-models.md @@ -1,4 +1,4 @@ --- category: minorAnalysis --- -* Added several models of standard library functions and classes. This should make the analysis much more complete in cases where the standard library is not extracted. \ No newline at end of file +* Added several models of standard library functions and classes, in anticipation of no longer extracting the standard library in a future release. 
\ No newline at end of file From cef8744a3776fdff4e3f6566080a4d0e590b9b59 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 1 Oct 2024 12:56:21 +0200 Subject: [PATCH 24/26] Python: consolidate models in one file --- .../semmle/python/frameworks/Stdlib.model.yml | 154 ++++++++++++++++- .../python/frameworks/Stdlib/StdLib.model.yml | 161 ------------------ 2 files changed, 153 insertions(+), 162 deletions(-) delete mode 100644 python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index 53d918d07ac3..946c4d5ed4f6 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -21,9 +21,161 @@ extensions: - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs'] - ['os', 'Member[read].ReturnValue', 'file'] + + - addsTo: + pack: codeql/python-all + extensible: sinkModel + data: + - ["zipfile.ZipFile","Member[extractall].Argument[0,path:]", "path-injection"] + - addsTo: pack: codeql/python-all extensible: summaryModel data: - - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args]', 'Argument[0,args:]', 'ReturnValue', 'taint'] + # See https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser + - ["argparse.ArgumentParser", "Member[_parse_known_args,_read_args_from_files]", "Argument[0,arg_strings:]", "ReturnValue", "taint"] # note: taint of attribute lookups is handled in QL + - ["argparse.ArgumentParser", "Member[parse_args,parse_known_args]", "Argument[0,args:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/cgi.html#higher-level-interface + - ["cgi.FieldStorage", "Member[getfirst,getlist,getvalue]", "Argument[self]", "ReturnValue", "taint"] + # See + # - https://docs.python.org/3/glossary.html#term-mapping + # - 
https://docs.python.org/3/library/stdtypes.html#dict.get + - ["collections.abc.Mapping", "Member[get]", "Argument[1,default:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/contextlib.html#contextlib.ExitStack + - ["contextlib.ExitStack", "Member[enter_context]", "Argument[0,cm:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/copy.html#copy.deepcopy + - ["copy", "Member[copy,deepcopy]", "Argument[0,x:]", "ReturnValue", "value"] + # See + # - https://docs.python.org/3/library/ctypes.html#ctypes.create_string_buffer + # - https://docs.python.org/3/library/ctypes.html#ctypes.create_unicode_buffer + - ["ctypes", "Member[create_string_buffer,create_unicode_buffer]", "Argument[0,init:,init_or_size:]", "ReturnValue", "taint"] + # See https://docs.python.org/3.11/distutils/apiref.html#distutils.util.change_root + - ["distutils", "Member[util].Member[change_root]", "Argument[0,new_root:,1,pathname:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/email.header.html#email.header.Header + - ["email.header.Header!", "Subclass.Call", "Argument[0,s:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/email.utils.html#email.utils.parseaddr + - ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue", "taint"] + - ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue.TupleElement[0,1]", "taint"] + # See https://docs.python.org/3/library/fnmatch.html#fnmatch.filter + - ["fnmatch", "Member[filter]", "Argument[0,names:].ListElement", "ReturnValue.ListElement", "value"] + - ["fnmatch", "Member[filter]", "Argument[0,names:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/getopt.html#getopt.getopt + - ["getopt", "Member[getopt]", "Argument[0,args:]", "ReturnValue.TupleElement[1]", "taint"] + - ["getopt", "Member[getopt]", "Argument[1,shortopts:,2,longopts:]", "ReturnValue.TupleElement[0].ListElement.TupleElement[0]", "taint"] + # See 
https://docs.python.org/3/library/gettext.html#gettext.gettext + - ["gettext", "Member[gettext]", "Argument[0,message:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/gzip.html#gzip.GzipFile + - ["gzip.GzipFile!", "Subclass.Call", "Argument[0,filename:]", "ReturnValue", "taint"] + # See + # - https://docs.python.org/3/library/html.html#html.escape + # - https://docs.python.org/3/library/html.html#html.unescape + - ["html", "Member[escape,unescape]", "Argument[0,s:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/html.parser.html#html.parser.HTMLParser.feed + - ["html.parser.HTMLParser", "Member[feed]", "Argument[0,data:]", "Argument[self]", "taint"] + # See https://docs.python.org/3.11/library/imp.html#imp.find_module + - ["imp", "Member[find_module]", "Argument[0,name:,1,path:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/logging.html#logging.getLevelName + # specifically the no matching case + - ["logging", "Member[getLevelName]", "Argument[0,level:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/logging.html#logging.LogRecord.getMessage + - ["logging.LogRecord", "Member[getMessage]", "Argument[self]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/mimetypes.html#mimetypes.guess_type + - ["mimetypes", "Member[guess_type]", "Argument[0,url:]", "ReturnValue", "taint"] + # See https://github.com/python/cpython/blob/main/Lib/nturl2path.py + # No user-facing documentation, unfortunately. 
+ - ["nturl2path", "Member[pathname2url]", "Argument[0,p:]", "ReturnValue", "taint"] + - ["nturl2path", "Member[url2pathname]", "Argument[0,url:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/optparse.html#optparse.OptionParser.parse_args + - ["optparse.OptionParser", "Member[parse_args]", "Argument[0,args:,1,values:]", "ReturnValue.TupleElement[0,1]", "taint"] + # See https://github.com/python/cpython/blob/3.10/Lib/pathlib.py#L972-L973 + - ["pathlib.Path", ".Member[__enter__]", "Argument[self]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/os.html#os.PathLike.__fspath__ + - ["pathlib.PurePath", "Member[__fspath__]", "Argument[self]", "ReturnValue", "taint"] + # See + # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.get + # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.get_nowait + - ["queue.Queue", "Member[get,get_nowait]", "Argument[self].ListElement", "ReturnValue", "value"] + - ["queue.Queue", "Member[get,get_nowait]", "Argument[self]", "ReturnValue", "taint"] + # See + # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.put + # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.put_nowait + - ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self].ListElement", "value"] + - ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self]", "taint"] + # See + # - https://docs.python.org/3/library/random.html#random.choice + # - https://docs.python.org/3/library/random.html#module-random + - ["random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"] + - ["random", "Member[choice]", "Argument[0,seq:].SetElement", "ReturnValue", "value"] + - ["random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"] + - ["random.Random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"] + - ["random.Random", "Member[choice]", "Argument[0,seq:].SetElement", 
"ReturnValue", "value"] + - ["random.Random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/shlex.html#shlex.quote + - ["shlex", "Member[quote]", "Argument[0,s:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/shutil.html#shutil.rmtree + - ["shutil", "Member[rmtree]", "Argument[0,path:]", "Argument[2,onerror:,onexc:].Parameter[1]", "taint"] + # See https://docs.python.org/3/library/shutil.html#shutil.which + - ["shutil", "Member[which]", "Argument[0,cmd:,2,path:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/subprocess.html#subprocess.Popen + - ["subprocess.Popen!", "Subclass.Call", "Argument[0,args:]", "ReturnValue", "taint"] + # See + # - https://docs.python.org/3/library/tarfile.html#tarfile.open + # - https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.open + - ["tarfile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"] + - ["tarfile.TarFile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp + - ["tempfile", "Member[mkdtemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/tempfile.html#tempfile.mkstemp + - ["tempfile", "Member[mkstemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue.TupleElement[0,1]", "taint"] + # See https://docs.python.org/3/library/textwrap.html#textwrap.dedent + - ["textwrap", "Member[dedent]", "Argument[0,text:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/traceback.html#traceback.StackSummary.from_list + - ["traceback.StackSummary", "Member[from_list]", "Argument[0,a_list:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/typing.html#typing.cast + - ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote + - 
["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus + - ["urllib", "Member[parse].Member[quote_plus]", "Argument[0,string:]", "ReturnValue", "taint"] + # See https://epydoc.sourceforge.net/stdlib/urllib-module.html + - ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[0,1]", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote + - ["urllib", "Member[parse].Member[unquote]", "Argument[0,string:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote_plus + - ["urllib", "Member[parse].Member[unquote_plus]", "Argument[0,string:]", "ReturnValue", "taint"] + # We could consider a more precise source than the first argument, namely tuple or dict content. + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode + - ["urllib", "Member[parse].Member[urlencode]", "Argument[0,query:]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin + - ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:,1,url:]", "ReturnValue", "taint"] + # See the internal documentation + # https://github.com/python/cpython/blob/3.12/Lib/zipfile/_path/__init__.py#L103-L105 + - ["zipfile.CompleteDirs", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"] + # See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile + # it may be necessary to read the code to understand the taint propagation + # Constructor: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1266 + - ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue", "taint"] + - ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue.Attribute[filelist].ListElement.Attribute[filename]", "value"] + # _extract_member: 
https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1761 + - ["zipfile.ZipFile", "Member[_extract_member]", "Argument[1,targetpath:]", "ReturnValue", "taint"] + # infolist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1498-L1501 + - ["zipfile.ZipFile", "Member[infolist]", "Argument[self]", "ReturnValue", "taint"] + - ["zipfile.ZipFile", "Member[infolist]", "Argument[self].Attribute[filelist]", "ReturnValue", "value"] + # namelist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1494-L1496 + - ["zipfile.ZipFile", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"] + + - addsTo: + pack: codeql/python-all + extensible: neutralModel + data: [] + + - addsTo: + pack: codeql/python-all + extensible: typeModel + data: [] + + - addsTo: + pack: codeql/python-all + extensible: typeVariableModel + data: [] \ No newline at end of file diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml deleted file mode 100644 index 19a97b16537e..000000000000 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/StdLib.model.yml +++ /dev/null @@ -1,161 +0,0 @@ -extensions: - - addsTo: - pack: codeql/python-all - extensible: sourceModel - data: [] - - - addsTo: - pack: codeql/python-all - extensible: sinkModel - data: - - ["zipfile.ZipFile","Member[extractall].Argument[0,path:]", "path-injection"] - - - addsTo: - pack: codeql/python-all - extensible: summaryModel - data: - # See - # - https://docs.python.org/3/glossary.html#term-mapping - # - https://docs.python.org/3/library/stdtypes.html#dict.get - - ["collections.abc.Mapping", "Member[get]", "Argument[1,default:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser - - ["argparse.ArgumentParser", "Member[_parse_known_args,_read_args_from_files]", "Argument[0,arg_strings:]", "ReturnValue", "taint"] - - ["argparse.ArgumentParser", 
"Member[parse_args,parse_known_args]", "Argument[0,args:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/cgi.html#higher-level-interface - - ["cgi.FieldStorage", "Member[getfirst,getlist,getvalue]", "Argument[self]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/contextlib.html#contextlib.ExitStack - - ["contextlib.ExitStack", "Member[enter_context]", "Argument[0,cm:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/copy.html#copy.deepcopy - - ["copy", "Member[copy,deepcopy]", "Argument[0,x:]", "ReturnValue", "value"] - # See - # - https://docs.python.org/3/library/ctypes.html#ctypes.create_string_buffer - # - https://docs.python.org/3/library/ctypes.html#ctypes.create_unicode_buffer - - ["ctypes", "Member[create_string_buffer,create_unicode_buffer]", "Argument[0,init:,init_or_size:]", "ReturnValue", "taint"] - # See https://docs.python.org/3.11/distutils/apiref.html#distutils.util.change_root - - ["distutils", "Member[util].Member[change_root]", "Argument[0,new_root:,1,pathname:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/email.header.html#email.header.Header - - ["email.header.Header!", "Subclass.Call", "Argument[0,s:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/email.utils.html#email.utils.parseaddr - - ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue", "taint"] - - ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue.TupleElement[0,1]", "taint"] - # See See https://docs.python.org/3/library/fnmatch.html#fnmatch.filter - - ["fnmatch", "Member[filter]", "Argument[0,names:].ListElement", "ReturnValue.ListElement", "value"] - - ["fnmatch", "Member[filter]", "Argument[0,names:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/getopt.html#getopt.getopt - - ["getopt", "Member[getopt]", "Argument[0,args:]", "ReturnValue.TupleElement[1]", "taint"] - - ["getopt", "Member[getopt]", 
"Argument[1,shortopts:,2,longopts:]", "ReturnValue.TupleElement[0].ListElement.TupleElement[0]", "taint"] - # See https://docs.python.org/3/library/gettext.html#gettext.gettext - - ["gettext", "Member[gettext]", "Argument[0,message:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/gzip.html#gzip.GzipFile - - ["gzip.GzipFile!", "Subclass.Call", "Argument[0,filename:]", "ReturnValue", "taint"] - # See - # - https://docs.python.org/3/library/html.html#html.escape - # - https://docs.python.org/3/library/html.html#html.unescape - - ["html", "Member[escape,unescape]", "Argument[0,s:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/html.parser.html#html.parser.HTMLParser.feed - - ["html.parser.HTMLParser", "Member[feed]", "Argument[0,data:]", "Argument[self]", "taint"] - # See https://docs.python.org/3.11/library/imp.html#imp.find_module - - ["imp", "Member[find_module]", "Argument[0,name:,1,path:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/logging.html#logging.getLevelName - # specifically the no matching case - - ["logging", "Member[getLevelName]", "Argument[0,level:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/logging.html#logging.LogRecord.getMessage - - ["logging.LogRecord", "Member[getMessage]", "Argument[self]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/mimetypes.html#mimetypes.guess_type - - ["mimetypes", "Member[guess_type]", "Argument[0,url:]", "ReturnValue", "taint"] - # See https://github.com/python/cpython/blob/main/Lib/nturl2path.py - # No user-facing documentation, unfortunately. 
- - ["nturl2path", "Member[pathname2url]", "Argument[0,p:]", "ReturnValue", "taint"] - - ["nturl2path", "Member[url2pathname]", "Argument[0,url:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/optparse.html#optparse.OptionParser.parse_args - - ["optparse.OptionParser", "Member[parse_args]", "Argument[0,args:,1,values:]", "ReturnValue.TupleElement[0,1]", "taint"] - # See https://github.com/python/cpython/blob/3.10/Lib/pathlib.py#L972-L973 - - ["pathlib.Path", ".Member[__enter__]", "Argument[self]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/os.html#os.PathLike.__fspath__ - - ["pathlib.PurePath", "Member[__fspath__]", "Argument[self]", "ReturnValue", "taint"] - # See - # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.get - # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.get_nowait - - ["queue.Queue", "Member[get,get_nowait]", "Argument[self].ListElement", "ReturnValue", "value"] - - ["queue.Queue", "Member[get,get_nowait]", "Argument[self]", "ReturnValue", "taint"] - # See - # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.put - # - https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.put_nowait - - ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self].ListElement", "value"] - - ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self]", "taint"] - # See - # - https://docs.python.org/3/library/random.html#random.choice - # - https://docs.python.org/3/library/random.html#module-random - - ["random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"] - - ["random", "Member[choice]", "Argument[0,seq:].SetElement", "ReturnValue", "value"] - - ["random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"] - - ["random.Random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"] - - ["random.Random", "Member[choice]", "Argument[0,seq:].SetElement", 
"ReturnValue", "value"] - - ["random.Random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/shlex.html#shlex.quote - - ["shlex", "Member[quote]", "Argument[0,s:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/shutil.html#shutil.rmtree - - ["shutil", "Member[rmtree]", "Argument[0,path:]", "Argument[2,onerror:,onexc:].Parameter[1]", "taint"] - # See https://docs.python.org/3/library/shutil.html#shutil.which - - ["shutil", "Member[which]", "Argument[0,cmd:,2,path:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/subprocess.html#subprocess.Popen - - ["subprocess.Popen!", "Subclass.Call", "Argument[0,args:]", "ReturnValue", "taint"] - # See - # - https://docs.python.org/3/library/tarfile.html#tarfile.open - # - https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.open - - ["tarfile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"] - - ["tarfile.TarFile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp - - ["tempfile", "Member[mkdtemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/tempfile.html#tempfile.mkstemp - - ["tempfile", "Member[mkstemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue.TupleElement[0,1]", "taint"] - # See https://docs.python.org/3/library/textwrap.html#textwrap.dedent - - ["textwrap", "Member[dedent]", "Argument[0,text:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/traceback.html#traceback.StackSummary.from_list - - ["traceback.StackSummary", "Member[from_list]", "Argument[0,a_list:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/typing.html#typing.cast - - ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"] - # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote - - 
["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus - - ["urllib", "Member[parse].Member[quote_plus]", "Argument[0,string:]", "ReturnValue", "taint"] - # See https://epydoc.sourceforge.net/stdlib/urllib-module.html - - ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[0,1]", "taint"] - # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote - - ["urllib", "Member[parse].Member[unquote]", "Argument[0,string:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote_plus - - ["urllib", "Member[parse].Member[unquote_plus]", "Argument[0,string:]", "ReturnValue", "taint"] - # We could consider a more precise source than the first argument, namely tuple or dict content. - # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode - - ["urllib", "Member[parse].Member[urlencode]", "Argument[0,query:]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin - - ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:,1,url:]", "ReturnValue", "taint"] - # See the internal documentation - # https://github.com/python/cpython/blob/3.12/Lib/zipfile/_path/__init__.py#L103-L105 - - ["zipfile.CompleteDirs", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"] - # See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile - # it may be necessary to read the code to understand the taint propagation - # Constructor: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1266 - - ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue", "taint"] - - ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue.Attribute[filelist].ListElement.Attribute[filename]", "value"] - # _extract_member: 
https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1761 - - ["zipfile.ZipFile", "Member[_extract_member]", "Argument[1,targetpath:]", "ReturnValue", "taint"] - # infolist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1498-L1501 - - ["zipfile.ZipFile", "Member[infolist]", "Argument[self]", "ReturnValue", "taint"] - - ["zipfile.ZipFile", "Member[infolist]", "Argument[self].Attribute[filelist]", "ReturnValue", "value"] - # namelist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1494-L1496 - - ["zipfile.ZipFile", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"] - - addsTo: - pack: codeql/python-all - extensible: neutralModel - data: [] - - - addsTo: - pack: codeql/python-all - extensible: typeModel - data: [] - - - addsTo: - pack: codeql/python-all - extensible: typeVariableModel - data: [] From 05910de8d1321cb523cfb29be365756a6961f129 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 1 Oct 2024 13:21:22 +0200 Subject: [PATCH 25/26] Python: MaD expectations --- .../CWE-022-UnsafeUnpacking/UnsafeUnpack.expected | 2 +- .../Security/CWE-409/DecompressionBombs.expected | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected index 0c6c30857220..38d8719bfd0a 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-022-UnsafeUnpacking/UnsafeUnpack.expected @@ -75,7 +75,7 @@ edges | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | provenance | | | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | provenance | | | 
UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | Config | -| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:54 | +| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:67 | | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | provenance | | | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | provenance | | | UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | provenance | | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected index 073533bcc092..2b16f9ef178f 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected @@ -1,23 +1,23 @@ edges | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:21:11:29 | ControlFlowNode for file_path | provenance | | | test.py:11:5:11:35 | ControlFlowNode for Attribute() | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:69 | +| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:82 | | test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config | | test.py:11:21:11:29 | 
ControlFlowNode for file_path | test.py:12:21:12:29 | ControlFlowNode for file_path | provenance | | | test.py:12:5:12:35 | ControlFlowNode for Attribute() | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:69 | +| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:82 | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config | | test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:14:26:14:34 | ControlFlowNode for file_path | provenance | | | test.py:14:10:14:35 | ControlFlowNode for Attribute() | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:69 | +| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:82 | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config | | test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:18:26:18:34 | ControlFlowNode for file_path | provenance | | | test.py:18:10:18:35 | ControlFlowNode for Attribute() | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:69 | +| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:82 | | test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config | | test.py:18:26:18:34 | 
ControlFlowNode for file_path | test.py:22:21:22:29 | ControlFlowNode for file_path | provenance | | | test.py:22:5:22:30 | ControlFlowNode for Attribute() | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | -| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:69 | +| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:82 | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config | | test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:24:18:24:26 | ControlFlowNode for file_path | provenance | | | test.py:24:18:24:26 | ControlFlowNode for file_path | test.py:24:5:24:52 | ControlFlowNode for Attribute() | provenance | Config | From 56d0affe38fbe469e241c7854232b4f43adde619 Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 3 Oct 2024 10:18:25 +0200 Subject: [PATCH 26/26] Update python/ql/lib/semmle/python/frameworks/Stdlib.model.yml Co-authored-by: Rasmus Wriedt Larsen --- python/ql/lib/semmle/python/frameworks/Stdlib.model.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index 946c4d5ed4f6..107fffdbfb84 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -33,8 +33,8 @@ extensions: extensible: summaryModel data: # See https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser + # note: taint flow for attribute lookups on `argparse.ArgumentParser` is handled in QL - ["argparse.ArgumentParser", "Member[_parse_known_args,_read_args_from_files]", "Argument[0,arg_strings:]", "ReturnValue", "taint"] - # note: taint of attribute lookups is handled in QL - ["argparse.ArgumentParser", 
"Member[parse_args,parse_known_args]", "Argument[0,args:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/cgi.html#higher-level-interface - ["cgi.FieldStorage", "Member[getfirst,getlist,getvalue]", "Argument[self]", "ReturnValue", "taint"]