From 7c7d6395b9df8fa0c0d8e873b8b2b33cb6c556d6 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Mon, 19 May 2025 11:56:35 +0200 Subject: [PATCH 1/4] DataFlow: Single-inheritance activation --- shared/dataflow/codeql/dataflow/DataFlow.qll | 2 ++ .../codeql/dataflow/internal/DataFlowImpl.qll | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 93327f5ad6a3..6925e9755d70 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -689,6 +689,8 @@ module DataFlowMake Lang> { predicate flowToExpr(DataFlowExpr sink); } + class DiffInformedQuery = DiffInformedQueryImpl; + /** * Constructs a global data flow computation. */ diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index a13c71f554cc..bbe9de2f9a34 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -169,6 +169,19 @@ module MakeImpl Lang> { } } + /** + * This private type is used instead of `Unit` to ensure that users of this + * library don't take an accidental dependency on convertability to `Unit`, + * allowing us to change the implementation in the future. + */ + private newtype TDiffInformedBase = MkDiffInformedBase() + + abstract class DiffInformedQueryImpl extends TDiffInformedBase { + DiffInformedQueryImpl() { any() } + + string toString() { result = "DataFlow::DiffInformedQuery" } + } + /** * Constructs a data flow computation given a full input configuration, and * an initial stage 1 pruning. From f8922f10237144bb5aa393897f45f68729f211c0 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 20 May 2025 08:11:54 +0200 Subject: [PATCH 2/4] Java: XSS with abstract class (some duplication) I haven't figured out how to avoid the redundancy between `getASelected{Source,Sink}Location` in the module and the class. Maybe we need a strong notion of primary and secondary data-flow configurations. --- java/ql/lib/semmle/code/java/security/XSS.qll | 20 +++++++++++++++++++ .../semmle/code/java/security/XssQuery.qll | 2 +- java/ql/src/Security/CWE/CWE-079/XSS.ql | 10 ++++++++++ .../codeql/dataflow/internal/DataFlowImpl.qll | 18 +++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/java/ql/lib/semmle/code/java/security/XSS.qll b/java/ql/lib/semmle/code/java/security/XSS.qll index 9aafcd15f683..69b9dddca9ae 100644 --- a/java/ql/lib/semmle/code/java/security/XSS.qll +++ b/java/ql/lib/semmle/code/java/security/XSS.qll @@ -71,6 +71,26 @@ private module XssVulnerableWriterSourceToWritingMethodFlowConfig implements Dat sink.asExpr() = ma.getQualifier() and ma.getMethod() instanceof WritingMethod ) } + + predicate observeDiffInformedIncrementalMode() { + // Since this configuration is for finding sinks to be used in a main + // data-flow configuration, this configuration should only restrict the + // sinks to be found if there are no main-configuration sources in the + // diff range. That's because if there is such a source, we need to + // report query results for it even with sinks outside the diff range. + exists(DataFlow::DiffInformedQuery q | not q.hasSourceInDiffRange()) + } + + // The sources are not exposed outside this file module, so we know the + // query will not select them. + Location getASelectedSourceLocation(DataFlow::Node source) { none() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + // This code mirrors `DefaultXssSink()`. + exists(MethodCall ma | result = ma.getAnArgument().getLocation() | + sink.asExpr() = ma.getQualifier() and ma.getMethod() instanceof WritingMethod + ) + } } private module XssVulnerableWriterSourceToWritingMethodFlow = diff --git a/java/ql/lib/semmle/code/java/security/XssQuery.qll b/java/ql/lib/semmle/code/java/security/XssQuery.qll index c0d7035a4f9a..e830f5a3e49d 100644 --- a/java/ql/lib/semmle/code/java/security/XssQuery.qll +++ b/java/ql/lib/semmle/code/java/security/XssQuery.qll @@ -21,7 +21,7 @@ module XssConfig implements DataFlow::ConfigSig { any(XssAdditionalTaintStep s).step(node1, node2) } - predicate observeDiffInformedIncrementalMode() { any() } + predicate observeDiffInformedIncrementalMode() { exists(DataFlow::DiffInformedQuery q) } } /** Tracks flow from remote sources to cross site scripting vulnerabilities. */ diff --git a/java/ql/src/Security/CWE/CWE-079/XSS.ql b/java/ql/src/Security/CWE/CWE-079/XSS.ql index 9ae92a7e362e..9e714738b150 100644 --- a/java/ql/src/Security/CWE/CWE-079/XSS.ql +++ b/java/ql/src/Security/CWE/CWE-079/XSS.ql @@ -15,6 +15,16 @@ import java import semmle.code.java.security.XssQuery import XssFlow::PathGraph +class IsDiffInformed extends DataFlow::DiffInformedQuery { + // This predicate is overridden to be more precise than the default + // implementation in order to support secondary secondary data-flow + // configurations that find sinks. + override Location getASelectedSourceLocation(DataFlow::Node source) { + XssConfig::isSource(source) and + result = source.getLocation() + } +} + from XssFlow::PathNode source, XssFlow::PathNode sink where XssFlow::flowPath(source, sink) select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to a $@.", diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index bbe9de2f9a34..453f26a8db03 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -18,6 +18,9 @@ module MakeImpl Lang> { private import DataFlowImplCommon::MakeImplCommon private import DataFlowImplCommonPublic private import Aliases + private import codeql.util.AlertFiltering + + private module AlertFiltering = AlertFilteringImpl; /** * An input configuration for data flow using flow state. This signature equals @@ -180,6 +183,21 @@ module MakeImpl Lang> { DiffInformedQueryImpl() { any() } string toString() { result = "DataFlow::DiffInformedQuery" } + + // TODO: how to wire this up? It overlaps with the data-flow configuration signature! + Location getASelectedSourceLocation(Node source) { result = source.getLocation() } + + Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } + + pragma[nomagic] + predicate hasSourceInDiffRange() { + AlertFiltering::filterByLocation(this.getASelectedSourceLocation(_)) + } + + pragma[nomagic] + predicate hasSinkInDiffRange() { + AlertFiltering::filterByLocation(this.getASelectedSinkLocation(_)) + } } /** From 7600a73f827cd453296dd61d0a90e15bde3016e7 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Mon, 26 May 2025 11:35:00 +0200 Subject: [PATCH 3/4] DataFlow: primary and secondary configurations For now I've only implemented what XSS.qll needs --- java/ql/lib/semmle/code/java/security/XSS.qll | 27 +++--- .../semmle/code/java/security/XssQuery.qll | 4 +- shared/dataflow/codeql/dataflow/DataFlow.qll | 29 +++++++ .../codeql/dataflow/TaintTracking.qll | 55 ++++++++++++ .../codeql/dataflow/internal/DataFlowImpl.qll | 85 ++++++++++++++++++- 5 files changed, 180 insertions(+), 20 deletions(-) diff --git a/java/ql/lib/semmle/code/java/security/XSS.qll b/java/ql/lib/semmle/code/java/security/XSS.qll index 69b9dddca9ae..24b2e8e1e35a 100644 --- a/java/ql/lib/semmle/code/java/security/XSS.qll +++ b/java/ql/lib/semmle/code/java/security/XSS.qll @@ -71,30 +71,25 @@ private module XssVulnerableWriterSourceToWritingMethodFlowConfig implements Dat sink.asExpr() = ma.getQualifier() and ma.getMethod() instanceof WritingMethod ) } +} - predicate observeDiffInformedIncrementalMode() { - // Since this configuration is for finding sinks to be used in a main - // data-flow configuration, this configuration should only restrict the - // sinks to be found if there are no main-configuration sources in the - // diff range. That's because if there is such a source, we need to - // report query results for it even with sinks outside the diff range. - exists(DataFlow::DiffInformedQuery q | not q.hasSourceInDiffRange()) - } - - // The sources are not exposed outside this file module, so we know the - // query will not select them. - Location getASelectedSourceLocation(DataFlow::Node source) { none() } - - Location getASelectedSinkLocation(DataFlow::Node sink) { +private module XssVulnerableWriterSourceToWritingMethodFlowSecondaryConfig implements + DataFlow::SecondaryConfig +{ + DataFlow::Node getPrimaryOfSecondaryNode( + DataFlow::IsSourceOrSink sourceOrSink, DataFlow::Node sink + ) { + sourceOrSink instanceof DataFlow::IsSink and // This code mirrors `DefaultXssSink()`. - exists(MethodCall ma | result = ma.getAnArgument().getLocation() | + exists(MethodCall ma | result.asExpr() = ma.getAnArgument() | sink.asExpr() = ma.getQualifier() and ma.getMethod() instanceof WritingMethod ) } } private module XssVulnerableWriterSourceToWritingMethodFlow = - TaintTracking::Global; + TaintTracking::FindSinks; /** A method that can be used to output data to an output stream or writer. */ private class WritingMethod extends Method { diff --git a/java/ql/lib/semmle/code/java/security/XssQuery.qll b/java/ql/lib/semmle/code/java/security/XssQuery.qll index e830f5a3e49d..3a47b35e965e 100644 --- a/java/ql/lib/semmle/code/java/security/XssQuery.qll +++ b/java/ql/lib/semmle/code/java/security/XssQuery.qll @@ -20,9 +20,7 @@ module XssConfig implements DataFlow::ConfigSig { predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { any(XssAdditionalTaintStep s).step(node1, node2) } - - predicate observeDiffInformedIncrementalMode() { exists(DataFlow::DiffInformedQuery q) } } /** Tracks flow from remote sources to cross site scripting vulnerabilities. */ -module XssFlow = TaintTracking::Global; +module XssFlow = TaintTracking::Primary; diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 6925e9755d70..964a58a490e9 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -691,6 +691,8 @@ module DataFlowMake Lang> { class DiffInformedQuery = DiffInformedQueryImpl; + import SecondaryConfigHelpers + /** * Constructs a global data flow computation. */ @@ -744,6 +746,33 @@ module DataFlowMake Lang> { import Flow } + module Primary implements GlobalFlowSig { + private module Config0 implements FullStateConfigSig { + import DefaultState + import Config + + predicate accessPathLimit = Config::accessPathLimit/0; + + predicate isAdditionalFlowStep(Node node1, Node node2, string model) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + } + + private module C implements FullStateConfigSig { + import MakePrimaryDiffInformed + + predicate accessPathLimit = Config0::accessPathLimit/0; + } + + private module Stage1 = ImplStage1; + + import Stage1::PartialFlow + + private module Flow = Impl; + + import Flow + } + signature class PathNodeSig { /** Gets a textual representation of this element. */ string toString(); diff --git a/shared/dataflow/codeql/dataflow/TaintTracking.qll b/shared/dataflow/codeql/dataflow/TaintTracking.qll index 24aea44320e0..07079bb9d8c8 100644 --- a/shared/dataflow/codeql/dataflow/TaintTracking.qll +++ b/shared/dataflow/codeql/dataflow/TaintTracking.qll @@ -143,6 +143,61 @@ module TaintFlowMake< import Flow } + /** + * Constructs a global taint tracking computation. + */ + module Primary implements DataFlow::GlobalFlowSig { + private module Config0 implements DataFlowInternal::FullStateConfigSig { + import DataFlowInternal::DefaultState + import Config + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, DataFlowLang::Node node2, string model + ) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + } + + private module C implements DataFlowInternal::FullStateConfigSig { + import DataFlowInternal::MakePrimaryDiffInformed> + } + + private module Stage1 = DataFlowInternalStage1::ImplStage1; + + import Stage1::PartialFlow + + private module Flow = DataFlowInternal::Impl; + + import Flow + } + + module FindSinks implements + DataFlow::GlobalFlowSig + { + private module Config0 implements DataFlowInternal::FullStateConfigSig { + import DataFlowInternal::DefaultState + import Config + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, DataFlowLang::Node node2, string model + ) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + } + + private module C implements DataFlowInternal::FullStateConfigSig { + import DataFlowInternal::MakeSinkFinder, SC> + } + + private module Stage1 = DataFlowInternalStage1::ImplStage1; + + import Stage1::PartialFlow + + private module Flow = DataFlowInternal::Impl; + + import Flow + } + signature int speculationLimitSig(); private module AddSpeculativeTaintSteps< diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 453f26a8db03..db9bc37c49fe 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -184,7 +184,6 @@ module MakeImpl Lang> { string toString() { result = "DataFlow::DiffInformedQuery" } - // TODO: how to wire this up? It overlaps with the data-flow configuration signature! Location getASelectedSourceLocation(Node source) { result = source.getLocation() } Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } @@ -200,6 +199,90 @@ module MakeImpl Lang> { } } + module MakePrimaryDiffInformed implements FullStateConfigSig { + // Workaround for name clash + predicate accessPathLimit = Config::accessPathLimit/0; + + import Config + + predicate observeDiffInformedIncrementalMode() { + // Add to existing configuration to support composition of config transformers + Config::observeDiffInformedIncrementalMode() + or + exists(DiffInformedQueryImpl q) + } + + Location getASelectedSourceLocation(Node source) { + result = Config::getASelectedSourceLocation(source) + or + exists(DiffInformedQueryImpl q | result = q.getASelectedSourceLocation(source)) + } + + Location getASelectedSinkLocation(Node sink) { + result = Config::getASelectedSinkLocation(sink) + or + exists(DiffInformedQueryImpl q | result = q.getASelectedSinkLocation(sink)) + } + } + + module SecondaryConfigHelpers { + newtype IsSourceOrSink = + IsSource() or + IsSink() + + signature module SecondaryConfig { + /** + * Gets the source/sink node from the primary configuration that is + * informed by a given source/sink node from the secondary configuration. + * Whether the secondary node is a source or a sink is determined by + * `sourceOrSink`. + */ + bindingset[sourceOrSink, secondaryNode] + Node getPrimaryOfSecondaryNode(IsSourceOrSink sourceOrSink, Node secondaryNode); + } + } + + module MakeSinkFinder implements FullStateConfigSig + { + // Workaround for name clash + predicate accessPathLimit = Config::accessPathLimit/0; + + import Config + + predicate observeDiffInformedIncrementalMode() { + // Add to existing configuration to support composition of config transformers + Config::observeDiffInformedIncrementalMode() + or + // Because this configuration is for finding sinks to be used in a main + // data-flow configuration, this configuration should only restrict the + // sinks to be found if there are no main-configuration sources in the + // diff range. That's because if there is such a source, we need to + // report query results for it even with sinks outside the diff range. + // + // The `MakeSinkFinder` and `MakeSourceFinder` modules are separate + // because each can only call one of `hasSourceInDiffRange` or + // `hasSinkInDiffRange`. Otherwise it would look like a non-monotonic + // recursion. + exists(DiffInformedQuery q | not q.hasSourceInDiffRange()) + } + + Location getASelectedSourceLocation(Node source) { + result = Config::getASelectedSourceLocation(source) + or + exists(DiffInformedQueryImpl q, IsSource s | + result = q.getASelectedSourceLocation(SC::getPrimaryOfSecondaryNode(s, source)) + ) + } + + Location getASelectedSinkLocation(Node sink) { + result = Config::getASelectedSinkLocation(sink) + or + exists(DiffInformedQueryImpl q, IsSink s | + result = q.getASelectedSinkLocation(SC::getPrimaryOfSecondaryNode(s, sink)) + ) + } + } + /** * Constructs a data flow computation given a full input configuration, and * an initial stage 1 pruning. From be3bd4a6385c0c05ec97e12b2e939735060e6695 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Mon, 26 May 2025 16:11:02 +0200 Subject: [PATCH 4/4] Java: avoid some duplication in XSS.qll --- java/ql/lib/semmle/code/java/security/XSS.qll | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/java/ql/lib/semmle/code/java/security/XSS.qll b/java/ql/lib/semmle/code/java/security/XSS.qll index 24b2e8e1e35a..9148f0b9d895 100644 --- a/java/ql/lib/semmle/code/java/security/XSS.qll +++ b/java/ql/lib/semmle/code/java/security/XSS.qll @@ -44,10 +44,10 @@ private class DefaultXssSink extends XssSink { DefaultXssSink() { sinkNode(this, ["html-injection", "js-injection"]) or - exists(MethodCall ma | - ma.getMethod() instanceof WritingMethod and - XssVulnerableWriterSourceToWritingMethodFlow::flowToExpr(ma.getQualifier()) and - this.asExpr() = ma.getArgument(_) + exists(DataFlow::Node n | + XssVulnerableWriterSourceToWritingMethodFlow::flowTo(n) and + XssVulnerableWriterSourceToWritingMethodFlowSecondaryConfig::getPrimaryOfSecondaryNode(_, n) = + this ) } } @@ -80,9 +80,10 @@ private module XssVulnerableWriterSourceToWritingMethodFlowSecondaryConfig imple DataFlow::IsSourceOrSink sourceOrSink, DataFlow::Node sink ) { sourceOrSink instanceof DataFlow::IsSink and - // This code mirrors `DefaultXssSink()`. - exists(MethodCall ma | result.asExpr() = ma.getAnArgument() | - sink.asExpr() = ma.getQualifier() and ma.getMethod() instanceof WritingMethod + exists(MethodCall ma | + XssVulnerableWriterSourceToWritingMethodFlowConfig::isSink(sink) and + sink.asExpr() = ma.getQualifier() and + result.asExpr() = ma.getAnArgument() ) } }