diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll index 784d248d8dce4..589d75c3635d4 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll @@ -760,7 +760,7 @@ ContentApprox getContentApprox(Content c) { /** * Holds if the the content `c` is a container. */ -predicate containerContent(Content c) { +predicate containerContent(ContentSet c) { c instanceof ArrayContent or c instanceof CollectionContent or c instanceof MapKeyContent or diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index ab5de0d019792..6cf36ea88cfde 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -1,635 +1,292 @@ /** - * Provides classes and predicates related to capturing summary, source, - * and sink models of the Standard or a 3rd party library. + * Provides predicates related to capturing summary models of the Standard or a 3rd party library. 
*/ -private import CaptureModelsSpecific -private import CaptureModelsPrinting +private import java as J +private import semmle.code.java.dataflow.DataFlow +private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow +private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow +private import semmle.code.java.dataflow.internal.DataFlowDispatch +private import semmle.code.java.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon +private import semmle.code.java.dataflow.internal.DataFlowImplSpecific +private import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate +private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil +private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl +private import semmle.code.java.dataflow.internal.ModelExclusions +private import semmle.code.java.dataflow.internal.TaintTrackingImplSpecific +private import semmle.code.java.dataflow.SSA as Ssa +private import semmle.code.java.dataflow.TaintTracking +private import codeql.mad.modelgenerator.ModelGeneratorImpl /** - * A node from which flow can return to the caller. This is either a regular - * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter. + * Holds if the type `t` is a primitive type used for bulk data. */ -private class ReturnNodeExt extends DataFlow::Node { - private DataFlowImplCommon::ReturnKindExt kind; - - ReturnNodeExt() { - kind = DataFlowImplCommon::getValueReturnPosition(this).getKind() or - kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind() - } - - /** - * Gets the kind of the return node. 
- */ - DataFlowImplCommon::ReturnKindExt getKind() { result = kind } -} - -bindingset[c] -private signature string printCallableParamSig(Callable c, ParameterPosition p); - -private module PrintReturnNodeExt { - string getOutput(ReturnNodeExt node) { - node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and - result = "ReturnValue" - or - exists(ParameterPosition pos | - pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and - result = printCallableParam(returnNodeEnclosingCallable(node), pos) - ) - } -} - -string getOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt::getOutput(node) -} - -string getContentOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt::getOutput(node) -} - -class DataFlowSummaryTargetApi extends SummaryTargetApi { - DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) } -} - -class DataFlowSourceTargetApi = SourceTargetApi; - -class DataFlowSinkTargetApi = SinkTargetApi; - -private module ModelPrintingInput implements ModelPrintingSig { - class SummaryApi = DataFlowSummaryTargetApi; - - class SourceOrSinkApi = SourceOrSinkTargetApi; - - string getProvenance() { result = "df-generated" } -} - -module Printing = ModelPrinting; - -/** - * Holds if `c` is a relevant content kind, where the underlying type is relevant. - */ -private predicate isRelevantTypeInContent(DataFlow::ContentSet c) { - isRelevantType(getUnderlyingContentType(c)) -} - -/** - * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. - */ -private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) { - exists(DataFlow::ContentSet f | - DataFlowPrivate::readStep(node1, f, node2) and - // Partially restrict the content types used for intermediate steps. 
- (not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f)) - ) - or - exists(DataFlow::ContentSet f | DataFlowPrivate::storeStep(node1, f, node2) | containerContent(f)) -} - -/** - * Holds if content `c` is either a field, a synthetic field or language specific - * content of a relevant type or a container like content. - */ -pragma[nomagic] -private predicate isRelevantContent0(DataFlow::ContentSet c) { - isRelevantTypeInContent(c) or - containerContent(c) -} - -/** - * Gets the MaD string representation of the parameter node `p`. - */ -string parameterNodeAsInput(DataFlow::ParameterNode p) { - result = parameterAccess(p.asParameter()) - or - result = qualifierString() and p instanceof InstanceParameterNode -} - -/** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. - */ -string parameterNodeAsContentInput(DataFlow::ParameterNode p) { - result = parameterContentAccess(p.asParameter()) - or - result = qualifierString() and p instanceof InstanceParameterNode +predicate isPrimitiveTypeUsedForBulkData(J::Type t) { + t.hasName(["byte", "char", "Byte", "Character"]) } -/** - * Gets the MaD input string representation of `source`. - */ -string asInputArgument(DataFlow::Node source) { result = asInputArgumentSpecific(source) } - -/** - * Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`). - */ -string captureQualifierFlow(DataFlowSummaryTargetApi api) { - exists(ReturnNodeExt ret | - api = returnNodeEnclosingCallable(ret) and - isOwnInstanceAccessNode(ret) - ) and - result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue") -} +module ModelGeneratorInput implements ModelGeneratorInputSig { + class Type = J::Type; -private int accessPathLimit0() { result = 2 } + class Parameter = J::Parameter; -private newtype TTaintState = - TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or - TTaintStore(int n) { n in [1 .. 
accessPathLimit0()] } - -abstract private class TaintState extends TTaintState { - abstract string toString(); -} - -/** - * A FlowState representing a tainted read. - */ -private class TaintRead extends TaintState, TTaintRead { - private int step; + class Callable = J::Callable; - TaintRead() { this = TTaintRead(step) } + class NodeExtended = DataFlow::Node; - /** - * Gets the flow state step number. - */ - int getStep() { result = step } + class ParameterNodeExtended = DataFlow::ParameterNode; - override string toString() { result = "TaintRead(" + step + ")" } -} + private predicate isInfrequentlyUsed(J::CompilationUnit cu) { + cu.getPackage().getName().matches("javax.swing%") or + cu.getPackage().getName().matches("java.awt%") + } -/** - * A FlowState representing a tainted write. - */ -private class TaintStore extends TaintState, TTaintStore { - private int step; + private predicate relevant(Callable api) { + api.isPublic() and + api.getDeclaringType().isPublic() and + api.fromSource() and + not isUninterestingForModels(api) and + not isInfrequentlyUsed(api.getCompilationUnit()) + } - TaintStore() { this = TTaintStore(step) } + private J::Method getARelevantOverride(J::Method m) { + result = m.getAnOverride() and + relevant(result) and + // Other exclusions for overrides. + not m instanceof J::ToStringMethod + } /** - * Gets the flow state step number. + * Gets the super implementation of `m` if it is relevant. + * If such a super implementation does not exist, returns `m` if it is relevant. */ - int getStep() { result = step } - - override string toString() { result = "TaintStore(" + step + ")" } -} - -/** - * A data-flow configuration for tracking flow through APIs. - * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters. - * - * This can be used to generate Flow summaries for APIs from parameter to return. 
- */ -module PropagateFlowConfig implements DataFlow::StateConfigSig { - class FlowState = TaintState; - - predicate isSource(DataFlow::Node source, FlowState state) { - source instanceof DataFlow::ParameterNode and - source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi and - state.(TaintRead).getStep() = 0 + private J::Callable liftedImpl(J::Callable m) { + ( + result = getARelevantOverride(m) + or + result = m and relevant(m) + ) and + not exists(getARelevantOverride(result)) } - predicate isSink(DataFlow::Node sink, FlowState state) { - sink instanceof ReturnNodeExt and - not isOwnInstanceAccessNode(sink) and - not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and - (state instanceof TaintRead or state instanceof TaintStore) + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable() } - predicate isAdditionalFlowStep( - DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 - ) { - exists(DataFlow::ContentSet c | - DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and - isRelevantContent0(c) and - ( - state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1 - or - state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep() - ) - ) - or - exists(DataFlow::ContentSet c | - DataFlowPrivate::readStep(node1, c, node2) and - isRelevantContent0(c) and - state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep() - ) + private predicate hasManualSourceModel(Callable api) { + api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()).asCallable() } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) + private predicate 
hasManualSinkModel(Callable api) { + api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()).asCallable() } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext + predicate isUninterestingForDataFlowModels(Callable api) { + api.getDeclaringType() instanceof J::Interface and not exists(api.getBody()) } -} - -module PropagateFlow = TaintTracking::GlobalWithState; - -/** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. - */ -string captureThroughFlow0( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt -) { - exists(string input, string output | - p.getEnclosingCallable() = api and - returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and - input = parameterNodeAsInput(p) and - output = getOutput(returnNodeExt) and - input != output and - result = Printing::asLiftedTaintModel(api, input, output) - ) -} -/** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. 
- */ -string captureThroughFlow(DataFlowSummaryTargetApi api) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - PropagateFlow::flow(p, returnNodeExt) and - result = captureThroughFlow0(api, p, returnNodeExt) - ) -} - -private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - source instanceof DataFlow::ParameterNode and - source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi + class SourceOrSinkTargetApi extends Callable { + SourceOrSinkTargetApi() { relevant(this) } } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi + class SinkTargetApi extends SourceOrSinkTargetApi { + SinkTargetApi() { not hasManualSinkModel(this) } } - predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2; - - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) + class SourceTargetApi extends SourceOrSinkTargetApi { + SourceTargetApi() { not hasManualSourceModel(this) } } - int accessPathLimit() { result = 2 } - - predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) } + class SummaryTargetApi extends Callable { + private Callable lift; - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext - } -} + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } -private module PropagateContentFlow = ContentDataFlow::Global; + Callable lift() { result = lift } -private string getContent(PropagateContentFlow::AccessPath ap, int i) { - exists(ContentSet head, PropagateContentFlow::AccessPath tail | - head = ap.getHead() and - tail = ap.getTail() - | - i = 0 and - result = "." + printContent(head) - or - i > 0 and result = getContent(tail, i - 1) - ) -} - -/** - * Gets the MaD string representation of a store step access path. 
- */ -private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i) -} - -/** - * Gets the MaD string representation of a read step access path. - */ -private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i desc) -} - -/** - * Holds if the access path `ap` contains a field or synthetic field access. - */ -private predicate mentionsField(PropagateContentFlow::AccessPath ap) { - exists(ContentSet head, PropagateContentFlow::AccessPath tail | - head = ap.getHead() and - tail = ap.getTail() - | - mentionsField(tail) or isField(head) - ) -} - -private predicate apiFlow( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, - ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue -) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and - p.getEnclosingCallable() = api -} - -/** - * A class of APIs relevant for modeling using content flow. - * The following heuristic is applied: - * Content flow is only relevant for an API, if - * #content flow <= 2 * #parameters + 3 - * If an API produces more content flow, it is likely that - * 1. Types are not sufficiently constrained leading to a combinatorial - * explosion in dispatch and thus in the generated summaries. - * 2. It is a reasonable approximation to use the non-content based flow - * detection instead, as reads and stores would use a significant - * part of an objects internal state. 
- */ -private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { - ContentDataFlowSummaryTargetApi() { - count(string input, string output | - exists( - DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, - ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores - | - apiFlow(this, p, reads, returnNodeExt, stores, _) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) - ) - ) <= 2 * this.getNumberOfParameters() + 3 + predicate isRelevant() { relevant(this) } } -} - -pragma[nomagic] -private predicate apiContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores, boolean preservesValue -) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and - p.getEnclosingCallable() = api -} -/** - * Holds if any of the content sets in `path` translates into a synthetic field. - */ -private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { - exists(PropagateContentFlow::AccessPath tail, ContentSet head | - head = path.getHead() and - tail = path.getTail() - | - exists(getSyntheticName(head)) or - hasSyntheticContent(tail) - ) -} + private string isExtensible(Callable c) { + if c.getDeclaringType().isFinal() then result = "false" else result = "true" + } -/** - * A module containing predicates for validating access paths containing content sets - * that translates into synthetic fields, when used for generated summary models. - */ -private module AccessPathSyntheticValidation { /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`). + * Holds if the callable `c` is in package `package` + * and is a member of `type`. 
*/ - private predicate step( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - p.getType() = t1 and - returnNodeExt.getType() = t2 and - apiContentFlow(_, p, read, returnNodeExt, store, _) + private predicate qualifiedName(Callable c, string package, string type) { + exists(RefType t | t = c.getDeclaringType() | + package = t.getCompilationUnit().getPackage().getName() and + type = t.getErasure().(J::RefType).getNestedName() ) } - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. - * - * Step A -> Synth. - */ - private predicate synthPathEntry( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - not hasSyntheticContent(read) and - hasSyntheticContent(store) and - step(t1, read, t2, store) + predicate isRelevantType(Type t) { + not t instanceof J::TypeClass and + not t instanceof J::EnumType and + not t instanceof J::PrimitiveType and + not t instanceof J::BoxedType and + not t.(J::RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and + not t.(J::RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and + ( + not t.(J::Array).getElementType() instanceof J::PrimitiveType or + isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType()) + ) and + ( + not t.(J::Array).getElementType() instanceof J::BoxedType or + isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType()) + ) and + ( + not t.(ContainerFlow::CollectionType).getElementType() instanceof J::BoxedType or + isPrimitiveTypeUsedForBulkData(t.(ContainerFlow::CollectionType).getElementType()) + ) } - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` has synthetic content - * and `store` does not. 
- * - * Step Synth -> A. - */ - private predicate synthPathExit( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - hasSyntheticContent(read) and - not hasSyntheticContent(store) and - step(t1, read, t2, store) + Type getUnderlyingContentType(DataFlow::ContentSet c) { + result = c.(DataFlow::FieldContent).getField().getType() or + result = c.(DataFlow::SyntheticFieldContent).getField().getType() } - /** - * Holds if there exists a path of steps from `read` to an exit. - * - * read ->* Synth -> A - */ - private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { - synthPathExit(t, read, _, _) - or - hasSyntheticContent(read) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(t, read, midType, mid) and - reachesSynthExit(midType, mid.reverse()) - ) + string qualifierString() { result = "Argument[this]" } + + string parameterAccess(J::Parameter p) { + if + p.getType() instanceof J::Array and + not isPrimitiveTypeUsedForBulkData(p.getType().(J::Array).getElementType()) + then result = "Argument[" + p.getPosition() + "].ArrayElement" + else + if p.getType() instanceof ContainerFlow::ContainerType + then result = "Argument[" + p.getPosition() + "].Element" + else result = "Argument[" + p.getPosition() + "]" } - /** - * Holds if there exists a path of steps from an entry to `store`. 
- * - * A -> Synth ->* store - */ - private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { - synthPathEntry(_, _, t, store) - or - hasSyntheticContent(store) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(midType, mid, t, store) and - synthEntryReaches(midType, mid.reverse()) - ) - } + string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" } - /** - * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) - * contain content that will be translated into a synthetic field, when being used in - * a MaD summary model, and if there is a range of APIs, such that - * when chaining their flow access paths, there exists access paths `A` and `B` where - * A ->* read -> store ->* B and where `A` and `B` do not contain content that will - * be translated into a synthetic field. - * - * This is needed because we don't want to include summaries that reads from or - * stores into a "dead" synthetic field. - * - * Example: - * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and - * `setX`, which gets and sets a private field `X` on `t`. - * This would lead to the following content flows - * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. - * setX : Argument[0] -> Argument[this].SyntheticField[t.X] - * As the reads and stores are on synthetic fields we should only make summaries - * if both of these methods exist. 
- */ - pragma[nomagic] - predicate acceptReadStore( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse()) + class InstanceParameterNode = DataFlow::InstanceParameterNode; + + bindingset[c] + string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) { + result = parameterAccess(c.getParameter(pos)) or - exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read | - synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) - or - synthEntryReaches(t1, store0) and - step(t1, read, t2, store) and - reachesSynthExit(t2, store.reverse()) - ) + result = qualifierString() and pos = -1 } -} -/** - * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. - * Flow is considered relevant, - * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. - * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if - * the synthetic content is "live" on the relevant declaring type. 
- */ -private predicate apiRelevantContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath store, boolean preservesValue -) { - apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and - ( - not hasSyntheticContent(read) and not hasSyntheticContent(store) + bindingset[c] + string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) { + result = parameterContentAccess(c.getParameter(pos)) or - AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store) - ) -} + result = qualifierString() and pos = -1 + } -pragma[nomagic] -private predicate captureContentFlow0( - ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, - boolean lift -) { - exists( - DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads, - PropagateContentFlow::AccessPath stores - | - apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and - input != output and - (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) - ) -} + Callable returnNodeEnclosingCallable(DataFlow::Node ret) { + result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable() + } -/** - * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to - * the return value or a parameter). - * - * Models are lifted to the best type in case the read and store access paths do not - * contain a field or synthetic field access. 
- */ -string captureContentFlow(ContentDataFlowSummaryTargetApi api) { - exists(string input, string output, boolean lift, boolean preservesValue | - captureContentFlow0(api, input, output, _, lift) and - preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and - result = Printing::asModel(api, input, output, preservesValue, lift) - ) -} + predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) { + node.asExpr().(J::ThisAccess).isOwnInstanceAccess() + } -/** - * A dataflow configuration used for finding new sources. - * The sources are the already known existing sources and the sinks are the API return nodes. - * - * This can be used to generate Source summaries for an API, if the API expose an already known source - * via its return (then the API itself becomes a source). - */ -module PropagateFromSourceConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - exists(string kind | - isRelevantSourceKind(kind) and - ExternalFlow::sourceNode(source, kind) + predicate sinkModelSanitizer(DataFlow::Node node) { + // exclude variable capture jump steps + exists(Ssa::SsaImplicitInit closure | + closure.captures(_) and + node.asExpr() = closure.getAFirstUse() ) } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - sink.getEnclosingCallable() instanceof DataFlowSourceTargetApi + predicate apiSource(DataFlow::Node source) { + ( + source.asExpr().(J::FieldAccess).isOwnFieldAccess() or + source instanceof DataFlow::ParameterNode + ) and + exists(J::RefType t | + t = source.getEnclosingCallable().getDeclaringType().getAnAncestor() and + not t instanceof J::TypeObject and + t.isPublic() + ) } - DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext } + predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) + string 
getInputArgument(DataFlow::Node source) { + exists(int pos | + source.(DataFlow::ParameterNode).isParameterOf(_, pos) and + if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString() + ) + or + source.asExpr() instanceof J::FieldAccess and + result = qualifierString() } - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) + bindingset[kind] + predicate isRelevantSinkKind(string kind) { + not kind = "log-injection" and + not kind.matches("regex-use%") and + not kind = "file-content-store" } -} -private module PropagateFromSource = TaintTracking::Global; + bindingset[kind] + predicate isRelevantSourceKind(string kind) { any() } -/** - * Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`. - */ -string captureSource(DataFlowSourceTargetApi api) { - exists(DataFlow::Node source, ReturnNodeExt sink, string kind | - PropagateFromSource::flow(source, sink) and - ExternalFlow::sourceNode(source, kind) and - api = sink.getEnclosingCallable() and - not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and - result = Printing::asSourceModel(api, getOutput(sink), kind) - ) -} + predicate containerContent = DataFlowPrivate::containerContent/1; -/** - * A dataflow configuration used for finding new sinks. - * The sources are the parameters of the API and the fields of the enclosing type. - * - * This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field) - * into an existing known sink (then the API itself becomes a sink). 
- */ -module PropagateToSinkConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - apiSource(source) and source.getEnclosingCallable() instanceof DataFlowSinkTargetApi + predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and + not exists(DataFlow::Content f | + DataFlowPrivate::readStep(node1, f, node2) and containerContent(f) + ) } - predicate isSink(DataFlow::Node sink) { - exists(string kind | isRelevantSinkKind(kind) and ExternalFlow::sinkNode(sink, kind)) + predicate isField(DataFlow::ContentSet c) { + c instanceof DataFlowUtil::FieldContent or + c instanceof DataFlowUtil::SyntheticFieldContent } - predicate isBarrier(DataFlow::Node node) { - exists(Type t | t = node.getType() and not isRelevantType(t)) + string getSyntheticName(DataFlow::ContentSet c) { + exists(Field f | + not f.isPublic() and + f = c.(DataFlowUtil::FieldContent).getField() and + result = f.getQualifiedName() + ) or - sinkModelSanitizer(node) + result = c.(DataFlowUtil::SyntheticFieldContent).getField() } - DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext } + string printContent(DataFlow::ContentSet c) { + exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() | + result = "Field[" + f.getQualifiedName() + "]" + ) + or + result = "SyntheticField[" + getSyntheticName(c) + "]" + or + c instanceof DataFlowUtil::CollectionContent and result = "Element" + or + c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement" + or + c instanceof DataFlowUtil::MapValueContent and result = "MapValue" + or + c instanceof DataFlowUtil::MapKeyContent and result = "MapKey" + } - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) + predicate partialModel( + Callable api, string package, string type, string extensible, string name, string parameters 
+ ) { + qualifiedName(api, package, type) and + extensible = isExtensible(api) and + name = api.getName() and + parameters = ExternalFlow::paramsString(api) } -} -private module PropagateToSink = TaintTracking::Global; + predicate sourceNode = ExternalFlow::sourceNode/2; -/** - * Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink. - */ -string captureSink(DataFlowSinkTargetApi api) { - exists(DataFlow::Node src, DataFlow::Node sink, string kind | - PropagateToSink::flow(src, sink) and - ExternalFlow::sinkNode(sink, kind) and - api = src.getEnclosingCallable() and - result = Printing::asSinkModel(api, asInputArgument(src), kind) - ) + predicate sinkNode = ExternalFlow::sinkNode/2; } + +import MakeModelGenerator diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 33d176c3d893a..57dc74ccca556 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,11 +1,11 @@ private import java as J private import codeql.mad.modelgenerator.ModelPrinting -private import CaptureModelsSpecific as Specific +private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = J::Callable; - predicate partialModel = Specific::partialModel/6; + predicate partialModel = ModelGeneratorInput::partialModel/6; } import ModelPrintingImpl diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll deleted file mode 100644 index f359d59973ca9..0000000000000 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll +++ /dev/null @@ -1,380 +0,0 @@ -/** - * Provides predicates related to capturing summary models of the Standard or a 3rd party library. 
- */ - -private import java as J -private import semmle.code.java.dataflow.internal.DataFlowPrivate -private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil -private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow -private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import semmle.code.java.dataflow.internal.ModelExclusions -private import semmle.code.java.dataflow.DataFlow as Df -private import semmle.code.java.dataflow.internal.ContentDataFlow as Cdf -private import semmle.code.java.dataflow.SSA as Ssa -private import semmle.code.java.dataflow.TaintTracking as Tt -import semmle.code.java.dataflow.ExternalFlow as ExternalFlow -import semmle.code.java.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon -import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate -import semmle.code.java.dataflow.internal.DataFlowDispatch as DataFlowDispatch - -module DataFlow = Df::DataFlow; - -module ContentDataFlow = Cdf::ContentDataFlow; - -module TaintTracking = Tt::TaintTracking; - -class Type = J::Type; - -class Unit = J::Unit; - -class Callable = J::Callable; - -class ContentSet = DataFlowUtil::ContentSet; - -private predicate isInfrequentlyUsed(J::CompilationUnit cu) { - cu.getPackage().getName().matches("javax.swing%") or - cu.getPackage().getName().matches("java.awt%") -} - -private predicate relevant(Callable api) { - api.isPublic() and - api.getDeclaringType().isPublic() and - api.fromSource() and - not isUninterestingForModels(api) and - not isInfrequentlyUsed(api.getCompilationUnit()) -} - -private J::Method getARelevantOverride(J::Method m) { - result = m.getAnOverride() and - relevant(result) and - // Other exclusions for overrides. - not m instanceof J::ToStringMethod -} - -/** - * Gets the super implementation of `m` if it is relevant. - * If such a super implementations does not exist, returns `m` if it is relevant. 
- */ -private J::Callable liftedImpl(J::Callable m) { - ( - result = getARelevantOverride(m) - or - result = m and relevant(m) - ) and - not exists(getARelevantOverride(result)) -} - -private predicate hasManualSummaryModel(Callable api) { - api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or - api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable() -} - -private predicate hasManualSourceModel(Callable api) { - api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()).asCallable() -} - -private predicate hasManualSinkModel(Callable api) { - api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()).asCallable() -} - -/** - * Holds if it is irrelevant to generate models for `api` based on data flow analysis. - * - * This serves as an extra filter for the `relevant` predicate. - */ -predicate isUninterestingForDataFlowModels(Callable api) { - api.getDeclaringType() instanceof J::Interface and not exists(api.getBody()) -} - -/** - * A class of callables that are potentially relevant for generating source or - * sink models. - */ -class SourceOrSinkTargetApi extends Callable { - SourceOrSinkTargetApi() { relevant(this) } -} - -/** - * A class of callables that are potentially relevant for generating sink models. - */ -class SinkTargetApi extends SourceOrSinkTargetApi { - SinkTargetApi() { not hasManualSinkModel(this) } -} - -/** - * A class of callables that are potentially relevant for generating source models. - */ -class SourceTargetApi extends SourceOrSinkTargetApi { - SourceTargetApi() { not hasManualSourceModel(this) } -} - -/** - * Holds if it is irrelevant to generate models for `api` based on type-based analysis. - * - * This serves as an extra filter for the `relevant` predicate. 
- */ -predicate isUninterestingForTypeBasedFlowModels(Callable api) { none() } - -/** - * A class of callables that are potentially relevant for generating summary or - * neutral models. - * - * In the Standard library and 3rd party libraries it is the callables (or callables that have a - * super implementation) that can be called from outside the library itself. - */ -class SummaryTargetApi extends Callable { - private Callable lift; - - SummaryTargetApi() { - lift = liftedImpl(this) and - not hasManualSummaryModel(lift) - } - - /** - * Gets the callable that a model will be lifted to. - */ - Callable lift() { result = lift } - - /** - * Holds if this callable is relevant in terms of generating models. - */ - predicate isRelevant() { relevant(this) } -} - -private string isExtensible(Callable c) { - if c.getDeclaringType().isFinal() then result = "false" else result = "true" -} - -/** - * Holds if the callable `c` is in package `package` - * and is a member of `type`. - */ -private predicate qualifiedName(Callable c, string package, string type) { - exists(RefType t | t = c.getDeclaringType() | - package = t.getCompilationUnit().getPackage().getName() and - type = t.getErasure().(J::RefType).getNestedName() - ) -} - -predicate partialModel( - Callable api, string package, string type, string extensible, string name, string parameters -) { - qualifiedName(api, package, type) and - extensible = isExtensible(api) and - name = api.getName() and - parameters = ExternalFlow::paramsString(api) -} - -predicate isPrimitiveTypeUsedForBulkData(J::Type t) { - t.hasName(["byte", "char", "Byte", "Character"]) -} - -/** - * Holds for type `t` for fields that are relevant as an intermediate - * read or write step in the data flow analysis. 
- */ -predicate isRelevantType(J::Type t) { - not t instanceof J::TypeClass and - not t instanceof J::EnumType and - not t instanceof J::PrimitiveType and - not t instanceof J::BoxedType and - not t.(J::RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and - not t.(J::RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and - ( - not t.(J::Array).getElementType() instanceof J::PrimitiveType or - isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType()) - ) and - ( - not t.(J::Array).getElementType() instanceof J::BoxedType or - isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType()) - ) and - ( - not t.(ContainerFlow::CollectionType).getElementType() instanceof J::BoxedType or - isPrimitiveTypeUsedForBulkData(t.(ContainerFlow::CollectionType).getElementType()) - ) -} - -/** - * Gets the underlying type of the content `c`. - */ -J::Type getUnderlyingContentType(DataFlow::Content c) { - result = c.(DataFlow::FieldContent).getField().getType() or - result = c.(DataFlow::SyntheticFieldContent).getField().getType() -} - -/** - * Gets the MaD string representation of the qualifier. - */ -string qualifierString() { result = "Argument[this]" } - -/** - * Gets the MaD string representation of the parameter `p`. - */ -string parameterAccess(J::Parameter p) { - if - p.getType() instanceof J::Array and - not isPrimitiveTypeUsedForBulkData(p.getType().(J::Array).getElementType()) - then result = "Argument[" + p.getPosition() + "].ArrayElement" - else - if p.getType() instanceof ContainerFlow::ContainerType - then result = "Argument[" + p.getPosition() + "].Element" - else result = "Argument[" + p.getPosition() + "]" -} - -/** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. 
- */ -string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" } - -class InstanceParameterNode = DataFlow::InstanceParameterNode; - -class ParameterPosition = DataFlowDispatch::ParameterPosition; - -/** - * Gets the MaD string representation of return through parameter at position - * `pos` of callable `c`. - */ -bindingset[c] -string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) { - result = parameterAccess(c.getParameter(pos)) - or - result = qualifierString() and pos = -1 -} - -/** - * Gets the MaD string representation of return through parameter at position - * `pos` of callable `c` for content flow. - */ -bindingset[c] -string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) { - result = parameterContentAccess(c.getParameter(pos)) - or - result = qualifierString() and pos = -1 -} - -/** - * Gets the enclosing callable of `ret`. - */ -Callable returnNodeEnclosingCallable(DataFlow::Node ret) { - result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable() -} - -/** - * Holds if `node` is an own instance access. - */ -predicate isOwnInstanceAccessNode(ReturnNode node) { - node.asExpr().(J::ThisAccess).isOwnInstanceAccess() -} - -predicate sinkModelSanitizer(DataFlow::Node node) { - // exclude variable capture jump steps - exists(Ssa::SsaImplicitInit closure | - closure.captures(_) and - node.asExpr() = closure.getAFirstUse() - ) -} - -/** - * Holds if `source` is an api entrypoint relevant for creating sink models. - */ -predicate apiSource(DataFlow::Node source) { - ( - source.asExpr().(J::FieldAccess).isOwnFieldAccess() or - source instanceof DataFlow::ParameterNode - ) and - exists(J::RefType t | - t = source.getEnclosingCallable().getDeclaringType().getAnAncestor() and - not t instanceof J::TypeObject and - t.isPublic() - ) -} - -/** - * Holds if it is not relevant to generate a source model for `api`, even - * if flow is detected from a node within `source` to a sink within `api`. 
- */ -predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() } - -/** - * Gets the MaD input string representation of `source`. - */ -string asInputArgumentSpecific(DataFlow::Node source) { - exists(int pos | - source.(DataFlow::ParameterNode).isParameterOf(_, pos) and - if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString() - ) - or - source.asExpr() instanceof J::FieldAccess and - result = qualifierString() -} - -/** - * Holds if `kind` is a relevant sink kind for creating sink models. - */ -bindingset[kind] -predicate isRelevantSinkKind(string kind) { - not kind = "log-injection" and - not kind.matches("regex-use%") and - not kind = "file-content-store" -} - -/** - * Holds if `kind` is a relevant source kind for creating source models. - */ -bindingset[kind] -predicate isRelevantSourceKind(string kind) { any() } - -predicate containerContent = DataFlowPrivate::containerContent/1; - -/** - * Holds if there is a taint step from `node1` to `node2` in content flow. - */ -predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and - not exists(DataFlow::Content f | - DataFlowPrivate::readStep(node1, f, node2) and containerContent(f) - ) -} - -/** - * Holds if the content set `c` is a field or a synthetic field. - */ -predicate isField(ContentSet c) { - c instanceof DataFlowUtil::FieldContent or - c instanceof DataFlowUtil::SyntheticFieldContent -} - -/** - * Gets the MaD synthetic name string representation for the content set `c`, if any. - */ -string getSyntheticName(DataFlow::ContentSet c) { - exists(Field f | - not f.isPublic() and - f = c.(DataFlowUtil::FieldContent).getField() and - result = f.getQualifiedName() - ) - or - result = c.(DataFlowUtil::SyntheticFieldContent).getField() -} - -/** - * Gets the MaD string representation of the content set `c`. 
- */ -string printContent(ContentSet c) { - exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() | - result = "Field[" + f.getQualifiedName() + "]" - ) - or - result = "SyntheticField[" + getSyntheticName(c) + "]" - or - c instanceof DataFlowUtil::CollectionContent and result = "Element" - or - c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement" - or - c instanceof DataFlowUtil::MapValueContent and result = "MapValue" - or - c instanceof DataFlowUtil::MapKeyContent and result = "MapKey" -} diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll b/java/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll index 5b1a6fc031b0d..4c98cb05ae2df 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll @@ -80,5 +80,5 @@ string captureFlow(DataFlowSummaryTargetApi api) { string captureNoFlow(DataFlowSummaryTargetApi api) { not exists(DataFlowSummaryTargetApi api0 | exists(captureFlow(api0)) and api0.lift() = api.lift()) and api.isRelevant() and - result = Printing::asNeutralSummaryModel(api) + result = ModelPrinting::asNeutralSummaryModel(api) } diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 3d56dff50726b..36aec8053196f 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -1,7 +1,8 @@ private import java private import semmle.code.java.Collections private import semmle.code.java.dataflow.internal.ContainerFlow -private import CaptureModelsSpecific as Specific +private import CaptureModels as CaptureModels +private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput private import CaptureModelsPrinting /** @@ -81,7 +82,7 @@ private 
predicate localTypeParameter(Callable callable, TypeVariable tv) { private string getAccessPath(Type t) { if t instanceof Array and - not Specific::isPrimitiveTypeUsedForBulkData(t.(Array).getElementType()) + not CaptureModels::isPrimitiveTypeUsedForBulkData(t.(Array).getElementType()) then result = ".ArrayElement" else if t instanceof ContainerType or t instanceof IterableClass @@ -134,7 +135,7 @@ private string implicit(Callable callable, TypeVariable tv) { then access = getAccessPath(decl) else access = getSyntheticField(tv) | - result = Specific::qualifierString() + access + result = ModelGeneratorInput::qualifierString() + access ) } @@ -286,7 +287,7 @@ private predicate output(Callable callable, TypeVariable tv, string output) { module ModelPrintingInput implements ModelPrintingSig { class SummaryApi = TypeBasedFlowTargetApi; - class SourceOrSinkApi = Specific::SourceOrSinkTargetApi; + class SourceOrSinkApi = ModelGeneratorInput::SourceOrSinkTargetApi; string getProvenance() { result = "tb-generated" } } @@ -297,9 +298,7 @@ private module Printing = ModelPrinting; * A class of callables that are relevant generating summaries for based * on the Theorems for Free approach. */ -class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi { - TypeBasedFlowTargetApi() { not Specific::isUninterestingForTypeBasedFlowModels(this) } - +class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi { /** * Gets the string representation of all type based summaries for `this` * inspired by the Theorems for Free approach.