From 93f7e92560d667996f541386b961ce080221ca5b Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Wed, 12 Jun 2024 15:04:31 +0200 Subject: [PATCH 1/3] Data flow: Store/load matching in pruning stage 3 --- shared/dataflow/codeql/dataflow/DataFlow.qll | 5 +- .../codeql/dataflow/internal/DataFlowImpl.qll | 251 +++++++++++-- .../dataflow/internal/DataFlowImplCommon.qll | 345 ++++++++++++++++++ 3 files changed, 563 insertions(+), 38 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 6e4921521b1a..98a2217d64c3 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -265,6 +265,7 @@ signature module InputSig { */ predicate isUnreachableInCall(NodeRegion nr, DataFlowCall call); + /** Gets the access path limit. A maximum limit of 5 is allowed. */ default int accessPathLimit() { result = 5 } /** @@ -401,7 +402,7 @@ module Configs Lang> { */ default int fieldFlowBranchLimit() { result = 2 } - /** Gets the access path limit. */ + /** Gets the access path limit. A maximum limit of 5 is allowed. */ default int accessPathLimit() { result = Lang::accessPathLimit() } /** @@ -517,7 +518,7 @@ module Configs Lang> { */ default int fieldFlowBranchLimit() { result = 2 } - /** Gets the access path limit. */ + /** Gets the access path limit. A maximum limit of 5 is allowed. */ default int accessPathLimit() { result = Lang::accessPathLimit() } /** diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index c8b56db0b343..32ef36e65645 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -94,7 +94,7 @@ module MakeImpl Lang> { */ int fieldFlowBranchLimit(); - /** Gets the access path limit. */ + /** Gets the access path limit. A maximum limit of 5 is allowed. */ int accessPathLimit(); /** @@ -1297,7 +1297,9 @@ module MakeImpl Lang> { } private signature module StageSig { - class Ap; + class Ap { + string toString(); + } class ApNil extends Ap; @@ -1439,6 +1441,12 @@ module MakeImpl Lang> { predicate typecheckStore(Typ typ, DataFlowType contentType); default predicate enableTypeFlow() { any() } + + default predicate enableStoreReadMatching() { none() } + + default predicate storeMayReachRead(NodeEx storeSource, Content c, NodeEx readTarget) { + none() + } } module Stage implements StageSig { @@ -1463,10 +1471,31 @@ module MakeImpl Lang> { ) } + pragma[nomagic] + private predicate prevStageStoreStepCand( + NodeEx node1, ApApprox ap1, Content c, NodeEx node2, DataFlowType contentType, + DataFlowType containerType + ) { + PrevStage::storeStepCand(node1, ap1, c, node2, contentType, containerType) and + ( + not enableStoreReadMatching() or + Param::storeMayReachRead(node1, c, _) + ) + } + + pragma[nomagic] + private predicate prevStageReadStepCand(NodeEx n1, Content c, NodeEx n2) { + PrevStage::readStepCand(n1, c, n2) and + ( + not enableStoreReadMatching() or + Param::storeMayReachRead(_, c, n2) + ) + } + pragma[nomagic] private predicate compatibleContainer0(ApHeadContent apc, DataFlowType containerType) { exists(DataFlowType containerType0, Content c | - PrevStage::storeStepCand(_, _, c, _, _, containerType0) and + prevStageStoreStepCand(_, _, c, _, _, containerType0) and not isTopType(containerType0) and compatibleTypesCached(containerType0, containerType) and apc = projectToHeadContent(c) @@ -1476,7 +1505,7 @@ module MakeImpl Lang> { pragma[nomagic] private predicate topTypeContent(ApHeadContent apc) { exists(DataFlowType containerType0, Content c | - PrevStage::storeStepCand(_, _, c, _, _, containerType0) and + prevStageStoreStepCand(_, _, c, _, _, containerType0) and isTopType(containerType0) and apc = projectToHeadContent(c) ) @@ -1558,11 +1587,8 @@ module MakeImpl Lang> { ) or // read - exists(Typ t0, Ap ap0, Content c | - fwdFlowRead(t0, ap0, c, _, node, state, cc, summaryCtx) and - fwdFlowConsCand(t0, ap0, c, t, ap) and - apa = getApprox(ap) - ) + fwdFlowRead(_, _, t, ap, _, _, node, state, cc, summaryCtx) and + apa = getApprox(ap) or // flow into a callable without summary context fwdFlowInNoFlowThrough(node, apa, state, cc, t, ap) and @@ -1664,42 +1690,71 @@ module MakeImpl Lang> { fwdFlow(node1, state, cc, summaryCtx, t1, ap1, apa1) and not outBarrier(node1, state) and not inBarrier(node2, state) and - PrevStage::storeStepCand(node1, apa1, c, node2, contentType, containerType) and + prevStageStoreStepCand(node1, apa1, c, node2, contentType, containerType) and t2 = getTyp(containerType) and typecheckStore(t1, contentType) ) } + /** + * Holds if forward flow with access path `tail` and type `t1` reaches a + * store of `c` on a container of type `t2` resulting in access path + * `cons`. `storeSource` is a relevant store source. + * + * This predicate is only evaluated when `enableStoreReadMatching()` holds. + */ + pragma[nomagic] + private predicate fwdFlowConsCandStoreReadMatchingEnabled( + NodeEx storeSource, Typ t2, Ap cons, Content c, Typ t1, Ap tail + ) { + enableStoreReadMatching() and + fwdFlowStore(storeSource, t1, tail, c, t2, _, _, _, _) and + cons = apCons(c, t1, tail) + or + exists(Typ t0 | + typeStrengthen(t0, cons, t2) and + fwdFlowConsCandStoreReadMatchingEnabled(storeSource, t0, cons, c, t1, tail) + ) + } + /** * Holds if forward flow with access path `tail` and type `t1` reaches a * store of `c` on a container of type `t2` resulting in access path * `cons`. + * + * This predicate is only evaluated when `enableStoreReadMatching()` + * doesn't hold. */ pragma[nomagic] - private predicate fwdFlowConsCand(Typ t2, Ap cons, Content c, Typ t1, Ap tail) { + private predicate fwdFlowConsCandStoreReadMatchingDisabled( + Typ t2, Ap cons, Content c, Typ t1, Ap tail + ) { + not enableStoreReadMatching() and fwdFlowStore(_, t1, tail, c, t2, _, _, _, _) and cons = apCons(c, t1, tail) or exists(Typ t0 | typeStrengthen(t0, cons, t2) and - fwdFlowConsCand(t0, cons, c, t1, tail) + fwdFlowConsCandStoreReadMatchingDisabled(t0, cons, c, t1, tail) ) } pragma[nomagic] private predicate readStepCand(NodeEx node1, ApHeadContent apc, Content c, NodeEx node2) { - PrevStage::readStepCand(node1, c, node2) and + prevStageReadStepCand(node1, c, node2) and apc = projectToHeadContent(c) } bindingset[node1, apc] pragma[inline_late] - private predicate readStepCand0(NodeEx node1, ApHeadContent apc, Content c, NodeEx node2) { + private predicate readStepCandInlineLate( + NodeEx node1, ApHeadContent apc, Content c, NodeEx node2 + ) { readStepCand(node1, apc, c, node2) } pragma[nomagic] - private predicate fwdFlowRead( + private predicate fwdFlowRead0( Typ t, Ap ap, Content c, NodeEx node1, NodeEx node2, FlowState state, Cc cc, SummaryCtx summaryCtx ) { @@ -1708,7 +1763,23 @@ module MakeImpl Lang> { not outBarrier(node1, state) and not inBarrier(node2, state) and apc = getHeadContent(ap) and - readStepCand0(node1, apc, c, node2) + readStepCandInlineLate(node1, apc, c, node2) + ) + } + + pragma[inline] + private predicate fwdFlowRead( + Typ t1, Ap ap1, Typ t2, Ap ap2, Content c, NodeEx node1, NodeEx node2, FlowState state, + Cc cc, SummaryCtx summaryCtx + ) { + fwdFlowRead0(t1, ap1, c, node1, node2, state, cc, summaryCtx) and + ( + exists(NodeEx storeSource | + fwdFlowConsCandStoreReadMatchingEnabled(storeSource, t1, ap1, c, t2, ap2) and + storeMayReachRead(storeSource, c, node2) + ) + or + fwdFlowConsCandStoreReadMatchingDisabled(t1, ap1, c, t2, ap2) ) } @@ -2150,10 +2221,7 @@ module MakeImpl Lang> { pragma[nomagic] private predicate readStepFwd(NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2) { - exists(Typ t1 | - fwdFlowRead(t1, ap1, c, n1, n2, _, _, _) and - fwdFlowConsCand(t1, ap1, c, _, ap2) - ) + fwdFlowRead(_, ap1, _, ap2, c, n1, n2, _, _, _) } pragma[nomagic] @@ -2267,8 +2335,14 @@ module MakeImpl Lang> { or // store exists(Ap ap0, Content c | - revFlowStore(ap0, c, ap, _, node, state, _, returnCtx, returnAp) and - revFlowConsCand(ap0, c, ap) + revFlowStore(ap0, c, ap, _, node, state, _, returnCtx, returnAp) + | + exists(NodeEx readTarget | + revFlowConsCandStoreReadMatchingEnabled(readTarget, ap0, c, ap) and + storeMayReachRead(node, c, readTarget) + ) + or + revFlowConsCandStoreReadMatchingDisabled(ap0, c, ap) ) or // read @@ -2329,17 +2403,40 @@ module MakeImpl Lang> { storeStepFwd(node, t, ap, c, mid, ap0) } + pragma[inline] + private predicate revFlowConsCand(NodeEx readTarget, Ap cons, Content c, Ap tail) { + exists(Ap tail0 | + revFlow(readTarget, _, _, _, tail) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, readTarget, tail0) + ) + } + /** * Holds if reverse flow with access path `tail` reaches a read of `c` * resulting in access path `cons`. + * + * This predicate is only evaluated when `enableStoreReadMatching()` holds. */ pragma[nomagic] - private predicate revFlowConsCand(Ap cons, Content c, Ap tail) { - exists(NodeEx mid, Ap tail0 | - revFlow(mid, _, _, _, tail) and - tail = pragma[only_bind_into](tail0) and - readStepFwd(_, cons, c, mid, tail0) - ) + private predicate revFlowConsCandStoreReadMatchingEnabled( + NodeEx readTarget, Ap cons, Content c, Ap tail + ) { + enableStoreReadMatching() and + revFlowConsCand(readTarget, cons, c, tail) + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + * + * This predicate is only evaluated when `enableStoreReadMatching()` + * doesn't hold. + */ + pragma[nomagic] + private predicate revFlowConsCandStoreReadMatchingDisabled(Ap cons, Content c, Ap tail) { + not enableStoreReadMatching() and + revFlowConsCand(_, cons, c, tail) } private module RevTypeFlowInput implements TypeFlowInput { @@ -2464,12 +2561,13 @@ module MakeImpl Lang> { DataFlowType containerType ) { exists(Ap ap2 | - PrevStage::storeStepCand(node1, _, c, node2, contentType, containerType) and + prevStageStoreStepCand(node1, _, c, node2, contentType, containerType) and revFlowStore(ap2, c, ap1, _, node1, _, node2, _, _) and - revFlowConsCand(ap2, c, ap1) + revFlowConsCand(_, ap2, c, ap1) ) } + pragma[nomagic] predicate readStepCand(NodeEx node1, Content c, NodeEx node2) { exists(Ap ap1, Ap ap2 | revFlow(node2, _, _, _, pragma[only_bind_into](ap2)) and @@ -2490,10 +2588,10 @@ module MakeImpl Lang> { private predicate fwdConsCand(Content c, Typ t, Ap ap) { storeStepFwd(_, t, ap, c, _, _) } - private predicate revConsCand(Content c, Typ t, Ap ap) { + private predicate revConsCand(NodeEx readTarget, Content c, Typ t, Ap ap) { exists(Ap ap2 | revFlowStore(ap2, c, ap, t, _, _, _, _, _) and - revFlowConsCand(ap2, c, ap) + revFlowConsCand(readTarget, ap2, c, ap) ) } @@ -2507,7 +2605,7 @@ module MakeImpl Lang> { } additional predicate consCand(Content c, Typ t, Ap ap) { - revConsCand(c, t, ap) and + revConsCand(_, c, t, ap) and validAp(ap) } @@ -3174,10 +3272,9 @@ module MakeImpl Lang> { ) or // read - exists(NodeEx mid, Typ t0, Ap ap0, Content c | + exists(NodeEx mid, Typ t0, Ap ap0 | pn1 = TPathNodeMid(mid, state, cc, summaryCtx, t0, ap0) and - fwdFlowRead(t0, ap0, c, mid, node, state, cc, summaryCtx) and - fwdFlowConsCand(t0, ap0, c, t, ap) and + fwdFlowRead(t0, ap0, t, ap, _, mid, node, state, cc, summaryCtx) and label = "" and isStoreStep = false ) @@ -3828,6 +3925,76 @@ module MakeImpl Lang> { bindingset[typ, contentType] predicate typecheckStore(Typ typ, DataFlowType contentType) { any() } + + predicate enableStoreReadMatching() { any() } + + private class NodeExAlias = NodeEx; + + private module StoreReadMatchingInput implements StoreReadMatchingInputSig { + class NodeEx = NodeExAlias; + + predicate nodeRange(NodeEx node, boolean fromArg) { + exists(PrevStage::Ap ap | + PrevStage::revFlowAp(node, ap) and + ( + ap = true + or + PrevStage::storeStepCand(node, ap, _, _, _, _) + or + PrevStage::readStepCand(_, _, node) + ) + | + exists(PrevStage::Cc cc | PrevStage::fwdFlow(node, _, cc, _, _, ap, _) | + PrevStage::instanceofCcCall(cc) and + fromArg = true + or + PrevStage::instanceofCcNoCall(cc) and + fromArg = false + ) + ) + } + + predicate localValueStep(NodeEx node1, NodeEx node2) { + exists(FlowState state, PrevStage::ApOption returnAp | + PrevStage::revFlow(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](returnAp), true) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, + pragma[only_bind_into](returnAp), true) and + Stage2Param::localStep(node1, state, node2, state, true, _, _, _) + ) + } + + predicate jumpValueStep = jumpStepEx/2; + + pragma[nomagic] + private predicate flowThroughOutOfCall(RetNodeEx ret, NodeEx out) { + exists(DataFlowCall call, CcCall ccc, ReturnKindExt kind | + PrevStage::callEdgeReturn(call, _, ret, kind, out, true, true) and + PrevStage::callMayFlowThroughRev(call) and + PrevStage::returnMayFlowThrough(ret, _, true, kind) and + matchesCall(ccc, call) + ) + } + + predicate callEdgeArgParam(NodeEx arg, NodeEx param) { + PrevStage::callEdgeArgParam(_, _, arg, param, true, true) + } + + predicate callEdgeReturn(NodeEx ret, NodeEx out, boolean mayFlowThrough) { + PrevStage::callEdgeReturn(_, _, ret, _, out, true, true) and + if flowThroughOutOfCall(ret, out) then mayFlowThrough = true else mayFlowThrough = false + } + + predicate readContentStep = PrevStage::readStepCand/3; + + predicate storeContentStep(NodeEx node1, Content c, NodeEx node2) { + PrevStage::storeStepCand(node1, _, c, node2, _, _) + } + + predicate accessPathConfigLimit = Config::accessPathLimit/0; + } + + predicate storeMayReachRead = StoreReadMatching::storeMayReachRead/3; } private module Stage3 = MkStage::Stage; @@ -3952,6 +4119,18 @@ module MakeImpl Lang> { // might have a different type here compared to inside the getter. compatibleTypesFilter(typ, contentType) } + + predicate enableStoreReadMatching() { any() } + + pragma[nomagic] + predicate storeMayReachRead(NodeEx storeSource, Content c, NodeEx readTarget) { + // We do not use the expensive `StoreReadMatching` library; instead, reuse the + // expensive invocation from the input to stage 3 + PrevStage::revFlow(storeSource) and + PrevStage::revFlow(readTarget) and + PrevStage::storeStepCand(_, _, c, _, _, _) and + Stage3Param::storeMayReachRead(storeSource, c, readTarget) + } } private module Stage4 = MkStage::Stage; diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll index 8a82c7b570c5..47e542087777 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll @@ -2333,4 +2333,349 @@ module MakeImplCommon Lang> { this = TAccessPathFrontSome(any(AccessPathFront apf | result = apf.toString())) } } + + /** Provides the input to `StoreReadMatching`. */ + signature module StoreReadMatchingInputSig { + class NodeEx { + string toString(); + } + + predicate nodeRange(NodeEx node, boolean fromArg); + + predicate localValueStep(NodeEx node1, NodeEx node2); + + predicate jumpValueStep(NodeEx node1, NodeEx node2); + + predicate callEdgeArgParam(NodeEx arg, NodeEx param); + + predicate callEdgeReturn(NodeEx ret, NodeEx out, boolean mayFlowThrough); + + predicate readContentStep(NodeEx node1, Content c, NodeEx node2); + + predicate storeContentStep(NodeEx node1, Content c, NodeEx node2); + + int accessPathConfigLimit(); + } + + /** + * Provides logic for computing compatible and (likely) matching store-read pairs. + * + * In order to determine whether a store can be matched with a compatible read, we + * check whether the target of a store may reach the source of a read, using over- + * approximated data flow (no call contexts). + * + * The implementation is based on `doublyBoundedFastTC`, and in order to avoid poor + * performance through recursion, we unroll the recursion manually 4 times, in order to + * be able to handle access paths of maximum length 5. + * + * Additionally, in order to speed up the join with `doublyBoundedFastTC`, we first + * compute three pruning steps: + * + * 1. Which _contents_ may have matching read-store pairs (called `contentIsReadAndStored` below). + * 2. Which store targets may have _a_ matching read (called `storeMayReachARead` below). + * 3. Which reads may have _a_ matching store (called `aStoreMayReachRead` below). + */ + module StoreReadMatching { + private import codeql.util.Boolean + private import Input + + private signature module StoreReachesReadInputSig { + int iteration(); + + predicate storeMayReachReadDelta( + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + ); + + predicate storeMayReachReadPrev( + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + ); + } + + private newtype TNodeOrContent = + TNodeOrContentNode(NodeEx n, Boolean usesPrevDelta, boolean fromArg) { nodeRange(n, fromArg) } or + TNodeOrContentStoreContent(Content c) { storeContentStep(_, c, _) } or + TNodeOrContentReadContent(Content c) { readContentStep(_, c, _) } + + private class NodeOrContent extends TNodeOrContent { + NodeEx asNodeEx(boolean usesPrevDelta, boolean fromArg) { + this = TNodeOrContentNode(result, usesPrevDelta, fromArg) + } + + Content asStoreContent() { this = TNodeOrContentStoreContent(result) } + + Content asReadContent() { this = TNodeOrContentReadContent(result) } + + string toString() { + result = this.asStoreContent().toString() + or + result = this.asReadContent().toString() + or + result = this.asNodeEx(_, _).toString() + } + } + + pragma[nomagic] + private predicate stepNodeCommon( + NodeOrContent node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2 + ) { + exists(NodeEx n1, boolean fromArg1 | n1 = node1.asNodeEx(usesPrevDelta2, fromArg1) | + localValueStep(n1, n2) and + fromArg1 = fromArg2 + or + jumpValueStep(n1, n2) and + fromArg2 = false + or + callEdgeArgParam(n1, n2) and + fromArg2 = true + or + exists(boolean mayFlowThrough | + callEdgeReturn(n1, n2, mayFlowThrough) and + nodeRange(n2, fromArg2) + | + fromArg1 = false or mayFlowThrough = true + ) + ) + } + + private module StoreReachesRead implements + StoreReachesReadInputSig + { + int iteration() { result = Prev::iteration() + 1 } + + private predicate enabled() { accessPathConfigLimit() > Prev::iteration() } + + private predicate usesPrevDelta(Boolean usesPrevDelta) { + // in the first iteration there is no previous delta to use + if iteration() > 1 then usesPrevDelta = true else any() + } + + pragma[nomagic] + private predicate stepNode( + NodeOrContent node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2 + ) { + enabled() and + ( + stepNodeCommon(node1, n2, usesPrevDelta2, fromArg2) + or + exists(NodeEx n1, boolean usesPrevDelta1, boolean fromArg1 | + n1 = node1.asNodeEx(usesPrevDelta1, fromArg1) + | + Prev::storeMayReachReadDelta(n1, _, n2, fromArg1, fromArg2) and + usesPrevDelta2 = true + or + Prev::storeMayReachReadPrev(n1, _, n2, fromArg1, fromArg2) and + usesPrevDelta1 = usesPrevDelta2 + ) + ) + } + + pragma[nomagic] + private predicate step(NodeOrContent node1, NodeOrContent node2) { + exists(NodeEx n2, boolean usesPrevDelta2, boolean fromArg2 | + n2 = node2.asNodeEx(usesPrevDelta2, fromArg2) and + stepNode(node1, n2, usesPrevDelta2, fromArg2) + ) + or + enabled() and + ( + exists(NodeEx n2, Content c, boolean usesPrevDelta2 | + n2 = node2.asNodeEx(usesPrevDelta2, _) and + c = node1.asStoreContent() and + storeContentStep(_, c, n2) and + usesPrevDelta2 = false + ) + or + exists(NodeEx n1, Content c, boolean usesPrevDelta1 | + n1 = node1.asNodeEx(usesPrevDelta1, _) and + c = node2.asReadContent() and + readContentStep(n1, c, _) and + usesPrevDelta(usesPrevDelta1) + ) + ) + } + + private predicate isStoreContent(NodeOrContent c) { + enabled() and + exists(c.asStoreContent()) + } + + private predicate isReadContent(NodeOrContent c) { + enabled() and + exists(c.asReadContent()) + } + + private predicate contentReachesReadTc(NodeOrContent node1, NodeOrContent node2) = + doublyBoundedFastTC(step/2, isStoreContent/1, isReadContent/1)(node1, node2) + + pragma[nomagic] + private predicate contentIsReadAndStoredJoin(NodeOrContent c1, NodeOrContent c2, Content c) { + c1.asStoreContent() = c and + c2.asReadContent() = c + } + + additional predicate contentIsReadAndStored(Content c) { + enabled() and + exists(NodeOrContent n1, NodeOrContent n2 | + contentReachesReadTc(n1, n2) and + contentIsReadAndStoredJoin(n1, n2, c) + ) + } + + pragma[nomagic] + private predicate isStoreTarget0(NodeOrContent node, Content c) { + exists(boolean usesPrevDelta | + contentIsReadAndStored(c) and + storeContentStep(_, c, node.asNodeEx(usesPrevDelta, _)) and + usesPrevDelta = false + ) + } + + private predicate isStoreTarget(NodeOrContent node) { isStoreTarget0(node, _) } + + pragma[nomagic] + private predicate isReadSource0(NodeOrContent node, Content c) { + exists(boolean usesPrevDelta | + contentIsReadAndStored(c) and + readContentStep(node.asNodeEx(usesPrevDelta, _), c, _) and + usesPrevDelta(usesPrevDelta) + ) + } + + private predicate isReadSource(NodeOrContent node) { isReadSource0(node, _) } + + private predicate storeMayReachAReadTc(NodeOrContent node1, NodeOrContent node2) = + doublyBoundedFastTC(step/2, isStoreTarget/1, isReadContent/1)(node1, node2) + + pragma[nomagic] + private predicate storeMayReachAReadJoin(NodeOrContent n1, NodeOrContent n2, Content c) { + isStoreTarget0(n1, c) and + n2.asReadContent() = c + } + + private predicate storeMayReachARead(NodeOrContent node1, Content c) { + exists(NodeOrContent node2 | + storeMayReachAReadTc(node1, node2) and + storeMayReachAReadJoin(node1, node2, c) + ) + } + + private predicate aStoreMayReachReadTc(NodeOrContent node1, NodeOrContent node2) = + doublyBoundedFastTC(step/2, isStoreContent/1, isReadSource/1)(node1, node2) + + pragma[nomagic] + private predicate aStoreMayReachReadJoin(NodeOrContent n1, NodeOrContent n2, Content c) { + n1.asStoreContent() = c and + isReadSource0(n2, c) + } + + additional predicate aStoreMayReachRead(NodeOrContent node2, Content c) { + exists(NodeOrContent node1 | + aStoreMayReachReadTc(node1, node2) and + aStoreMayReachReadJoin(node1, node2, c) + ) + } + + private predicate isStoreTargetPruned(NodeOrContent node) { storeMayReachARead(node, _) } + + private predicate isReadSourcePruned(NodeOrContent node) { aStoreMayReachRead(node, _) } + + private predicate storeMayReachReadTc(NodeOrContent node1, NodeOrContent node2) = + doublyBoundedFastTC(step/2, isStoreTargetPruned/1, isReadSourcePruned/1)(node1, node2) + + pragma[nomagic] + private predicate storeMayReachReadDeltaJoinLeft( + NodeEx node1, Content c, NodeOrContent node2, boolean fromArg + ) { + exists(boolean usesPrevDelta | + storeMayReachARead(pragma[only_bind_into](node2), pragma[only_bind_into](c)) and + storeContentStep(node1, c, node2.asNodeEx(usesPrevDelta, fromArg)) and + usesPrevDelta = false + ) + } + + pragma[nomagic] + private predicate storeMayReachReadDeltaJoinRight( + NodeOrContent node1, Content c, NodeEx node2, boolean fromArg + ) { + exists(boolean usesPrevDelta | + aStoreMayReachRead(pragma[only_bind_into](node1), pragma[only_bind_into](c)) and + readContentStep(node1.asNodeEx(usesPrevDelta, fromArg), c, node2) and + usesPrevDelta(usesPrevDelta) + ) + } + + pragma[nomagic] + predicate storeMayReachReadDelta( + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + ) { + exists(NodeOrContent storeTarget, NodeOrContent readSource | + storeMayReachReadTc(storeTarget, readSource) and + storeMayReachReadDeltaJoinLeft(storeSource, c, storeTarget, fromArg1) and + storeMayReachReadDeltaJoinRight(readSource, c, readTarget, fromArg2) + ) and + not Prev::storeMayReachReadPrev(storeSource, c, readTarget, fromArg1, fromArg2) + } + + pragma[nomagic] + predicate storeMayReachReadPrev( + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + ) { + Prev::storeMayReachReadPrev(storeSource, c, readTarget, fromArg1, fromArg2) + or + Prev::storeMayReachReadDelta(storeSource, c, readTarget, fromArg1, fromArg2) + } + } + + module Iteration0 implements StoreReachesReadInputSig { + int iteration() { result = 0 } + + predicate storeMayReachReadDelta( + NodeEx node1, Content c, NodeEx node2, boolean fromArg1, boolean fromArg2 + ) { + none() + } + + predicate storeMayReachReadPrev( + NodeEx node1, Content c, NodeEx node2, boolean fromArg1, boolean fromArg2 + ) { + none() + } + } + + private module StoreReachesRead1 implements StoreReachesReadInputSig { + private module M = StoreReachesRead; + + import M + + predicate storeMayReachReadDelta( + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + ) { + M::storeMayReachReadDelta(storeSource, c, readTarget, fromArg1, fromArg2) + or + // special case only needed for the first iteration: a store immediately followed by a read + exists(NodeEx storeTargetReadSource | + StoreReachesRead1::contentIsReadAndStored(c) and + storeContentStep(storeSource, c, storeTargetReadSource) and + readContentStep(storeTargetReadSource, c, readTarget) + ) and + nodeRange(storeSource, fromArg1) and + nodeRange(readTarget, fromArg2) and + fromArg1 = fromArg2 + } + } + + private module StoreReachesRead2 = StoreReachesRead; + + private module StoreReachesRead3 = StoreReachesRead; + + private module StoreReachesRead4 = StoreReachesRead; + + private module StoreReachesRead5 = StoreReachesRead; + + predicate storeMayReachRead(NodeEx storeSource, Content c, NodeEx readTarget) { + StoreReachesRead5::storeMayReachReadDelta(storeSource, c, readTarget, _, _) + or + StoreReachesRead5::storeMayReachReadPrev(storeSource, c, readTarget, _, _) + } + } } From 43cd2c059441e8726e1cc42e3f0caef3aa13eb3f Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Thu, 22 Aug 2024 09:48:20 +0200 Subject: [PATCH 2/3] Data flow: Depth 1 call contexts in store/load matching --- .../codeql/dataflow/internal/DataFlowImpl.qll | 142 ++++++++++++++---- .../dataflow/internal/DataFlowImplCommon.qll | 133 +++++++++------- 2 files changed, 193 insertions(+), 82 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 32ef36e65645..9cb11d0f01f4 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1617,9 +1617,9 @@ module MakeImpl Lang> { ) } - private newtype TSummaryCtx = - TSummaryCtxNone() or - TSummaryCtxSome(ParamNodeEx p, FlowState state, Typ t, Ap ap) { + additional newtype TSummaryCtx = + additional TSummaryCtxNone() or + additional TSummaryCtxSome(ParamNodeEx p, FlowState state, Typ t, Ap ap) { fwdFlowInFlowThrough(p, _, state, _, t, ap) } @@ -1629,21 +1629,21 @@ module MakeImpl Lang> { * * Summaries are only created for parameters that may flow through. */ - private class SummaryCtx extends TSummaryCtx { + additional class SummaryCtx extends TSummaryCtx { abstract string toString(); abstract Location getLocation(); } /** A summary context from which no flow summary can be generated. */ - private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + additional class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { override string toString() { result = "" } override Location getLocation() { result.hasLocationInfo("", 0, 0, 0, 0) } } /** A summary context from which a flow summary can be generated. */ - private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + additional class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { private ParamNodeEx p; private FlowState state; private Typ t; @@ -2657,23 +2657,25 @@ module MakeImpl Lang> { pragma[nomagic] private predicate revFlowThroughArg( - DataFlowCall call, ArgNodeEx arg, FlowState state, ReturnCtx returnCtx, ApOption returnAp, - Ap ap + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, FlowState state, ReturnCtx returnCtx, + ApOption returnAp, Ap ap ) { - exists(ParamNodeEx p, Ap innerReturnAp | + exists(Ap innerReturnAp | revFlowThrough(call, returnCtx, p, state, _, returnAp, ap, innerReturnAp) and flowThroughIntoCall(call, arg, p, _, ap, innerReturnAp) ) } pragma[nomagic] - predicate callMayFlowThroughRev(DataFlowCall call) { + additional predicate callMayFlowThroughRev(DataFlowCall call, ParamNodeEx p) { exists(ArgNodeEx arg, FlowState state, ReturnCtx returnCtx, ApOption returnAp, Ap ap | revFlow(arg, state, returnCtx, returnAp, ap) and - revFlowThroughArg(call, arg, state, returnCtx, returnAp, ap) + revFlowThroughArg(call, arg, p, state, returnCtx, returnAp, ap) ) } + predicate callMayFlowThroughRev(DataFlowCall call) { callMayFlowThroughRev(call, _) } + predicate callEdgeArgParam( DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Ap ap @@ -3933,24 +3935,67 @@ module MakeImpl Lang> { private module StoreReadMatchingInput implements StoreReadMatchingInputSig { class NodeEx = NodeExAlias; - predicate nodeRange(NodeEx node, boolean fromArg) { - exists(PrevStage::Ap ap | - PrevStage::revFlowAp(node, ap) and + /** + * Gets a call context for `node` that is relevant for either improved virtual + * dispatch or for flow-through. + */ + pragma[nomagic] + private DataFlowCallOption getACallCtx(NodeEx node, PrevStage::Ap ap, boolean fromArg) { + exists(PrevStage::Cc cc, PrevStage::SummaryCtx summaryCtx | + PrevStage::fwdFlow(node, _, cc, summaryCtx, _, ap, _) + | + PrevStage::instanceofCcCall(cc) and + fromArg = true and ( - ap = true - or - PrevStage::storeStepCand(node, ap, _, _, _, _) + // virtual dispatch may be improved + exists(DataFlowCall call, DataFlowCallable c | + PrevStage::callEdgeArgParam(call, c, _, _, _, _) and + cc = Stage2Param::getSpecificCallContextCall(call, c) and + c = node.getEnclosingCallable() and + result = TDataFlowCallSome(call) + ) or - PrevStage::readStepCand(_, _, node) + not cc = Stage2Param::getSpecificCallContextCall(_, _) and + ( + // flow-through not possible + summaryCtx instanceof PrevStage::SummaryCtxNone and + result = TDataFlowCallNone() + or + exists(DataFlowCall call, ParamNodeEx p, PrevStage::Ap argAp | + summaryCtx = PrevStage::TSummaryCtxSome(p, _, _, argAp) + | + if + PrevStage::parameterMayFlowThrough(p, argAp) and + PrevStage::callMayFlowThroughRev(call, p) + then + // flow-through possible + result = TDataFlowCallSome(call) + else ( + // flow-through not possible, but `node` can reach a sink without + // flowing back out + PrevStage::callEdgeArgParam(call, _, _, p, _, _) and + result = TDataFlowCallNone() + ) + ) + ) ) + or + PrevStage::instanceofCcNoCall(cc) and + fromArg = false and + result = TDataFlowCallNone() + ) + } + + predicate nodeRange(NodeEx node, boolean fromArg, DataFlowCallOption summaryCtx) { + exists(PrevStage::Ap ap | + PrevStage::revFlowAp(node, ap) and + summaryCtx = getACallCtx(node, ap, fromArg) | - exists(PrevStage::Cc cc | PrevStage::fwdFlow(node, _, cc, _, _, ap, _) | - PrevStage::instanceofCcCall(cc) and - fromArg = true - or - PrevStage::instanceofCcNoCall(cc) and - fromArg = false - ) + ap = true + or + PrevStage::storeStepCand(node, ap, _, _, _, _) + or + PrevStage::readStepCand(_, _, node) ) } @@ -3976,12 +4021,51 @@ module MakeImpl Lang> { ) } - predicate callEdgeArgParam(NodeEx arg, NodeEx param) { - PrevStage::callEdgeArgParam(_, _, arg, param, true, true) + pragma[nomagic] + private predicate callEdgeArgParam( + DataFlowCall call, DataFlowCallable c, NodeEx arg, NodeEx param, + DataFlowCallOption innerCallCtx + ) { + PrevStage::callEdgeArgParam(call, c, arg, param, true, true) and + innerCallCtx = getACallCtx(param, true, true) and + ( + innerCallCtx = TDataFlowCallNone() + or + innerCallCtx = TDataFlowCallSome(call) + ) + } + + pragma[nomagic] + private predicate callEdgeArgParamCallContextReduced( + DataFlowCall call, NodeEx arg, NodeEx param, Stage2Param::CcCall outerCcCall, + DataFlowCallOption innerCallCtx + ) { + exists(DataFlowCallable c | + callEdgeArgParam(call, c, arg, param, innerCallCtx) and + Stage2Param::viableImplCallContextReduced(call, outerCcCall) = c + ) + } + + bindingset[outerCallCtx] + predicate callEdgeArgParam( + NodeEx arg, NodeEx param, DataFlowCallOption outerCallCtx, DataFlowCallOption innerCallCtx + ) { + exists(DataFlowCall call | callEdgeArgParam(call, _, arg, param, innerCallCtx) | + outerCallCtx = TDataFlowCallNone() + or + exists(DataFlowCall outerCall, Stage2Param::CcCall outerCcCall | + outerCallCtx = TDataFlowCallSome(outerCall) and + outerCcCall = Stage2Param::getCallContextCall(outerCall, call.getEnclosingCallable()) + | + callEdgeArgParamCallContextReduced(call, arg, param, outerCcCall, innerCallCtx) + or + Stage2Param::viableImplNotCallContextReduced(call, outerCcCall) + ) + ) } - predicate callEdgeReturn(NodeEx ret, NodeEx out, boolean mayFlowThrough) { - PrevStage::callEdgeReturn(_, _, ret, _, out, true, true) and + predicate callEdgeReturn(DataFlowCall call, NodeEx ret, NodeEx out, boolean mayFlowThrough) { + PrevStage::callEdgeReturn(call, _, ret, _, out, true, true) and if flowThroughOutOfCall(ret, out) then mayFlowThrough = true else mayFlowThrough = false } diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll index 47e542087777..1fb22dc2f074 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll @@ -2340,15 +2340,18 @@ module MakeImplCommon Lang> { string toString(); } - predicate nodeRange(NodeEx node, boolean fromArg); + predicate nodeRange(NodeEx node, boolean fromArg, DataFlowCallOption callCtx); predicate localValueStep(NodeEx node1, NodeEx node2); predicate jumpValueStep(NodeEx node1, NodeEx node2); - predicate callEdgeArgParam(NodeEx arg, NodeEx param); + bindingset[outerCallCtx] + predicate callEdgeArgParam( + NodeEx arg, NodeEx param, DataFlowCallOption outerCallCtx, DataFlowCallOption innerCallCtx + ); - predicate callEdgeReturn(NodeEx ret, NodeEx out, boolean mayFlowThrough); + predicate callEdgeReturn(DataFlowCall call, NodeEx ret, NodeEx out, boolean mayFlowThrough); predicate readContentStep(NodeEx node1, Content c, NodeEx node2); @@ -2362,7 +2365,7 @@ module MakeImplCommon Lang> { * * In order to determine whether a store can be matched with a compatible read, we * check whether the target of a store may reach the source of a read, using over- - * approximated data flow (no call contexts). + * approximated data flow (depth 1 call contexts only). * * The implementation is based on `doublyBoundedFastTC`, and in order to avoid poor * performance through recursion, we unroll the recursion manually 4 times, in order to @@ -2383,22 +2386,28 @@ module MakeImplCommon Lang> { int iteration(); predicate storeMayReachReadDelta( - NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2, + DataFlowCallOption callCtx1, DataFlowCallOption callCtx2 ); predicate storeMayReachReadPrev( - NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2, + DataFlowCallOption callCtx1, DataFlowCallOption callCtx2 ); } private newtype TNodeOrContent = - TNodeOrContentNode(NodeEx n, Boolean usesPrevDelta, boolean fromArg) { nodeRange(n, fromArg) } or + TNodeOrContentNode( + NodeEx n, Boolean usesPrevDelta, boolean fromArg, DataFlowCallOption callCtx + ) { + nodeRange(n, fromArg, callCtx) + } or TNodeOrContentStoreContent(Content c) { storeContentStep(_, c, _) } or TNodeOrContentReadContent(Content c) { readContentStep(_, c, _) } private class NodeOrContent extends TNodeOrContent { - NodeEx asNodeEx(boolean usesPrevDelta, boolean fromArg) { - this = TNodeOrContentNode(result, usesPrevDelta, fromArg) + NodeEx asNodeEx(boolean usesPrevDelta, boolean fromArg, DataFlowCallOption callCtx) { + this = TNodeOrContentNode(result, usesPrevDelta, fromArg, callCtx) } Content asStoreContent() { this = TNodeOrContentStoreContent(result) } @@ -2410,29 +2419,36 @@ module MakeImplCommon Lang> { or result = this.asReadContent().toString() or - result = this.asNodeEx(_, _).toString() + result = this.asNodeEx(_, _, _).toString() } } pragma[nomagic] private predicate stepNodeCommon( - NodeOrContent node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2 + NodeOrContent node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2, + DataFlowCallOption callCtx2 ) { - exists(NodeEx n1, boolean fromArg1 | n1 = node1.asNodeEx(usesPrevDelta2, fromArg1) | + exists(NodeEx n1, boolean fromArg1, DataFlowCallOption callCtx1 | + n1 = node1.asNodeEx(usesPrevDelta2, fromArg1, callCtx1) + | localValueStep(n1, n2) and - fromArg1 = fromArg2 + fromArg1 = fromArg2 and + callCtx1 = callCtx2 or jumpValueStep(n1, n2) and - fromArg2 = false + fromArg2 = false and + callCtx2 = TDataFlowCallNone() or - callEdgeArgParam(n1, n2) and + callEdgeArgParam(n1, n2, callCtx1, callCtx2) and fromArg2 = true or - exists(boolean mayFlowThrough | - callEdgeReturn(n1, n2, mayFlowThrough) and - nodeRange(n2, fromArg2) + exists(DataFlowCall call, boolean mayFlowThrough | + callEdgeReturn(call, n1, n2, mayFlowThrough) and + nodeRange(n2, fromArg2, callCtx2) // depth 1 call contexts only | - fromArg1 = false or mayFlowThrough = true + fromArg1 = false + or + callCtx1 = TDataFlowCallSome(call) and mayFlowThrough = true ) ) } @@ -2451,19 +2467,20 @@ module MakeImplCommon Lang> { pragma[nomagic] private predicate stepNode( - NodeOrContent node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2 + NodeOrContent node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2, + DataFlowCallOption callCtx2 ) { enabled() and ( - stepNodeCommon(node1, n2, usesPrevDelta2, fromArg2) + stepNodeCommon(node1, n2, usesPrevDelta2, fromArg2, callCtx2) or - exists(NodeEx n1, boolean usesPrevDelta1, boolean fromArg1 | - n1 = node1.asNodeEx(usesPrevDelta1, fromArg1) + exists(NodeEx n1, boolean usesPrevDelta1, boolean fromArg1, DataFlowCallOption callCtx1 | + n1 = node1.asNodeEx(usesPrevDelta1, fromArg1, callCtx1) | - Prev::storeMayReachReadDelta(n1, _, n2, fromArg1, fromArg2) and + Prev::storeMayReachReadDelta(n1, _, n2, fromArg1, fromArg2, callCtx1, callCtx2) and usesPrevDelta2 = true or - Prev::storeMayReachReadPrev(n1, _, n2, fromArg1, fromArg2) and + Prev::storeMayReachReadPrev(n1, _, n2, fromArg1, fromArg2, callCtx1, callCtx2) and usesPrevDelta1 = usesPrevDelta2 ) ) @@ -2471,22 +2488,22 @@ module MakeImplCommon Lang> { pragma[nomagic] private predicate step(NodeOrContent node1, NodeOrContent node2) { - exists(NodeEx n2, boolean usesPrevDelta2, boolean fromArg2 | - n2 = node2.asNodeEx(usesPrevDelta2, fromArg2) and - stepNode(node1, n2, usesPrevDelta2, fromArg2) + exists(NodeEx n2, boolean usesPrevDelta2, boolean fromArg2, DataFlowCallOption callCtx2 | + n2 = node2.asNodeEx(usesPrevDelta2, fromArg2, callCtx2) and + stepNode(node1, n2, usesPrevDelta2, fromArg2, callCtx2) ) or enabled() and ( exists(NodeEx n2, Content c, boolean usesPrevDelta2 | - n2 = node2.asNodeEx(usesPrevDelta2, _) and + n2 = node2.asNodeEx(usesPrevDelta2, _, _) and c = node1.asStoreContent() and storeContentStep(_, c, n2) and usesPrevDelta2 = false ) or exists(NodeEx n1, Content c, boolean usesPrevDelta1 | - n1 = node1.asNodeEx(usesPrevDelta1, _) and + n1 = node1.asNodeEx(usesPrevDelta1, _, _) and c = node2.asReadContent() and readContentStep(n1, c, _) and usesPrevDelta(usesPrevDelta1) @@ -2509,12 +2526,12 @@ module MakeImplCommon Lang> { pragma[nomagic] private predicate contentIsReadAndStoredJoin(NodeOrContent c1, NodeOrContent c2, Content c) { + enabled() and c1.asStoreContent() = c and c2.asReadContent() = c } additional predicate contentIsReadAndStored(Content c) { - enabled() and exists(NodeOrContent n1, NodeOrContent n2 | contentReachesReadTc(n1, n2) and contentIsReadAndStoredJoin(n1, n2, c) @@ -2525,7 +2542,7 @@ module MakeImplCommon Lang> { private predicate isStoreTarget0(NodeOrContent node, Content c) { exists(boolean usesPrevDelta | contentIsReadAndStored(c) and - storeContentStep(_, c, node.asNodeEx(usesPrevDelta, _)) and + storeContentStep(_, c, node.asNodeEx(usesPrevDelta, _, _)) and usesPrevDelta = false ) } @@ -2536,7 +2553,7 @@ module MakeImplCommon Lang> { private predicate isReadSource0(NodeOrContent node, Content c) { exists(boolean usesPrevDelta | contentIsReadAndStored(c) and - readContentStep(node.asNodeEx(usesPrevDelta, _), c, _) and + readContentStep(node.asNodeEx(usesPrevDelta, _, _), c, _) and usesPrevDelta(usesPrevDelta) ) } @@ -2584,45 +2601,50 @@ module MakeImplCommon Lang> { pragma[nomagic] private predicate storeMayReachReadDeltaJoinLeft( - NodeEx node1, Content c, NodeOrContent node2, boolean fromArg + NodeEx node1, Content c, NodeOrContent node2, boolean fromArg2, DataFlowCallOption callCtx2 ) { exists(boolean usesPrevDelta | storeMayReachARead(pragma[only_bind_into](node2), pragma[only_bind_into](c)) and - storeContentStep(node1, c, node2.asNodeEx(usesPrevDelta, fromArg)) and + storeContentStep(node1, c, node2.asNodeEx(usesPrevDelta, fromArg2, callCtx2)) and usesPrevDelta = false ) } pragma[nomagic] private predicate storeMayReachReadDeltaJoinRight( - NodeOrContent node1, Content c, NodeEx node2, boolean fromArg + NodeOrContent node1, Content c, NodeEx node2, boolean fromArg1, DataFlowCallOption callCtx1 ) { exists(boolean usesPrevDelta | aStoreMayReachRead(pragma[only_bind_into](node1), pragma[only_bind_into](c)) and - readContentStep(node1.asNodeEx(usesPrevDelta, fromArg), c, node2) and + readContentStep(node1.asNodeEx(usesPrevDelta, fromArg1, callCtx1), c, node2) and usesPrevDelta(usesPrevDelta) ) } pragma[nomagic] predicate storeMayReachReadDelta( - NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2, + DataFlowCallOption callCtx1, DataFlowCallOption callCtx2 ) { exists(NodeOrContent storeTarget, NodeOrContent readSource | storeMayReachReadTc(storeTarget, readSource) and - storeMayReachReadDeltaJoinLeft(storeSource, c, storeTarget, fromArg1) and - storeMayReachReadDeltaJoinRight(readSource, c, readTarget, fromArg2) + storeMayReachReadDeltaJoinLeft(storeSource, c, storeTarget, fromArg1, callCtx1) and + storeMayReachReadDeltaJoinRight(readSource, c, readTarget, fromArg2, callCtx2) ) and - not Prev::storeMayReachReadPrev(storeSource, c, readTarget, fromArg1, fromArg2) + not Prev::storeMayReachReadPrev(storeSource, c, readTarget, fromArg1, fromArg2, callCtx1, + callCtx2) } pragma[nomagic] predicate storeMayReachReadPrev( - NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2, + DataFlowCallOption callCtx1, DataFlowCallOption callCtx2 ) { - Prev::storeMayReachReadPrev(storeSource, c, readTarget, fromArg1, fromArg2) + Prev::storeMayReachReadPrev(storeSource, c, readTarget, fromArg1, fromArg2, callCtx1, + callCtx2) or - Prev::storeMayReachReadDelta(storeSource, c, readTarget, fromArg1, fromArg2) + Prev::storeMayReachReadDelta(storeSource, c, readTarget, fromArg1, fromArg2, callCtx1, + callCtx2) } } @@ -2630,13 +2652,15 @@ module MakeImplCommon Lang> { int iteration() { result = 0 } predicate storeMayReachReadDelta( - NodeEx node1, Content c, NodeEx node2, boolean fromArg1, boolean fromArg2 + NodeEx node1, Content c, NodeEx node2, boolean fromArg1, boolean fromArg2, + DataFlowCallOption callCtx1, DataFlowCallOption callCtx2 ) { none() } predicate storeMayReachReadPrev( - NodeEx node1, Content c, NodeEx node2, boolean fromArg1, boolean fromArg2 + NodeEx node1, Content c, NodeEx node2, boolean fromArg1, boolean fromArg2, + DataFlowCallOption callCtx1, DataFlowCallOption callCtx2 ) { none() } @@ -2648,9 +2672,10 @@ module MakeImplCommon Lang> { import M predicate storeMayReachReadDelta( - NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2 + NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2, + DataFlowCallOption callCtx1, DataFlowCallOption callCtx2 ) { - M::storeMayReachReadDelta(storeSource, c, readTarget, fromArg1, fromArg2) + M::storeMayReachReadDelta(storeSource, c, readTarget, fromArg1, fromArg2, callCtx1, callCtx2) or // special case only needed for the first iteration: a store immediately followed by a read exists(NodeEx storeTargetReadSource | @@ -2658,9 +2683,11 @@ module MakeImplCommon Lang> { storeContentStep(storeSource, c, storeTargetReadSource) and readContentStep(storeTargetReadSource, c, readTarget) ) and - nodeRange(storeSource, fromArg1) and - nodeRange(readTarget, fromArg2) and - fromArg1 = fromArg2 + nodeRange(storeSource, fromArg1, callCtx1) and + nodeRange(readTarget, fromArg2, callCtx2) and + fromArg1 = fromArg2 and + pragma[only_bind_into](pragma[only_bind_out](callCtx1)) = + pragma[only_bind_into](pragma[only_bind_out](callCtx2)) } } @@ -2673,9 +2700,9 @@ module MakeImplCommon Lang> { private module StoreReachesRead5 = StoreReachesRead; predicate storeMayReachRead(NodeEx storeSource, Content c, NodeEx readTarget) { - StoreReachesRead5::storeMayReachReadDelta(storeSource, c, readTarget, _, _) + StoreReachesRead5::storeMayReachReadDelta(storeSource, c, readTarget, _, _, _, _) or - StoreReachesRead5::storeMayReachReadPrev(storeSource, c, readTarget, _, _) + StoreReachesRead5::storeMayReachReadPrev(storeSource, c, readTarget, _, _, _, _) } } } From 184df7ebca43a0d339a5e50d3825f10924af16f5 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Thu, 5 Sep 2024 13:06:32 +0200 Subject: [PATCH 3/3] Data flow: Reduce `doublyBoundedFastTC` use in `StoreReadMatching` --- .../dataflow/internal/DataFlowImplCommon.qll | 156 ++++-------------- 1 file changed, 32 insertions(+), 124 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll index 1fb22dc2f074..0acd15ed6dea 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll @@ -2338,6 +2338,8 @@ module MakeImplCommon Lang> { signature module StoreReadMatchingInputSig { class NodeEx { string toString(); + + DataFlowType getDataFlowType(); } predicate nodeRange(NodeEx node, boolean fromArg, DataFlowCallOption callCtx); @@ -2370,13 +2372,6 @@ module MakeImplCommon Lang> { * The implementation is based on `doublyBoundedFastTC`, and in order to avoid poor * performance through recursion, we unroll the recursion manually 4 times, in order to * be able to handle access paths of maximum length 5. - * - * Additionally, in order to speed up the join with `doublyBoundedFastTC`, we first - * compute three pruning steps: - * - * 1. Which _contents_ may have matching read-store pairs (called `contentIsReadAndStored` below). - * 2. Which store targets may have _a_ matching read (called `storeMayReachARead` below). - * 3. Which reads may have _a_ matching store (called `aStoreMayReachRead` below). */ module StoreReadMatching { private import codeql.util.Boolean @@ -2396,36 +2391,24 @@ module MakeImplCommon Lang> { ); } - private newtype TNodeOrContent = - TNodeOrContentNode( - NodeEx n, Boolean usesPrevDelta, boolean fromArg, DataFlowCallOption callCtx - ) { + private newtype TNodeExt = + MkNodeExt(NodeEx n, Boolean usesPrevDelta, boolean fromArg, DataFlowCallOption callCtx) { nodeRange(n, fromArg, callCtx) - } or - TNodeOrContentStoreContent(Content c) { storeContentStep(_, c, _) } or - TNodeOrContentReadContent(Content c) { readContentStep(_, c, _) } + } - private class NodeOrContent extends TNodeOrContent { + private class NodeExt extends MkNodeExt { NodeEx asNodeEx(boolean usesPrevDelta, boolean fromArg, DataFlowCallOption callCtx) { - this = TNodeOrContentNode(result, usesPrevDelta, fromArg, callCtx) + this = MkNodeExt(result, usesPrevDelta, fromArg, callCtx) } - Content asStoreContent() { this = TNodeOrContentStoreContent(result) } - - Content asReadContent() { this = TNodeOrContentReadContent(result) } + DataFlowType getType() { result = this.asNodeEx(_, _, _).getDataFlowType() } - string toString() { - result = this.asStoreContent().toString() - or - result = this.asReadContent().toString() - or - result = this.asNodeEx(_, _, _).toString() - } + string toString() { result = this.asNodeEx(_, _, _).toString() } } pragma[nomagic] private predicate stepNodeCommon( - NodeOrContent node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2, + NodeExt node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2, DataFlowCallOption callCtx2 ) { exists(NodeEx n1, boolean fromArg1, DataFlowCallOption callCtx1 | @@ -2467,7 +2450,7 @@ module MakeImplCommon Lang> { pragma[nomagic] private predicate stepNode( - NodeOrContent node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2, + NodeExt node1, NodeEx n2, boolean usesPrevDelta2, Boolean fromArg2, DataFlowCallOption callCtx2 ) { enabled() and @@ -2487,124 +2470,41 @@ module MakeImplCommon Lang> { } pragma[nomagic] - private predicate step(NodeOrContent node1, NodeOrContent node2) { + private predicate step(NodeExt node1, NodeExt node2) { exists(NodeEx n2, boolean usesPrevDelta2, boolean fromArg2, DataFlowCallOption callCtx2 | n2 = node2.asNodeEx(usesPrevDelta2, fromArg2, callCtx2) and stepNode(node1, n2, usesPrevDelta2, fromArg2, callCtx2) ) - or - enabled() and - ( - exists(NodeEx n2, Content c, boolean usesPrevDelta2 | - n2 = node2.asNodeEx(usesPrevDelta2, _, _) and - c = node1.asStoreContent() and - storeContentStep(_, c, n2) and - usesPrevDelta2 = false - ) - or - exists(NodeEx n1, Content c, boolean usesPrevDelta1 | - n1 = node1.asNodeEx(usesPrevDelta1, _, _) and - c = node2.asReadContent() and - readContentStep(n1, c, _) and - usesPrevDelta(usesPrevDelta1) - ) - ) } - private predicate isStoreContent(NodeOrContent c) { - enabled() and - exists(c.asStoreContent()) - } - - private predicate isReadContent(NodeOrContent c) { - enabled() and - exists(c.asReadContent()) - } - - private predicate contentReachesReadTc(NodeOrContent node1, NodeOrContent node2) = - doublyBoundedFastTC(step/2, isStoreContent/1, isReadContent/1)(node1, node2) - pragma[nomagic] - private predicate contentIsReadAndStoredJoin(NodeOrContent c1, NodeOrContent c2, Content c) { - enabled() and - c1.asStoreContent() = c and - c2.asReadContent() = c - } - - additional predicate contentIsReadAndStored(Content c) { - exists(NodeOrContent n1, NodeOrContent n2 | - contentReachesReadTc(n1, n2) and - contentIsReadAndStoredJoin(n1, n2, c) - ) - } - - pragma[nomagic] - private predicate isStoreTarget0(NodeOrContent node, Content c) { + private predicate isStoreTarget0(NodeExt node, Content c) { exists(boolean usesPrevDelta | - contentIsReadAndStored(c) and storeContentStep(_, c, node.asNodeEx(usesPrevDelta, _, _)) and usesPrevDelta = false ) } - private predicate isStoreTarget(NodeOrContent node) { isStoreTarget0(node, _) } + private predicate isStoreTarget(NodeExt node) { isStoreTarget0(node, _) } pragma[nomagic] - private predicate isReadSource0(NodeOrContent node, Content c) { + private predicate isReadSource0(NodeExt node, Content c) { exists(boolean usesPrevDelta | - contentIsReadAndStored(c) and readContentStep(node.asNodeEx(usesPrevDelta, _, _), c, _) and usesPrevDelta(usesPrevDelta) ) } - private predicate isReadSource(NodeOrContent node) { isReadSource0(node, _) } - - private predicate storeMayReachAReadTc(NodeOrContent node1, NodeOrContent node2) = - doublyBoundedFastTC(step/2, isStoreTarget/1, isReadContent/1)(node1, node2) - - pragma[nomagic] - private predicate storeMayReachAReadJoin(NodeOrContent n1, NodeOrContent n2, Content c) { - isStoreTarget0(n1, c) and - n2.asReadContent() = c - } - - private predicate storeMayReachARead(NodeOrContent node1, Content c) { - exists(NodeOrContent node2 | - storeMayReachAReadTc(node1, node2) and - storeMayReachAReadJoin(node1, node2, c) - ) - } - - private predicate aStoreMayReachReadTc(NodeOrContent node1, NodeOrContent node2) = - doublyBoundedFastTC(step/2, isStoreContent/1, isReadSource/1)(node1, node2) - - pragma[nomagic] - private predicate aStoreMayReachReadJoin(NodeOrContent n1, NodeOrContent n2, Content c) { - n1.asStoreContent() = c and - isReadSource0(n2, c) - } - - additional predicate aStoreMayReachRead(NodeOrContent node2, Content c) { - exists(NodeOrContent node1 | - aStoreMayReachReadTc(node1, node2) and - aStoreMayReachReadJoin(node1, node2, c) - ) - } - - private predicate isStoreTargetPruned(NodeOrContent node) { storeMayReachARead(node, _) } - - private predicate isReadSourcePruned(NodeOrContent node) { aStoreMayReachRead(node, _) } + private predicate isReadSource(NodeExt node) { isReadSource0(node, _) } - private predicate storeMayReachReadTc(NodeOrContent node1, NodeOrContent node2) = - doublyBoundedFastTC(step/2, isStoreTargetPruned/1, isReadSourcePruned/1)(node1, node2) + private predicate storeMayReachReadTc(NodeExt node1, NodeExt node2) = + doublyBoundedFastTC(step/2, isStoreTarget/1, isReadSource/1)(node1, node2) pragma[nomagic] private predicate storeMayReachReadDeltaJoinLeft( - NodeEx node1, Content c, NodeOrContent node2, boolean fromArg2, DataFlowCallOption callCtx2 + NodeEx node1, Content c, NodeExt node2, boolean fromArg2, DataFlowCallOption callCtx2 ) { exists(boolean usesPrevDelta | - storeMayReachARead(pragma[only_bind_into](node2), pragma[only_bind_into](c)) and storeContentStep(node1, c, node2.asNodeEx(usesPrevDelta, fromArg2, callCtx2)) and usesPrevDelta = false ) @@ -2612,10 +2512,9 @@ module MakeImplCommon Lang> { pragma[nomagic] private predicate storeMayReachReadDeltaJoinRight( - NodeOrContent node1, Content c, NodeEx node2, boolean fromArg1, DataFlowCallOption callCtx1 + NodeExt node1, Content c, NodeEx node2, boolean fromArg1, DataFlowCallOption callCtx1 ) { exists(boolean usesPrevDelta | - aStoreMayReachRead(pragma[only_bind_into](node1), pragma[only_bind_into](c)) and readContentStep(node1.asNodeEx(usesPrevDelta, fromArg1, callCtx1), c, node2) and usesPrevDelta(usesPrevDelta) ) @@ -2626,10 +2525,20 @@ module MakeImplCommon Lang> { NodeEx storeSource, Content c, NodeEx readTarget, boolean fromArg1, boolean fromArg2, DataFlowCallOption callCtx1, DataFlowCallOption callCtx2 ) { - exists(NodeOrContent storeTarget, NodeOrContent readSource | + exists(NodeExt storeTarget, NodeExt readSource | storeMayReachReadTc(storeTarget, readSource) and storeMayReachReadDeltaJoinLeft(storeSource, c, storeTarget, fromArg1, callCtx1) and - storeMayReachReadDeltaJoinRight(readSource, c, readTarget, fromArg2, callCtx2) + storeMayReachReadDeltaJoinRight(readSource, c, readTarget, fromArg2, callCtx2) and + ( + compatibleTypesFilter(storeTarget.getType(), readSource.getType()) + or + not exists(readSource.getType()) + ) and + ( + compatibleTypesFilter(storeSource.getDataFlowType(), readTarget.getDataFlowType()) + or + not exists(readTarget.getDataFlowType()) + ) ) and not Prev::storeMayReachReadPrev(storeSource, c, readTarget, fromArg1, fromArg2, callCtx1, callCtx2) @@ -2679,7 +2588,6 @@ module MakeImplCommon Lang> { or // special case only needed for the first iteration: a store immediately followed by a read exists(NodeEx storeTargetReadSource | - StoreReachesRead1::contentIsReadAndStored(c) and storeContentStep(storeSource, c, storeTargetReadSource) and readContentStep(storeTargetReadSource, c, readTarget) ) and