From 876b2f428913c36860da2b0d975374d52913a1ee Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 1 May 2023 13:32:34 +0200 Subject: [PATCH 1/2] Simple IFDS UninitVars Analysis refactoring + test updates --- .../Problems/IFDSUninitializedVariables.h | 24 +- .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 4 + .../Problems/IFDSUninitializedVariables.cpp | 553 +++++++----------- lib/PhasarLLVM/Utils/LLVMShorthands.cpp | 9 + lib/PhasarPass/PhasarPass.cpp | 2 +- .../IFDSUninitializedVariablesTest.cpp | 225 +++---- 6 files changed, 303 insertions(+), 514 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.h index 46db4b631..5e8174d68 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.h @@ -12,6 +12,10 @@ #include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include #include @@ -29,16 +33,17 @@ class IFDSUninitializedVariables std::string FilePath; std::string SrcCode; std::vector VarNames; - std::map> - IRTrace; + std::vector>> IRTrace; + [[nodiscard]] bool empty() const; void print(llvm::raw_ostream &OS); }; public: - IFDSUninitializedVariables(const LLVMProjectIRDB *IRDB, - std::vector EntryPoints = {"main"}); + explicit IFDSUninitializedVariables(const LLVMProjectIRDB *IRDB, + LLVMAliasInfoRef PT, + std::vector EntryPoints = { + "main"}); ~IFDSUninitializedVariables() override = default; @@ -71,12 +76,15 @@ class IFDSUninitializedVariables void emitTextReport(const SolverResults &Results, llvm::raw_ostream &OS = llvm::outs()) override; - [[nodiscard]] const std::map> &getAllUndefUses() const; + [[nodiscard]] const auto &getAllUndefUses() const noexcept { + return UndefValueUses; + } - std::vector aggregateResults(); + [[nodiscard]] std::vector aggregateResults(); private: - std::map> UndefValueUses; + llvm::DenseMap> UndefValueUses{}; + LLVMAliasInfoRef PT; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index 5639c7a17..f8f88f264 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -35,11 +35,15 @@ class StoreInst; class BranchInst; class Module; class CallInst; +class Use; } // namespace llvm namespace psr { class LLVMProjectIRDB; +[[nodiscard]] bool isDefiniteLastUse(const llvm::Use &Use) noexcept; +[[nodiscard]] bool isDefiniteLastUse(const llvm::Value *Use) noexcept; + /** * @brief Checks if the given LLVM Value is a LLVM Function Pointer. * @param V LLVM Value. diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.cpp index 2537972d9..2db2c229e 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.cpp @@ -12,418 +12,260 @@ #include "phasar/DataFlow/IfdsIde/FlowFunctions.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include #include namespace psr { IFDSUninitializedVariables::IFDSUninitializedVariables( - const LLVMProjectIRDB *IRDB, std::vector EntryPoints) - : IFDSTabulationProblem(IRDB, std::move(EntryPoints), createZeroValue()) {} + const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints) + : IFDSTabulationProblem(IRDB, std::move(EntryPoints), createZeroValue()), + PT(PT) { + assert(PT && + "IFDSUninitializedVariables requires the alias info not to be null!"); +} + +static bool isMatchingType(const llvm::Type *Ty) noexcept { + return Ty->isIntegerTy() || Ty->isFloatingPointTy() || Ty->isPointerTy() || + Ty->isArrayTy(); +} IFDSUninitializedVariables::FlowFunctionPtrType -IFDSUninitializedVariables::getNormalFlowFunction( - IFDSUninitializedVariables::n_t Curr, - IFDSUninitializedVariables::n_t /*Succ*/) { - //---------------------------------------------------------------------------------- - // Why do we need this case? - // Every alloca is reached eventually by this function - //---------------------------------------------------------------------------------- - - // initially mark every local as uninitialized (except entry point args) - /* if (icfg.isStartPoint(curr) && - curr->getFunction()->getName() == "main") { - const llvm::Function *func = icfg.getMethodOf(curr); - // set all locals as uninitialized flow function - struct UVFF : FlowFunction { - const llvm::Function *func; - IFDSUninitializedVariables::d_t zerovalue; - UVFF(const llvm::Function *f, IFDSUninitializedVariables::d_t zv) - : func(f), zerovalue(zv) {} - set - computeTargets(IFDSUninitializedVariables::d_t source) override { - if (source == zerovalue) { - set res; - - // first add all local values of primitive types - for (auto &BB : *func) { - for (auto &inst : BB) { - // collect all alloca instructions of primitive types - if (auto alloc = llvm::dyn_cast(&inst)) { - if (alloc->getAllocatedType()->isIntegerTy() || - alloc->getAllocatedType()->isFloatingPointTy() || - alloc->getAllocatedType()->isPointerTy() || - alloc->getAllocatedType()->isArrayTy()) { - res.insert(alloc); - } - } else { - // collect all instructions that use an undef literal - for (auto &operand : inst.operands()) { - if (llvm::isa(&operand)) { - res.insert(&inst); - } - } - } - } - } - // remove function parameters of entry function - for (auto &arg : func->args()) { - res.erase(&arg); - } - res.insert(zerovalue); - return res; - } - return {}; - } - }; - return make_shared(func, zerovalue); +IFDSUninitializedVariables::getNormalFlowFunction(n_t Curr, n_t /*Succ*/) { + + if (const auto *Alloc = llvm::dyn_cast(Curr)) { + // Allocas of primitive types are uninitialized by default + if (isMatchingType(Alloc->getAllocatedType())) { + return generateFromZero(Alloc); + } } - */ // check the all store instructions and kill initialized variables if (const auto *Store = llvm::dyn_cast(Curr)) { - struct UVFF : FlowFunction { - // const llvm::Value *valueop; - // const llvm::Value *pointerop; - const llvm::StoreInst *Store; - const llvm::Value *Zero; - std::map> &UndefValueUses; - UVFF(const llvm::StoreInst *S, - std::map> &UVU, - const llvm::Value *Zero) - : Store(S), Zero(Zero), UndefValueUses(UVU) {} - std::set - computeTargets(IFDSUninitializedVariables::d_t Source) override { - - //---------------------------------------------------------------------- - // I don't get the purpose of this for-loop; - // When an undefined load is stored, it should eventually be source - //---------------------------------------------------------------------- - - /* - // check if an uninitialized value is loaded and stored in a variable - for (auto &use : store->getValueOperand()->uses()) { - // check if use is load - if (const llvm::LoadInst *load = - llvm::dyn_cast(use)) { - // if the following is uninit, then this store must be uninit - if (source == load->getPointerOperand() || source == load) { - UndefValueUses[load].insert(load->getPointerOperand()); - return {source, load, store->getValueOperand(), - store->getPointerOperand()}; - } - } - if (const llvm::Instruction *inst = - llvm::dyn_cast(use)) { - for (auto &operand : inst->operands()) { - if (const llvm::Value *val = - llvm::dyn_cast(&operand)) { - if (val == source || llvm::isa(val)) { - return {source, val, store->getPointerOperand()}; - } - } - } - } - if (use.get() == source) { - return {source, store->getValueOperand(), - store->getPointerOperand()}; - } - } - // otherwise initialize (kill) the value - if (store->getPointerOperand() == source) { - return {}; - } - */ - if (Source == Store->getValueOperand() || - (Source == Zero && - llvm::isa(Store->getValueOperand()))) { - return {Source, Store->getPointerOperand()}; + auto AA = PT.getAliasSet(Store->getPointerOperand(), Store); + + if (llvm::isa(Store->getValueOperand())) { + return lambdaFlow([AA](d_t Source) -> container_type { + container_type Ret = {Source}; + if (LLVMZeroValue::isLLVMZeroValue(Source)) { + Ret.insert(AA->begin(), AA->end()); } - if (Source == - Store->getPointerOperand()) { // storing an initialized value - // kills the variable as it is - // now initialized too - return {}; + return Ret; + }); + } + + return lambdaFlow([AA, Store](d_t Source) -> container_type { + if (Source == Store->getValueOperand()) { + container_type Ret; + if (!isDefiniteLastUse(Store->getValueOperand())) { + Ret.insert(Source); } - // pass all other facts as identity - return {Source}; + Ret.insert(AA->begin(), AA->end()); + return Ret; } - }; - return std::make_shared(Store, UndefValueUses, getZeroValue()); - } - if (const auto *Alloc = llvm::dyn_cast(Curr)) { - return lambdaFlow( - [Alloc, this](IFDSUninitializedVariables::d_t Source) - -> std::set { - if (isZeroValue(Source)) { - if (Alloc->getAllocatedType()->isIntegerTy() || - Alloc->getAllocatedType()->isFloatingPointTy() || - Alloc->getAllocatedType()->isPointerTy() || - Alloc->getAllocatedType()->isArrayTy()) { - //------------------------------------------------------------ - // Why not generate for structs, but for arrays? (would be - // consistent to generate either both or none of them) - //------------------------------------------------------------ - - // generate the alloca - return {Source, Alloc}; - } - } - // otherwise propagate all facts - return {Source}; - }); - } - // check if some instruction is using an undefined value (in)directly - struct UVFF : FlowFunction { - const llvm::Instruction *Inst; - std::map> &UndefValueUses; - UVFF(const llvm::Instruction *Inst, - std::map> &UVU) - : Inst(Inst), UndefValueUses(UVU) {} - std::set - computeTargets(IFDSUninitializedVariables::d_t Source) override { - for (const auto &Operand : Inst->operands()) { - const llvm::UndefValue *Undef = - llvm::dyn_cast(Operand); - if (Operand == Source || Operand == Undef) { - //---------------------------------------------------------------- - // It is not necessary and (from my point of view) not intended to - // report a leak on EVERY kind of instruction. - // For some of them (e.g. gep, bitcast, ...) propagating the dataflow - // facts may be enough - //---------------------------------------------------------------- - if (!llvm::isa(Inst) && - !llvm::isa(Inst) && - !llvm::isa(Inst)) { - UndefValueUses[Inst].insert(Operand); - } - return {Source, Inst}; - } + if (Source == Store->getPointerOperand()) { + return {}; } + return {Source}; - } - }; - return std::make_shared(Curr, UndefValueUses); -} + }); + } -IFDSUninitializedVariables::FlowFunctionPtrType -IFDSUninitializedVariables::getCallFlowFunction( - IFDSUninitializedVariables::n_t CallSite, - IFDSUninitializedVariables::f_t DestFun) { - if (llvm::isa(CallSite) || - llvm::isa(CallSite)) { - const auto *CS = llvm::cast(CallSite); - struct UVFF : FlowFunction { - const llvm::Function *DestFun; - const llvm::CallBase *CallSite; - const llvm::Value *Zerovalue; - std::vector Actuals; - std::vector Formals; - UVFF(const llvm::Function *DM, const llvm::CallBase *CallSite, - const llvm::Value *ZV) - : DestFun(DM), CallSite(CallSite), Zerovalue(ZV) { - // set up the actual parameters - for (unsigned Idx = 0; Idx < CallSite->arg_size(); ++Idx) { - Actuals.push_back(CallSite->getArgOperand(Idx)); + if (const auto *Load = llvm::dyn_cast(Curr)) { + return lambdaFlow([this, Load](d_t Source) -> container_type { + auto IsDefinitivelyInitialized = [](d_t Source) { + // TODO: Be more precise here + return llvm::isa(Source); + }; + + if (Source == Load->getPointerOperand()) { + if (!IsDefinitivelyInitialized(Source)) { + UndefValueUses[Load].insert(Load->getPointerOperand()); } - // set up the formal parameters - /*for (unsigned idx = 0; idx < destFun->arg_size(); ++idx) { - formals.push_back(getNthFunctionArgument(destFun, idx)); - }*/ - for (const auto &Arg : DestFun->args()) { - Formals.push_back(&Arg); + container_type Ret; + + Ret.insert(Load); + + if (!isDefiniteLastUse(Load->getOperandUse( + llvm::LoadInst::getPointerOperandIndex()))) { + Ret.insert(Source); } + + return Ret; } - std::set - computeTargets(IFDSUninitializedVariables::d_t Source) override { - // perform parameter passing - if (Source != Zerovalue) { - std::set Res; - // do the mapping from actual to formal parameters - // caution: the loop iterates from 0 to formals.size(), - // rather than actuals.size() as we may have more actual - // than formal arguments in case of C-style varargs - for (unsigned Idx = 0; Idx < Formals.size(); ++Idx) { - if (Source == Actuals[Idx]) { - Res.insert(Formals[Idx]); - } - } - return Res; - } + return {Source}; + }); + } + + if (!Curr->getType()->isVoidTy()) { + return lambdaFlow([Curr](d_t Source) -> container_type { + for (const auto &Op : Curr->operands()) { + if (Source == Op) { + container_type Ret = {Curr}; - //-------------------------------------------------------------- - // Why not letting the normal FF generate the allocas? - //-------------------------------------------------------------- - - // on zerovalue -> gen all locals parameter - /* set res; - for (auto &BB : *destFun) { - for (auto &inst : BB) { - if (auto alloc = llvm::dyn_cast(&inst)) { - // check if the allocated value is of a primitive type - if (alloc->getAllocatedType()->isIntegerTy() || - alloc->getAllocatedType()->isFloatingPointTy() || - alloc->getAllocatedType()->isPointerTy() || - alloc->getAllocatedType()->isArrayTy()) { - res.insert(alloc); - } - } else { - // check for instructions using undef value directly - for (auto &operand : inst.operands()) { - if (llvm::isa(&operand)) { - res.insert(&inst); - } - } - } + if (!isDefiniteLastUse(Op)) { + Ret.insert(Source); } - } - return res;*/ - // propagate zero - return {Source}; + return Ret; + } } - }; - return std::make_shared(DestFun, CS, getZeroValue()); + + return {Source}; + }); } - return Identity::getInstance(); + + return identityFlow(); } IFDSUninitializedVariables::FlowFunctionPtrType -IFDSUninitializedVariables::getRetFlowFunction( - IFDSUninitializedVariables::n_t CallSite, - IFDSUninitializedVariables::f_t /*CalleeFun*/, - IFDSUninitializedVariables::n_t ExitStmt, - IFDSUninitializedVariables::n_t /*RetSite*/) { - if (llvm::isa(CallSite) || - llvm::isa(CallSite)) { - const auto *CS = llvm::cast(CallSite); - struct UVFF : FlowFunction { - const llvm::CallBase *Call; - const llvm::Instruction *Exit; - UVFF(const llvm::CallBase *C, const llvm::Instruction *E) - : Call(C), Exit(E) {} - std::set - computeTargets(IFDSUninitializedVariables::d_t Source) override { - // check if we return an uninitialized value - std::set Ret; - if (Exit->getNumOperands() > 0 && Exit->getOperand(0) == Source) { - Ret.insert(Call); - } - //---------------------------------------------------------------------- - // Handle pointer/reference parameters - //---------------------------------------------------------------------- - if (Call->getCalledFunction()) { - unsigned I = 0; - for (const auto &Arg : Call->getCalledFunction()->args()) { - // auto arg = getNthFunctionArgument(call.getCalledFunction(), i); - if (&Arg == Source && Arg.getType()->isPointerTy()) { - Ret.insert(Call->getArgOperand(I)); - } - I++; - } - } +IFDSUninitializedVariables::getCallFlowFunction(n_t CallSite, f_t DestFun) { - // kill all other facts - return Ret; - } - }; - return std::make_shared(CS, ExitStmt); - } - // kill everything else - return killAllFlows(); + const auto *Call = llvm::cast(CallSite); + + return mapFactsToCallee(Call, DestFun); +} + +IFDSUninitializedVariables::FlowFunctionPtrType +IFDSUninitializedVariables::getRetFlowFunction(n_t CallSite, f_t /*CalleeFun*/, + n_t ExitStmt, n_t /*RetSite*/) { + const auto *Call = llvm::cast(CallSite); + + // TODO: Propagate back aliases + + return mapFactsToCaller(Call, ExitStmt, [](d_t Param, d_t Source) { + return Param == Source && Param->getType()->isPointerTy(); + }); } IFDSUninitializedVariables::FlowFunctionPtrType IFDSUninitializedVariables::getCallToRetFlowFunction( - IFDSUninitializedVariables::n_t CallSite, - IFDSUninitializedVariables::n_t /*RetSite*/, - llvm::ArrayRef /*Callees*/) { - //---------------------------------------------------------------------- - // Handle pointer/reference parameters - //---------------------------------------------------------------------- - if (const auto *CS = llvm::dyn_cast(CallSite)) { - return lambdaFlow( - [CS](IFDSUninitializedVariables::d_t Source) - -> std::set { - if (Source->getType()->isPointerTy()) { - for (const auto &Arg : CS->args()) { - if (Arg.get() == Source) { - // do not propagate pointer arguments, since the function may - // initialize them (would be much more precise with - // field-sensitivity) - return {}; - } - } + n_t CallSite, n_t /*RetSite*/, llvm::ArrayRef Callees) { + + const auto *Call = llvm::cast(CallSite); + llvm::SmallBitVector LeakArgs(Call->arg_size()); + llvm::SmallBitVector SanitizerArgs(Call->arg_size()); + + bool HasDeclOnly = false; + + for (const auto *Callee : Callees) { + if (Callee->isDeclaration()) { + HasDeclOnly = true; + for (const auto &Arg : Callee->args()) { + if (!isMatchingType(Arg.getType())) { + continue; + } + if (!Arg.getType()->isPointerTy() || + Arg.hasPassPointeeByValueCopyAttr() || Arg.onlyReadsMemory()) { + LeakArgs.set(Arg.getArgNo()); + } else if (Arg.getType()->isPointerTy()) { + // Here we are unsound! + // We just assume that the function will write to the pointer and + // therefore initialize it + SanitizerArgs.set(Arg.getArgNo()); + } + } + } + } + + if (HasDeclOnly) { + return lambdaFlow([this, Call, LeakArgs{std::move(LeakArgs)}, + SanitizerArgs{std::move(SanitizerArgs)}]( + d_t Source) -> container_type { + // Pass ZeroValue as is + if (LLVMZeroValue::isLLVMZeroValue(Source)) { + return {Source}; + } + // Pass global variables as is + // Need llvm::Constant here to cover also ConstantExpr and + // ConstantAggregate + if (llvm::isa(Source)) { + return {Source}; + } + + unsigned ArgIdx = 0; + for (const auto &Arg : Call->args()) { + if (Arg.get() == Source) { + if (LeakArgs.test(ArgIdx)) { + UndefValueUses[Call].insert(Arg); } - return {Source}; - }); + + if (SanitizerArgs.test(ArgIdx)) { + return {}; + } + + return {Arg.get()}; + } + ++ArgIdx; + } + + return {Source}; + }); } - return Identity::getInstance(); + return mapFactsAlongsideCallSite( + Call, [](d_t Arg) { return !Arg->getType()->isPointerTy(); }); } IFDSUninitializedVariables::FlowFunctionPtrType -IFDSUninitializedVariables::getSummaryFlowFunction( - IFDSUninitializedVariables::n_t /*CallSite*/, - IFDSUninitializedVariables::f_t /*DestFun*/) { +IFDSUninitializedVariables::getSummaryFlowFunction(n_t /*CallSite*/, + f_t /*DestFun*/) { return nullptr; } -InitialSeeds -IFDSUninitializedVariables::initialSeeds() { +auto IFDSUninitializedVariables::initialSeeds() -> InitialSeeds { PHASAR_LOG_LEVEL(DEBUG, "IFDSUninitializedVariables::initialSeeds()"); return createDefaultSeeds(); } -IFDSUninitializedVariables::d_t -IFDSUninitializedVariables::createZeroValue() const { +auto IFDSUninitializedVariables::createZeroValue() const -> d_t { PHASAR_LOG_LEVEL(DEBUG, "IFDSUninitializedVariables::createZeroValue()"); // create a special value to represent the zero value! return LLVMZeroValue::getInstance(); } -bool IFDSUninitializedVariables::isZeroValue( - IFDSUninitializedVariables::d_t Fact) const { +bool IFDSUninitializedVariables::isZeroValue(d_t Fact) const { return LLVMZeroValue::isLLVMZeroValue(Fact); } -void IFDSUninitializedVariables::printNode( - llvm::raw_ostream &OS, IFDSUninitializedVariables::n_t Stmt) const { +void IFDSUninitializedVariables::printNode(llvm::raw_ostream &OS, + n_t Stmt) const { OS << llvmIRToString(Stmt); } -void IFDSUninitializedVariables::printDataFlowFact( - llvm::raw_ostream &OS, IFDSUninitializedVariables::d_t Fact) const { +void IFDSUninitializedVariables::printDataFlowFact(llvm::raw_ostream &OS, + d_t Fact) const { OS << llvmIRToShortString(Fact); } -void IFDSUninitializedVariables::printFunction( - llvm::raw_ostream &OS, IFDSUninitializedVariables::f_t Func) const { +void IFDSUninitializedVariables::printFunction(llvm::raw_ostream &OS, + f_t Func) const { OS << Func->getName(); } void IFDSUninitializedVariables::emitTextReport( - const SolverResults & /*Result*/, - llvm::raw_ostream &OS) { + const SolverResults & /*Result*/, llvm::raw_ostream &OS) { OS << "====================== IFDS-Uninitialized-Analysis Report " "======================\n"; if (UndefValueUses.empty()) { @@ -454,6 +296,13 @@ void IFDSUninitializedVariables::emitTextReport( auto UninitResults = aggregateResults(); OS << "\nTotal uses of uninitialized variables: " << UninitResults.size() << '\n'; + + std::sort(UninitResults.begin(), UninitResults.end(), + [](const UninitResult &R1, const UninitResult &R2) { + return std::tie(R1.FilePath, R1.Line) < + std::tie(R2.FilePath, R2.Line); + }); + size_t Count = 0; for (auto Res : UninitResults) { OS << "\n--------------------------------- " << ++Count @@ -464,40 +313,42 @@ void IFDSUninitializedVariables::emitTextReport( } } -std::vector -IFDSUninitializedVariables::aggregateResults() { - std::vector Results; - unsigned int LineNr = 0; +auto IFDSUninitializedVariables::aggregateResults() + -> std::vector { + std::vector Results; + Results.reserve(UndefValueUses.size()); + unsigned int LineNr = 0; unsigned int CurrLineNr = 0; + UninitResult UR; for (const auto &User : UndefValueUses) { // new line nr idicates a new uninit use on source code level LineNr = getLineFromIR(User.first); if (CurrLineNr != LineNr) { CurrLineNr = LineNr; - UninitResult NewUR; - NewUR.Line = LineNr; - NewUR.FuncName = getFunctionNameFromIR(User.first); - NewUR.FilePath = getFilePathFromIR(User.first); - NewUR.SrcCode = getSrcCodeFromIR(User.first); + if (!UR.empty()) { - Results.push_back(UR); + Results.push_back(std::move(UR)); } - UR = NewUR; + + UR.Line = LineNr; + UR.FuncName = getFunctionNameFromIR(User.first); + UR.FilePath = getFilePathFromIR(User.first); + UR.SrcCode = getSrcCodeFromIR(User.first); } // add current IR trace - UR.IRTrace[User.first] = User.second; + UR.IRTrace.push_back(User); // add (possibly) new variable names for (const auto *UndefV : User.second) { auto VarName = getVarNameFromIR(UndefV); if (!VarName.empty()) { - UR.VarNames.push_back(VarName); + UR.VarNames.push_back(std::move(VarName)); } } } if (!UR.empty()) { - Results.push_back(UR); + Results.push_back(std::move(UR)); } return Results; } @@ -533,10 +384,4 @@ void IFDSUninitializedVariables::UninitResult::print(llvm::raw_ostream &OS) { } } -const std::map> & -IFDSUninitializedVariables::getAllUndefUses() const { - return UndefValueUses; -} - } // namespace psr diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index 772bd9629..237ebad33 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -56,6 +56,15 @@ namespace psr { const set HeapAllocationFunctions = {"_Znwm", "_Znam", "malloc", "calloc", "realloc"}; +bool isDefiniteLastUse(const llvm::Use &Use) noexcept { + // TODO: Be more precise here + return isDefiniteLastUse(Use.get()); +} + +bool isDefiniteLastUse(const llvm::Value *Use) noexcept { + return Use->hasNUses(1); +} + bool isFunctionPointer(const llvm::Value *V) noexcept { if (V) { return V->getType()->isPointerTy() && diff --git a/lib/PhasarPass/PhasarPass.cpp b/lib/PhasarPass/PhasarPass.cpp index dc90da2f9..92a159b6b 100644 --- a/lib/PhasarPass/PhasarPass.cpp +++ b/lib/PhasarPass/PhasarPass.cpp @@ -118,7 +118,7 @@ bool PhasarPass::runOnModule(llvm::Module &M) { LLVMTypeSolver.dumpResults(); } } else if (DataFlowAnalysis == "ifds-uninit") { - IFDSUninitializedVariables UninitializedVarProblem(&DB, EntryPoints); + IFDSUninitializedVariables UninitializedVarProblem(&DB, &PT, EntryPoints); IFDSSolver LLVMUnivSolver(UninitializedVarProblem, &I); LLVMUnivSolver.solve(); if (DumpResults) { diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariablesTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariablesTest.cpp index 51d7b0464..6a7bc2a3a 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariablesTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariablesTest.cpp @@ -42,14 +42,29 @@ class IFDSUninitializedVariablesTest : public ::testing::Test { createAnalysisProblem(*HA, EntryPoints); } + void doAnalysis(const llvm::Twine &IRFile, + const map> &GroundTruth) { + HelperAnalyses HA(PathToLlFiles + IRFile, EntryPoints); + auto UninitProblem = + createAnalysisProblem(HA, EntryPoints); + + { + IFDSSolver Solver(UninitProblem, &HA.getICFG()); + Solver.solve(); + } + + compareResults(UninitProblem, GroundTruth); + } + void SetUp() override { ValueAnnotationPass::resetValueID(); } void TearDown() override {} - void compareResults(map> &GroundTruth) { + static void compareResults(const IFDSUninitializedVariables &UninitProblem, + const map> &GroundTruth) { map> FoundUninitUses; - for (const auto &Kvp : UninitProblem->getAllUndefUses()) { + for (const auto &Kvp : UninitProblem.getAllUndefUses()) { auto InstID = stoi(getMetaDataID(Kvp.first)); set UndefValueIds; for (const auto *UV : Kvp.second) { @@ -60,77 +75,51 @@ class IFDSUninitializedVariablesTest : public ::testing::Test { EXPECT_EQ(FoundUninitUses, GroundTruth); } + + void compareResults(map> &GroundTruth) { + compareResults(*UninitProblem, GroundTruth); + } }; // Test Fixture TEST_F(IFDSUninitializedVariablesTest, UninitTest_01_SHOULD_NOT_LEAK) { - initialize({PathToLlFiles + "all_uninit_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); // all_uninit.cpp does not contain undef-uses - map> GroundTruth; - compareResults(GroundTruth); + doAnalysis("all_uninit_cpp_dbg.ll", {}); } TEST_F(IFDSUninitializedVariablesTest, UninitTest_02_SHOULD_LEAK) { - initialize({PathToLlFiles + "binop_uninit_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); - // binop_uninit uses uninitialized variable i in 'int j = i + 10;' - map> GroundTruth; - // %4 = load i32, i32* %2, ID: 6 ; %2 is the uninitialized variable i - GroundTruth[6] = {"1"}; - // %5 = add nsw i32 %4, 10 ; %4 is undef, since it is loaded from - // undefined alloca; not sure if it is necessary to report again - GroundTruth[7] = {"6"}; - compareResults(GroundTruth); + doAnalysis( + "binop_uninit_cpp_dbg.ll", + { + // %4 = load i32, i32* %2, ID: 6 ; %2 is the uninitialized variable i + {6, {"1"}}, + }); } -TEST_F(IFDSUninitializedVariablesTest, UninitTest_03_SHOULD_LEAK) { - initialize({PathToLlFiles + "callnoret_c_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); +TEST_F(IFDSUninitializedVariablesTest, UninitTest_03_SHOULD_LEAK) { // callnoret uses uninitialized variable a in 'return a + 10;' of addTen(int) - map> GroundTruth; - - // %4 = load i32, i32* %2 ; %2 is the parameter a of addTen(int) containing - // undef - GroundTruth[5] = {"0"}; - // The same as in test2: is it necessary to report again? - GroundTruth[6] = {"5"}; - // %5 = load i32, i32* %2 ; %2 is the uninitialized variable a - GroundTruth[16] = {"9"}; - // The same as in test2: is it necessary to report again? (the analysis does - // not) - // GroundTruth[17] = {"16"}; - - compareResults(GroundTruth); + doAnalysis("callnoret_c_dbg.ll", + { + // %4 = load i32, i32* %2 ; %2 is the parameter a of + // addTen(int) containing undef + {5, {"0"}}, + // %5 = load i32, i32* %2 ; %2 is the uninitialized variable a + {16, {"9"}}, + }); } TEST_F(IFDSUninitializedVariablesTest, UninitTest_04_SHOULD_NOT_LEAK) { - initialize({PathToLlFiles + "ctor_default_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); // ctor.cpp does not contain undef-uses - map> GroundTruth; - compareResults(GroundTruth); + doAnalysis("ctor_default_cpp_dbg.ll", {}); } TEST_F(IFDSUninitializedVariablesTest, UninitTest_05_SHOULD_NOT_LEAK) { - initialize({PathToLlFiles + "struct_member_init_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); // struct_member_init.cpp does not contain undef-uses - map> GroundTruth; - compareResults(GroundTruth); + doAnalysis("struct_member_init_cpp_dbg.ll", {}); } TEST_F(IFDSUninitializedVariablesTest, UninitTest_06_SHOULD_NOT_LEAK) { - initialize({PathToLlFiles + "struct_member_uninit_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); // struct_member_uninit.cpp does not contain undef-uses - map> GroundTruth; - compareResults(GroundTruth); + doAnalysis("struct_member_uninit_cpp_dbg.ll", {}); } /**************************************************************************************** * fails due to field-insensitivity + struct ignorance + clang compiler hacks @@ -152,95 +141,43 @@ Solver(*UninitProblem, false); Solver.solve(); } *****************************************************************************************/ TEST_F(IFDSUninitializedVariablesTest, UninitTest_08_SHOULD_NOT_LEAK) { - initialize({PathToLlFiles + "global_variable_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); // global_variable.cpp does not contain undef-uses - map> GroundTruth; - compareResults(GroundTruth); + doAnalysis("global_variable_cpp_dbg.ll", {}); } -/**************************************************************************************** - * failssince @i is uninitialized in the c++ code, but initialized in the - * LLVM-IR - * -***************************************************************************************** -TEST_F(IFDSUninitializedVariablesTest, UninitTest_09_SHOULD_LEAK) { - Initialize({pathToLLFiles + "global_variable_cpp_dbg.ll"}); - IFDSSolver -Solver(*UninitProblem, false); Solver.solve(); - // global_variable.cpp does not contain undef-uses - map> GroundTruth; - // load i32, i32* @i - GroundTruth[5] = {"0"}; - compareResults(GroundTruth); -} -*****************************************************************************************/ + TEST_F(IFDSUninitializedVariablesTest, UninitTest_10_SHOULD_LEAK) { - initialize({PathToLlFiles + "return_uninit_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); - UninitProblem->emitTextReport(Solver.getSolverResults(), llvm::outs()); - map> GroundTruth; - //%2 = load i32, i32 %1 - GroundTruth[2] = {"0"}; + doAnalysis("return_uninit_cpp_dbg.ll", { + //%2 = load i32, i32 %1 + {2, {"0"}}, + }); // What about this call? // %3 = call i32 @_Z3foov() // GroundTruth[8] = {""}; - compareResults(GroundTruth); } TEST_F(IFDSUninitializedVariablesTest, UninitTest_11_SHOULD_NOT_LEAK) { - - initialize({PathToLlFiles + "sanitizer_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); - map> GroundTruth; // all undef-uses are sanitized; // However, the uninitialized variable j is read, which causes the analysis to // report an undef-use // 6 => {2} - - GroundTruth[6] = {"2"}; - compareResults(GroundTruth); + doAnalysis("sanitizer_cpp_dbg.ll", { + {6, {"2"}}, + }); } -//--------------------------------------------------------------------- -// Not relevant any more; Test case covered by UninitTest_11 -//--------------------------------------------------------------------- -/* TEST_F(IFDSUninitializedVariablesTest, UninitTest_12_SHOULD_LEAK) { - Initialize({pathToLLFiles + "sanitizer_uninit_cpp_dbg.ll"}); - IFDSSolver -Solver(*UninitProblem, true); Solver.solve(); - // The sanitized value is not used always => the phi-node is "tainted" - map> GroundTruth; - GroundTruth[6] = {"2"}; - GroundTruth[13] = {"2"}; - compareResults(GroundTruth); -} -*/ TEST_F(IFDSUninitializedVariablesTest, UninitTest_13_SHOULD_NOT_LEAK) { - - initialize({PathToLlFiles + "sanitizer2_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); - // The undef-uses do not affect the program behaviour, but are of course still + // The undef-uses do not affect the program behavior, but are of course still // found and reported - map> GroundTruth; - GroundTruth[9] = {"2"}; - compareResults(GroundTruth); + doAnalysis("sanitizer2_cpp_dbg.ll", { + {9, {"2"}}, + }); } -TEST_F(IFDSUninitializedVariablesTest, UninitTest_14_SHOULD_LEAK) { - initialize({PathToLlFiles + "uninit_c_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); - map> GroundTruth; - GroundTruth[14] = {"1"}; - GroundTruth[15] = {"2"}; - GroundTruth[16] = {"14", "15"}; - compareResults(GroundTruth); +TEST_F(IFDSUninitializedVariablesTest, UninitTest_14_SHOULD_LEAK) { + doAnalysis("uninit_c_dbg.ll", { + {14, {"1"}}, + {15, {"2"}}, + }); } /**************************************************************************************** * Fails probably due to field-insensitivity @@ -248,10 +185,9 @@ TEST_F(IFDSUninitializedVariablesTest, UninitTest_14_SHOULD_LEAK) { ***************************************************************************************** TEST_F(IFDSUninitializedVariablesTest, UninitTest_15_SHOULD_LEAK) { Initialize({pathToLLFiles + "dyn_mem_cpp_dbg.ll"}); - IFDSSolver -Solver(*UninitProblem, false); Solver.solve(); map> -GroundTruth; + IFDSSolver Solver(*UninitProblem, &HA->getICFG()); + Solver.solve(); + map> GroundTruth; // TODO remove GT[14] and GT[13] GroundTruth[14] = {"3"}; GroundTruth[13] = {"2"}; @@ -275,35 +211,26 @@ GroundTruth; compareResults(GroundTruth); } *****************************************************************************************/ -TEST_F(IFDSUninitializedVariablesTest, UninitTest_16_SHOULD_LEAK) { - - initialize({PathToLlFiles + "growing_example_cpp_dbg.ll"}); - IFDSSolver Solver(*UninitProblem, &HA->getICFG()); - Solver.solve(); - - map> GroundTruth; - // TODO remove GT[11] - GroundTruth[11] = {"0"}; - GroundTruth[16] = {"2"}; - GroundTruth[18] = {"16"}; - GroundTruth[34] = {"24"}; - - compareResults(GroundTruth); +TEST_F(IFDSUninitializedVariablesTest, UninitTest_16_SHOULD_LEAK) { + // TODO: get rid of GT[11] + doAnalysis("growing_example_cpp_dbg.ll", { + {11, {"0"}}, + {16, {"2"}}, + {34, {"24"}}, + }); } /**************************************************************************************** - * Fails due to struct ignorance; general problem with field sensitivity: when - * all structs would be treated as uninitialized per default, the analysis would - * not be able to detect correct constructor calls + * Fails due to struct ignorance; general problem with missing field + * sensitivity: when all structs would be treated as uninitialized per default, + * the analysis would not be able to detect correct constructor calls * ***************************************************************************************** TEST_F(IFDSUninitializedVariablesTest, UninitTest_17_SHOULD_LEAK) { Initialize({pathToLLFiles + "struct_test_cpp.ll"}); - IFDSSolver -Solver(*UninitProblem, false); Solver.solve(); + IFDSSolver Solver(*UninitProblem, &HA->getICFG()); Solver.solve(); map> GroundTruth; // printf should leak both parameters => fails @@ -319,9 +246,7 @@ Solver(*UninitProblem, false); Solver.solve(); TEST_F(IFDSUninitializedVariablesTest, UninitTest_18_SHOULD_NOT_LEAK) { Initialize({pathToLLFiles + "array_init_cpp.ll"}); - IFDSSolver -Solver(*UninitProblem, false); Solver.solve(); + IFDSSolver Solver(*UninitProblem, &HA->getICFG()); Solver.solve(); map> GroundTruth; // @@ -337,9 +262,7 @@ Solver(*UninitProblem, false); Solver.solve(); TEST_F(IFDSUninitializedVariablesTest, UninitTest_19_SHOULD_NOT_LEAK) { Initialize({pathToLLFiles + "array_init_simple_cpp.ll"}); - IFDSSolver -Solver(*UninitProblem, false); Solver.solve(); + IFDSSolver Solver(*UninitProblem, &HA->getICFG()); Solver.solve(); map> GroundTruth; compareResults(GroundTruth); From 0a7bcc7c2806973f28ffabf3c85084bbd7e9aa2b Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 3 May 2023 18:30:43 +0200 Subject: [PATCH 2/2] minor --- .../Problems/IFDSUninitializedVariables.h | 9 +++++- .../Problems/IFDSUninitializedVariables.cpp | 31 +++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.h index 5e8174d68..b6b794196 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -36,7 +37,13 @@ class IFDSUninitializedVariables std::vector>> IRTrace; [[nodiscard]] bool empty() const; - void print(llvm::raw_ostream &OS); + void print(llvm::raw_ostream &OS) const; + + friend inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const UninitResult &UR) { + UR.print(OS); + return OS; + } }; public: diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.cpp index 2db2c229e..9cea16f27 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSUninitializedVariables.cpp @@ -20,6 +20,7 @@ #include "phasar/Utils/Logger.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" @@ -180,8 +181,9 @@ IFDSUninitializedVariables::getCallToRetFlowFunction( if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() || Arg.onlyReadsMemory()) { LeakArgs.set(Arg.getArgNo()); - } else if (Arg.getType()->isPointerTy()) { - // Here we are unsound! + } else if (Arg.getType()->isPointerTy() || + isDefiniteLastUse(Call->getArgOperandUse(Arg.getArgNo()))) { + // Here we may be unsound! // We just assume that the function will write to the pointer and // therefore initialize it SanitizerArgs.set(Arg.getArgNo()); @@ -322,18 +324,27 @@ auto IFDSUninitializedVariables::aggregateResults() unsigned int CurrLineNr = 0; UninitResult UR; + + auto Push = [&UR, &Results] { + if (!UR.empty()) { + std::sort(UR.IRTrace.begin(), UR.IRTrace.end(), + [](const auto &TR1, const auto &TR2) { + return LLVMValueIDLess{}(TR1.first, TR2.first); + }); + Results.push_back(std::move(UR)); + } + }; + for (const auto &User : UndefValueUses) { // new line nr idicates a new uninit use on source code level LineNr = getLineFromIR(User.first); if (CurrLineNr != LineNr) { CurrLineNr = LineNr; - if (!UR.empty()) { - Results.push_back(std::move(UR)); - } + Push(); UR.Line = LineNr; - UR.FuncName = getFunctionNameFromIR(User.first); + UR.FuncName = llvm::demangle(getFunctionNameFromIR(User.first)); UR.FilePath = getFilePathFromIR(User.first); UR.SrcCode = getSrcCodeFromIR(User.first); } @@ -347,9 +358,8 @@ auto IFDSUninitializedVariables::aggregateResults() } } } - if (!UR.empty()) { - Results.push_back(std::move(UR)); - } + + Push(); return Results; } @@ -357,7 +367,8 @@ bool IFDSUninitializedVariables::UninitResult::empty() const { return Line == 0; } -void IFDSUninitializedVariables::UninitResult::print(llvm::raw_ostream &OS) { +void IFDSUninitializedVariables::UninitResult::print( + llvm::raw_ostream &OS) const { OS << "Variable(s): "; if (!VarNames.empty()) { for (size_t I = 0; I < VarNames.size(); ++I) {