From 27c85bcda80ab89204551a9e7c5b256098fb68af Mon Sep 17 00:00:00 2001 From: Schuyler Rosefield Date: Fri, 19 Dec 2025 13:10:40 -0500 Subject: [PATCH 1/3] Try to be smarter with our resolve_gep calls to not duplicate work with resolve_check_bounds - Insert stack tracking at more appropriate location if we find llvm lifetime markers - We only ever invalidate one address at a time, so do that instead of invalidating a range which is more expensive. - Move stack allocations to their own thread_local so we dont need to acquire a lock. - Reduce the number of times we call invalidate_stack - Hint that the resolve_gep branch is likely true. - coalesce neighboring GEP calls --- .gitignore | 3 + Makefile | 11 +- libresolve/src/lib.rs | 4 + libresolve/src/remediate.rs | 186 ++++++++++---- libresolve/src/shadowobjs.rs | 5 + llvm-plugin/Makefile | 7 +- llvm-plugin/src/CMakeLists.txt | 2 +- llvm-plugin/src/CVEAssert/bounds_check.cpp | 282 ++++++++++++++++----- llvm-plugin/tests/regression/heap_oob.c | 4 +- llvm-plugin/tests/regression/stack_oob.c | 10 +- 10 files changed, 386 insertions(+), 128 deletions(-) diff --git a/.gitignore b/.gitignore index ec9f4485..fb75026f 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,7 @@ examples/openssl_reach_out.json reach_wrap_input.json reach_wrap_output.json +resolve_log_*.out +# compiled library folder +comp_libs/ diff --git a/Makefile b/Makefile index 6ffe2b72..22e652a4 100644 --- a/Makefile +++ b/Makefile @@ -3,14 +3,17 @@ all: build build: build-resolve-facts build-llvm-plugin build-libresolve build-reach build-klee +lib-folder: + mkdir -p comp_libs + build-resolve-facts: resolve-facts +$(MAKE) -C resolve-facts -build-llvm-plugin: llvm-plugin build-libresolve build-resolve-facts - +$(MAKE) -C llvm-plugin +build-llvm-plugin: lib-folder llvm-plugin build-libresolve build-resolve-facts + +$(MAKE) -C llvm-plugin && cp llvm-plugin/build/lib*.so comp_libs/ -build-libresolve: libresolve - cd libresolve && RUSTFLAGS="-D warnings" cargo build --release +build-libresolve: lib-folder libresolve + cd libresolve && RUSTFLAGS="-D warnings" cargo build --release && cp target/release/libresolve.so ../comp_libs test: test-llvm-plugin test-libresolve diff --git a/libresolve/src/lib.rs b/libresolve/src/lib.rs index 85be6de9..298108da 100644 --- a/libresolve/src/lib.rs +++ b/libresolve/src/lib.rs @@ -2,6 +2,10 @@ // LGPL-3; See LICENSE.txt in the repo root for details. #![feature(btree_cursors)] +#![feature(macro_metavar_expr_concat)] +#![feature(test)] +extern crate test; + mod remediate; mod shadowobjs; diff --git a/libresolve/src/remediate.rs b/libresolve/src/remediate.rs index b9d384fd..667dcb93 100644 --- a/libresolve/src/remediate.rs +++ b/libresolve/src/remediate.rs @@ -1,21 +1,13 @@ // Copyright (c) 2025 Riverside Research. // LGPL-3; See LICENSE.txt in the repo root for details. -use libc::{ - c_char, c_void, calloc, free, malloc, realloc, strdup, strlen, strndup, strnlen, -}; +use libc::{c_char, c_void, calloc, free, malloc, realloc, strdup, strlen, strndup, strnlen}; -use crate::shadowobjs::{ALIVE_OBJ_LIST, AllocType, FREED_OBJ_LIST, ShadowObject, Vaddr}; +use crate::shadowobjs::{ + ALIVE_OBJ_LIST, AllocType, FREED_OBJ_LIST, STACK_OBJ_LIST, ShadowObject, Vaddr, +}; use log::{error, info, trace, warn}; -/** - * NOTE - * Libresolve supports adding shadow objects for stack objects - * but we do not currently support removing stack objects from - * ALIVE_OBJ_LIST once the stack objects are freed at the end - * of a function scope - **/ - /** * @brief - Allocator interface for stack objects * @input - size of the pointer allocation in bytes @@ -24,28 +16,61 @@ use log::{error, info, trace, warn}; #[unsafe(no_mangle)] pub extern "C" fn resolve_stack_obj(ptr: *mut c_void, size: usize) -> () { let base = ptr as Vaddr; + + STACK_OBJ_LIST.with_borrow_mut(|l| { + l.add_shadow_object(AllocType::Stack, base, size); + }); + + /* { let mut obj_list = ALIVE_OBJ_LIST.lock(); obj_list.add_shadow_object(AllocType::Stack, base, size); } + */ info!("[STACK] Object allocated with size: {size}, address: 0x{base:x}"); } +/* #[unsafe(no_mangle)] -pub extern "C" fn resolve_invalidate_stack(base: *mut c_void, limit: *mut c_void) { +pub extern "C" fn resolve_invalidate_stack(base: *mut c_void) { let base = base as Vaddr; - let limit = limit as Vaddr; - { - let mut obj_list = ALIVE_OBJ_LIST.lock(); - // TODO: Add these to a free list? - obj_list.invalidate_region(base, limit); - } + STACK_OBJ_LIST.with_borrow_mut(|l| { + l.invalidate_at(base); + }); + + info!("[STACK] Free address 0x{base:x}"); +} +*/ + +macro_rules! invalidate_stacks { + ($name:ident; $($ns:literal),+) => { + +#[unsafe(no_mangle)] +pub extern "C" fn $name($(${concat(base_, $ns)}: *mut c_void, )+) { + + STACK_OBJ_LIST.with_borrow_mut(|l| { + $( + l.invalidate_at(${concat(base_, $ns)} as Vaddr); + info!("[STACK] Free address 0x{:x}", ${concat(base_, $ns)} as Vaddr); + )+ + }); - info!("[STACK] Free range 0x{base:x}..=0x{limit:x}"); } + } +} + +// the x64 ABI allows up to 6 arguments to be passed via register, +// so provide that many functions as we cannot define a variadic +invalidate_stacks!(resolve_invalidate_stack; 1); +invalidate_stacks!(resolve_invalidate_stack_2; 1, 2); +invalidate_stacks!(resolve_invalidate_stack_3; 1, 2, 3); +invalidate_stacks!(resolve_invalidate_stack_4; 1, 2, 3, 4); +invalidate_stacks!(resolve_invalidate_stack_5; 1, 2, 3, 4, 5); +invalidate_stacks!(resolve_invalidate_stack_6; 1, 2, 3, 4, 5, 6); + /** * @brief - Allocator logging interface for malloc * @input - size of the allocation in bytes @@ -78,15 +103,46 @@ pub extern "C" fn resolve_malloc(size: usize) -> *mut c_void { * @input * - ptr: unique pointer root * - derived: pointer derived from unique root ptr + * - max_access: the largest size of a load/store to the derived address. * @return valid pointer within bounds of allocation or * pointer 1-past limit of allocation */ #[unsafe(no_mangle)] -pub extern "C" fn resolve_gep(ptr: *mut c_void, derived: *mut c_void) -> *mut c_void { +pub extern "C" fn resolve_gep( + ptr: *mut c_void, + derived: *mut c_void, + max_access: usize, +) -> *mut c_void { let base = ptr as Vaddr; let derived = derived as Vaddr; + let contains_or_err = |obj: &ShadowObject| { + // If shadow object exists then check if the access is within bounds + if obj.contains(ShadowObject::limit(derived, max_access)) { + trace!( + "[GEP] ptr {max_access}x{derived:x} valid for base 0x{base:x}, obj: {}@0x{:x}", + obj.size(), + obj.base + ); + return derived as *mut c_void; + } + + error!( + "[GEP] ptr {max_access}x{derived:x} not valid for base 0x{base:x}, obj: {}@0x{:x}", + obj.size(), + obj.base + ); + + // Pointer known-invalid, return NULL to indicate failure + 0 as *mut c_void + }; + + // Check the local stack list first since it is cheaper. + if let Some(sobj) = STACK_OBJ_LIST.with_borrow(|l| l.search_intersection(base).cloned()) { + return contains_or_err(&sobj); + }; + let sobj_table = ALIVE_OBJ_LIST.lock(); // Look up the shadow object corresponding to this access. @@ -103,27 +159,9 @@ pub extern "C" fn resolve_gep(ptr: *mut c_void, derived: *mut c_void) -> *mut c_ return derived as *mut c_void; }; - // If shadow object exists then check if the access is within bounds - if sobj.contains(derived as Vaddr) { - info!( - "[GEP] ptr 0x{derived:x} valid for base 0x{base:x}, obj: {}@0x{:x}", - sobj.size(), - sobj.base - ); - return derived as *mut c_void; - } - - error!( - "[GEP] ptr 0x{derived:x} not valid for base 0x{base:x}, obj: {}@0x{:x}", - sobj.size(), - sobj.base - ); - - // Return 1-past limit of allocation @ ptr - sobj.past_limit() as *mut c_void + contains_or_err(sobj) } - /** * @brief - Allocator logging interface for free * @input - ptr to the allocation @@ -184,10 +222,10 @@ pub extern "C" fn resolve_free(ptr: *mut c_void) -> () { #[unsafe(no_mangle)] pub extern "C" fn resolve_realloc(ptr: *mut c_void, size: usize) -> *mut c_void { // Edge cases - // 1. returned memory may not be allocated + // 1. returned memory may not be allocated // 2. pointer passed to realloc may be NULL // 3. size fits within original allocation (returns the original ptr) - + // Consideration: Pointer passed in may be invalidated so we need a mechanism // to remove the shadow object for the orignal allocation let realloc_ptr = unsafe { realloc(ptr, size + 1) }; @@ -196,7 +234,6 @@ pub extern "C" fn resolve_realloc(ptr: *mut c_void, size: usize) -> *mut c_void return realloc_ptr; } - { let mut obj_list = ALIVE_OBJ_LIST.lock(); // Remove shadow object for original pointer @@ -319,27 +356,34 @@ pub extern "C" fn resolve_strndup(ptr: *mut c_char, size: usize) -> *mut c_char pub extern "C" fn resolve_check_bounds(base_ptr: *mut c_void, size: usize) -> bool { let base = base_ptr as Vaddr; - let sobj_table = ALIVE_OBJ_LIST.lock(); - - // Look up the shadow object corresponding to this access - if let Some(sobj) = sobj_table.search_intersection(base) { + let contains_or_err = |obj: &ShadowObject| { // If shadow object exists then check if the access is within bounds - if sobj.contains(ShadowObject::limit(base, size)) { + if obj.contains(ShadowObject::limit(base, size)) { // Access in Bounds trace!( "[BOUNDS] Access allowed {size}@0x{base:x} for allocation {}@0x{:x}", - sobj.size(), - sobj.base + obj.size(), + obj.base ); return true; } else { error!( "[BOUNDS] OOB access at {size}@0x{base:x} too big for allocation {}@0x{:x}", - sobj.size(), - sobj.base + obj.size(), + obj.base ); return false; } + }; + + if let Some(sobj) = STACK_OBJ_LIST.with_borrow(|l| l.search_intersection(base).cloned()) { + return contains_or_err(&sobj); + } + + // Look up the shadow object corresponding to this access + let sobj_table = ALIVE_OBJ_LIST.lock(); + if let Some(sobj) = sobj_table.search_intersection(base) { + return contains_or_err(sobj); } // Check if this is an invalid pointer for one of the known shadow objects @@ -352,6 +396,8 @@ pub extern "C" fn resolve_check_bounds(base_ptr: *mut c_void, size: usize) -> bo return false; } + warn!("[BOUNDS] unknown pointer 0x:{base:x}"); + // Not a tracked pointer, assume good to avoid trapping on otherwise valid pointers // TODO: add a strict mode to reject here / add extra tracking. true @@ -361,13 +407,21 @@ pub extern "C" fn resolve_check_bounds(base_ptr: *mut c_void, size: usize) -> bo pub extern "C" fn resolve_obj_type(base_ptr: *mut c_void) -> AllocType { let base = base_ptr as Vaddr; - let find_in = |table: &crate::MutexWrap| { - let t = table.lock(); - t.search_intersection(base).map(|o| o.alloc_type) + let find_in = |table: &crate::shadowobjs::ShadowObjectTable| { + table.search_intersection(base).map(|o| o.alloc_type) }; // Why does this search freed before alive? - let alloc_type = find_in(&FREED_OBJ_LIST).or_else(|| find_in(&ALIVE_OBJ_LIST)); + let alloc_type = STACK_OBJ_LIST + .with_borrow(|l| find_in(l)) + .or_else(|| { + let l = FREED_OBJ_LIST.lock(); + find_in(&l) + }) + .or_else(|| { + let l = ALIVE_OBJ_LIST.lock(); + find_in(&l) + }); alloc_type.unwrap_or(AllocType::Unknown) } @@ -398,6 +452,7 @@ pub extern "C" fn resolve_report_sanitizer_triggered() -> () { mod tests { use super::*; use crate::{resolve_init, shadowobjs::AllocType}; + use test::Bencher; #[test] fn test_malloc_free() { @@ -481,4 +536,23 @@ mod tests { resolve_free(p); } } + + #[bench] + fn bench_resolve_stack(b: &mut Bencher) { + resolve_init(); + + let addrs: Vec<_> = (0x7FFF_0000_0000_0000..0x7FFF_0000_0001_0000) + .map(|a: usize| a as *mut c_void) + .collect(); + + b.iter(|| { + addrs.iter().for_each(|a| resolve_stack_obj(*a, 1)); + + addrs.iter().for_each(|&a| { + let _ = resolve_gep(a, a, 1); + }); + + addrs.iter().for_each(|a| resolve_invalidate_stack(*a)); + }); + } } diff --git a/libresolve/src/shadowobjs.rs b/libresolve/src/shadowobjs.rs index a7e3b7fe..368a616a 100644 --- a/libresolve/src/shadowobjs.rs +++ b/libresolve/src/shadowobjs.rs @@ -2,6 +2,7 @@ // LGPL-3; See LICENSE.txt in the repo root for details. use crate::MutexWrap; +use std::cell::RefCell; use std::collections::BTreeMap; use std::ops::RangeInclusive; use std::ops::Bound::Included; @@ -94,6 +95,7 @@ impl ShadowObjectTable { } /// Removes any allocation with a base address within the supplied region + #[allow(dead_code)] pub fn invalidate_region(&mut self, base: Vaddr, limit: Vaddr) { self.table .extract_if(base..=limit, |_, _| true) @@ -130,6 +132,9 @@ impl ShadowObjectTable { } // static object lists to store all objects +thread_local! { + pub static STACK_OBJ_LIST: RefCell = RefCell::new(ShadowObjectTable::new()); +} pub static ALIVE_OBJ_LIST: MutexWrap = MutexWrap::new(ShadowObjectTable::new()); pub static FREED_OBJ_LIST: MutexWrap = MutexWrap::new(ShadowObjectTable::new()); diff --git a/llvm-plugin/Makefile b/llvm-plugin/Makefile index 8c74d06b..30388a15 100644 --- a/llvm-plugin/Makefile +++ b/llvm-plugin/Makefile @@ -1,10 +1,11 @@ -all: build test -.PHONY: clangformat test +.PHONY: all build clangformat test + +all: build test build: src mkdir -p build - cd build && cmake ../src && make -j4 + cmake -S ./src -B ./build && cd build && make -j4 clangformat: src cd build && make clangformat diff --git a/llvm-plugin/src/CMakeLists.txt b/llvm-plugin/src/CMakeLists.txt index e6b73235..de5841a8 100644 --- a/llvm-plugin/src/CMakeLists.txt +++ b/llvm-plugin/src/CMakeLists.txt @@ -13,7 +13,7 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE RelWithDebInfo) endif() -# set(CMAKE_CXX_STANDARD 17 STRING "") +set(CMAKE_CXX_STANDARD 20 STRING "") # LLVM is normally built without RTTI. Be consistent with that. if(NOT LLVM_ENABLE_RTTI) diff --git a/llvm-plugin/src/CVEAssert/bounds_check.cpp b/llvm-plugin/src/CVEAssert/bounds_check.cpp index 61713687..7519f50b 100644 --- a/llvm-plugin/src/CVEAssert/bounds_check.cpp +++ b/llvm-plugin/src/CVEAssert/bounds_check.cpp @@ -8,6 +8,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" @@ -24,6 +25,81 @@ static const bool FIND_PTR_ROOT_DEBUG = true; using namespace llvm; +static Function * getOrCreateResolveGepSanitizer(Module *M, LLVMContext &Ctx, Vulnerability::RemediationStrategies strategy) { + std::string handlerName = "resolve_gep_sanitized"; + + if (auto handler = M->getFunction(handlerName)) { + return handler; + } + + IRBuilder<> builder(Ctx); + + auto ptr_ty = PointerType::get(Ctx, 0); + auto size_ty = Type::getInt64Ty(Ctx); + + FunctionType *sanitizeGepFnTy= FunctionType::get( + ptr_ty, + { ptr_ty, ptr_ty, size_ty }, + false + ); + + Function *sanitizeGepFn = Function::Create( + sanitizeGepFnTy, + Function::InternalLinkage, + handlerName, + M + ); + + BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", sanitizeGepFn); + BasicBlock *NormalGepBB = BasicBlock::Create(Ctx, "", sanitizeGepFn); + BasicBlock *SanitizeGepBB = BasicBlock::Create(Ctx, "", sanitizeGepFn); + + // Call simplified resolve-check-bounds on pointer if not in sobj table call remediation strategy + FunctionType *resolveGepFnTy = FunctionType::get( + ptr_ty, + { ptr_ty, ptr_ty, size_ty }, + false + ); + + FunctionCallee resolveGepFn = M->getOrInsertFunction( + "resolve_gep", + resolveGepFnTy + ); + + Value *basePtr = sanitizeGepFn->getArg(0); + Value *derivedPtr = sanitizeGepFn->getArg(1); + Value *elSize = sanitizeGepFn->getArg(2); + + builder.SetInsertPoint(EntryBB); + Value * gepResult = builder.CreateCall(resolveGepFn, { basePtr, derivedPtr, elSize }); + + // resolve_gep will return the derived pointer if it is allowed + MDBuilder metadata(Ctx); + // By default llvm assumes pointer equality comparisons are unlikely to be true + // in our case it is the opposite and they should be always true unless there is an error. + auto weights = metadata.createBranchWeights(1000000, 1); + Value * withinBounds = builder.CreateICmpEQ(derivedPtr, gepResult); + builder.CreateCondBr(withinBounds, NormalGepBB, SanitizeGepBB, weights); + + // NormalGepBB: Return the loaded value. + builder.SetInsertPoint(NormalGepBB); + builder.CreateRet(derivedPtr); + + // SanitizeGepBB: Apply remediation strategy + builder.SetInsertPoint(SanitizeGepBB); + builder.CreateCall(getOrCreateResolveReportSanitizerTriggered(M)); + builder.CreateCall(getOrCreateRemediationBehavior(M, strategy)); + builder.CreateRet(Constant::getNullValue(ptr_ty)); + + // DEBUGGING + raw_ostream &out = errs(); + out << *sanitizeGepFn; + if (verifyFunction(*sanitizeGepFn, &out)) { + } + + return sanitizeGepFn; +} + static Function *getOrCreateBoundsCheckLoadSanitizer(Module *M, LLVMContext &Ctx, Type *ty, Vulnerability::RemediationStrategies strategy) { std::string handlerName = "resolve_bounds_check_ld_" + getLLVMType(ty); @@ -303,37 +379,115 @@ void instrumentAlloca(Function *F) { auto void_ty = Type::getVoidTy(Ctx); // Initialize list to store pointers to alloca and instructions - std::vector allocaList; + std::vector toFreeList; + + auto invalidateFn = M->getOrInsertFunction( + "resolve_invalidate_stack", + FunctionType::get(void_ty, { ptr_ty }, false) + ); + + auto handle_alloca = [&](auto* allocaInst) { + bool hasStart = false; + bool hasEnd = false; + + Type *allocatedType = allocaInst->getAllocatedType(); + uint64_t typeSize = DL.getTypeAllocSize(allocatedType); + + for (auto* user: allocaInst->users()) { + if( auto* call = dyn_cast(user)) { + auto called = call->getCalledFunction(); + if (called && called->getName().starts_with("llvm.lifetime.start")) { + hasStart = true; + builder.SetInsertPoint(call->getNextNode()); + builder.CreateCall(getResolveStackObj(M), { allocaInst, ConstantInt::get(size_ty, typeSize)}); + } + + if (called && called->getName().starts_with("llvm.lifetime.end")) { + hasEnd = true; + builder.SetInsertPoint(call->getNextNode()); + builder.CreateCall(invalidateFn, { allocaInst}); + } + } + } + + // This is probably always true unless we are given malformed input. + assert(hasStart == hasEnd); + if (hasStart) { return; } + // Otherwise Insert after the alloca instruction + builder.SetInsertPoint(allocaInst->getNextNode()); + builder.CreateCall(getResolveStackObj(M), { allocaInst, ConstantInt::get(size_ty, typeSize)}); + // If we have not added an invalidate call already make sure we do so later. + toFreeList.push_back(allocaInst); + }; for (auto &BB: *F) { for (auto &instr: BB) { if (auto *inst = dyn_cast(&instr)) { - allocaList.push_back(inst); + handle_alloca(inst); } } } - for (auto* allocaInst: allocaList) { - // Insert after the alloca instruction - builder.SetInsertPoint(allocaInst->getNextNode()); - Value* allocatedPtr = allocaInst; - Value *sizeVal = nullptr; - Type *allocatedType = allocaInst->getAllocatedType(); - uint64_t typeSize = DL.getTypeAllocSize(allocatedType); - sizeVal = ConstantInt::get(size_ty, typeSize); - builder.CreateCall(getResolveStackObj(M), { allocatedPtr, sizeVal }); - } - - // Find low and high allocations and pass to resolve_invaliate_stack - if (allocaList.empty()) { + if (toFreeList.empty()) { return; } - auto invalidateFn = M->getOrInsertFunction( - "resolve_invalidate_stack", + auto invalidateFn2 = M->getOrInsertFunction( + "resolve_invalidate_stack_2", FunctionType::get(void_ty, { ptr_ty, ptr_ty }, false) ); + auto invalidateFn3 = M->getOrInsertFunction( + "resolve_invalidate_stack_3", + FunctionType::get(void_ty, { ptr_ty, ptr_ty, ptr_ty }, false) + ); + + auto invalidateFn4 = M->getOrInsertFunction( + "resolve_invalidate_stack_4", + FunctionType::get(void_ty, { ptr_ty, ptr_ty, ptr_ty, ptr_ty }, false) + ); + + auto invalidateFn5 = M->getOrInsertFunction( + "resolve_invalidate_stack_5", + FunctionType::get(void_ty, { ptr_ty, ptr_ty, ptr_ty, ptr_ty, ptr_ty }, false) + ); + + auto invalidateFn6 = M->getOrInsertFunction( + "resolve_invalidate_stack_6", + FunctionType::get(void_ty, { ptr_ty, ptr_ty, ptr_ty, ptr_ty, ptr_ty, ptr_ty }, false) + ); + + + // Try to reduce the number of calls to invalidate each of the stack addrs. + // the x64 ABI allows us to pass up to 6 arguments in registers, so libresolve provides functions with up to arity 6. + auto invalidate_all_at = [&](auto* inst) { + builder.SetInsertPoint(inst); + auto size = toFreeList.size(); + for (auto i = 0; i < toFreeList.size(); i += 6) { + switch ((size - i) % 6) { + case 1: + builder.CreateCall(invalidateFn, { toFreeList[i] }); + break; + case 2: + builder.CreateCall(invalidateFn2, { toFreeList[i], toFreeList[i+1] }); + break; + case 3: + builder.CreateCall(invalidateFn3, { toFreeList[i], toFreeList[i+1], toFreeList[i+2] }); + break; + case 4: + builder.CreateCall(invalidateFn4, { toFreeList[i], toFreeList[i+1], toFreeList[i+2], toFreeList[i+3] }); + break; + case 5: + builder.CreateCall(invalidateFn5, { toFreeList[i], toFreeList[i+1], toFreeList[i+2], toFreeList[i+3], toFreeList[i+4] }); + break; + // 6 + case 0: + builder.CreateCall(invalidateFn6, { toFreeList[i], toFreeList[i+1], toFreeList[i+2], toFreeList[i+3], toFreeList[i+4], toFreeList[i+5] }); + break; + } + } + }; + // Stack grows down, so first allocation is high, last is low // Hmm.. compiler seems to be reordering the allocas in ways // that break this assumption @@ -342,11 +496,7 @@ void instrumentAlloca(Function *F) { for (auto &BB: *F) { for (auto &instr: BB) { if (auto *inst = dyn_cast(&instr)) { - builder.SetInsertPoint(inst); - // builder.CreateCall(invalidateFn, { low, high }); - for (auto *alloca: allocaList) { - builder.CreateCall(invalidateFn, { alloca, alloca }); - } + invalidate_all_at(inst); } } } @@ -447,56 +597,62 @@ void instrumentCalloc(Function *F) { } } -void instrumentGEP(Function *F) { +void instrumentGEP(Function *F, Vulnerability::RemediationStrategies strategy) { Module *M = F->getParent(); LLVMContext &Ctx = M->getContext(); IRBuilder<> builder(Ctx); const DataLayout &DL = M->getDataLayout(); - std::vector gepList; auto ptr_ty = PointerType::get(Ctx, 0); + auto size_ty = Type::getInt64Ty(Ctx); - FunctionType *resolveGEPFnTy = FunctionType::get( - ptr_ty, - { ptr_ty, ptr_ty }, - false - ); + std::unordered_set visitedGep; - FunctionCallee resolveGEPFn = M->getOrInsertFunction( - "resolve_gep", - resolveGEPFnTy - ); + auto resolveGepFn = getOrCreateResolveGepSanitizer(M, Ctx, strategy); - for (auto &BB : *F) { - for (auto &inst: BB) { - if (auto *gep = dyn_cast(&inst)) { - gepList.push_back(gep); - } - } - } + auto handle_gep = [&](auto* gep) { - for (auto GEPInst: gepList) { - builder.SetInsertPoint(GEPInst->getNextNode()); - - // Get the pointer operand and offset from GEP - Value *basePtr = GEPInst->getPointerOperand(); - Value * derivedPtr = GEPInst; - - // Don't assume gep is inbounds, otherwise our remdiation risks being optimized away - GEPInst->setIsInBounds(false); + if (visitedGep.contains(gep)) { + return; + } - auto resolveGEPCall = builder.CreateCall(resolveGEPFn, { basePtr, derivedPtr }); + Value* basePtr = gep->getPointerOperand(); + GetElementPtrInst* derivedPtr = gep; + gep->setIsInBounds(false); + + // If we are chaining geps we don't need to check each individually, only the total range in the end. + while (derivedPtr->hasOneUser()) { + if (auto* gep2 = dyn_cast(derivedPtr->user_back())) { + gep2->setIsInBounds(false); + visitedGep.insert(gep2); + derivedPtr = gep2; + } else { + break; + } + } - // Collect users of gep instruction before mutation SmallVector gep_users; - for (User *U : GEPInst->users()) { + for (User *U : derivedPtr->users()) { gep_users.push_back(U); } + builder.SetInsertPoint(derivedPtr->getNextNode()); + auto resolveGepCall = builder.CreateCall(resolveGepFn, { basePtr, derivedPtr, ConstantExpr::getSizeOf(derivedPtr->getResultElementType()) }); + // Iterate over all the users of the gep instruction and - // replace there operands with resolve_gep result + // replace their operands with resolve_gep result for (User *U : gep_users) { - if (U != resolveGEPCall) { - U->replaceUsesOfWith(GEPInst, resolveGEPCall); + if (U != resolveGepCall) { + U->replaceUsesOfWith(derivedPtr, resolveGepCall); + } + } + + visitedGep.insert(gep); + }; + + for (auto &BB : *F) { + for (auto &inst: BB) { + if (auto *gep = dyn_cast(&inst)) { + handle_gep(gep); } } } @@ -575,6 +731,12 @@ void sanitizeLoadStore(Function *F, Vulnerability::RemediationStrategies strateg if (alloca->getAllocatedType() == valueTy) continue; } + // If we already checked with `resolve_gep` the pointer will be valid. + if (auto *call = dyn_cast(ptr)) { + auto called = call->getCalledFunction(); + if (called && called->getName() == "resolve_gep_sanitized") { continue; } + } + auto loadFn = getOrCreateBoundsCheckLoadSanitizer(F->getParent(), F->getContext(), valueTy, strategy); @@ -595,6 +757,12 @@ void sanitizeLoadStore(Function *F, Vulnerability::RemediationStrategies strateg if (alloca->getAllocatedType() == valueTy) continue; } + // If we already checked with `resolve_gep` the pointer will be valid. + if (auto *call= dyn_cast(ptr)) { + auto called = call->getCalledFunction(); + if (called && called->getName() == "resolve_gep_sanitized") { continue; } + } + auto storeFn = getOrCreateBoundsCheckStoreSanitizer( F->getParent(), F->getContext(), valueTy, strategy ); @@ -606,7 +774,7 @@ void sanitizeLoadStore(Function *F, Vulnerability::RemediationStrategies strateg } void sanitizeMemInstBounds(Function *F, Vulnerability::RemediationStrategies strategy) { - instrumentGEP(F); + instrumentGEP(F, strategy); sanitizeMemcpy(F, strategy); sanitizeLoadStore(F, strategy); -} \ No newline at end of file +} diff --git a/llvm-plugin/tests/regression/heap_oob.c b/llvm-plugin/tests/regression/heap_oob.c index 93d8d76b..5839ec72 100644 --- a/llvm-plugin/tests/regression/heap_oob.c +++ b/llvm-plugin/tests/regression/heap_oob.c @@ -10,7 +10,7 @@ // CHECK: call ptr @resolve_malloc // RUN: RESOLVE_LABEL_CVE=vulnerabilities/heap_oob.json %clang -fpass-plugin=%plugin \ // RUN: -L%rlib -lresolve -Wl,-rpath=%rlib %s -o %t.exe -// RUN: %t.exe; test $? -eq 3 +// RUN: RUST_LOG=off %t.exe; test $? -eq 3 #include #include @@ -23,4 +23,4 @@ int main() { int x = ptr[2]; return x; -} \ No newline at end of file +} diff --git a/llvm-plugin/tests/regression/stack_oob.c b/llvm-plugin/tests/regression/stack_oob.c index 44e756ff..e282aafa 100644 --- a/llvm-plugin/tests/regression/stack_oob.c +++ b/llvm-plugin/tests/regression/stack_oob.c @@ -16,25 +16,25 @@ // Test that the remediation is successful // RUN: RESOLVE_LABEL_CVE=vulnerabilities/stack_oob.json %clang -fpass-plugin=%plugin \ // RUN: -L%rlib -lresolve -Wl,-rpath=%rlib %s -o %t.exe -// RUN: %t.exe 11; EXIT_CODE=$?; \ +// RUN: RUST_LOG=off %t.exe 11; EXIT_CODE=$?; \ // RUN: echo Remediated exit: $EXIT_CODE; test $EXIT_CODE -eq 3 // // Test that the remediation is successful with optimizations // RUN: RESOLVE_LABEL_CVE=vulnerabilities/stack_oob.json %clang -O3 -fpass-plugin=%plugin \ // RUN: -L%rlib -lresolve -Wl,-rpath=%rlib %s -o %t.exe -// RUN: %t.exe 11; EXIT_CODE=$?; \ -// RUN: echo Remediated exit: $EXIT_CODE; test $EXIT_CODE -eq 3 +// RUN: RUST_LOG=off %t.exe 11; EXIT_CODE=$?; \ +// RUN: echo Remediated exit with optimizations: $EXIT_CODE; test $EXIT_CODE -eq 3 // // Test that the remediation is successful (underflow) // RUN: RESOLVE_LABEL_CVE=vulnerabilities/stack_oob.json %clang -fpass-plugin=%plugin \ // RUN: -L%rlib -lresolve -Wl,-rpath=%rlib %s -o %t.exe -// RUN: %t.exe -2; EXIT_CODE=$?; \ +// RUN: RUST_LOG=off %t.exe -2; EXIT_CODE=$?; \ // RUN: echo Remediated \(underflow\) exit: $EXIT_CODE; test $EXIT_CODE -eq 3 // // Test that the normal behavior is preserved // RUN: RESOLVE_LABEL_CVE=vulnerabilities/stack_oob.json %clang -O0 -g -fpass-plugin=%plugin \ // RUN: -L%rlib -lresolve -Wl,-rpath=%rlib %s -o %t.exe -// RUN: %t.exe 2; EXIT_CODE=$?; \ +// RUN: RUST_LOG=off %t.exe 2; EXIT_CODE=$?; \ // RUN: echo Normal exit: $EXIT_CODE; test $EXIT_CODE -eq 42 #include From 9dfe0b0c5e94887038e718f4a144856cd104f12d Mon Sep 17 00:00:00 2001 From: Schuyler Rosefield Date: Mon, 5 Jan 2026 13:29:39 -0500 Subject: [PATCH 2/3] Try to refactor some of the pointer checking code to deduplicate in pursuit of merging bounds checks and the actual access --- llvm-plugin/src/CVEAssert/bounds_check.cpp | 330 ++++++++++----------- 1 file changed, 151 insertions(+), 179 deletions(-) diff --git a/llvm-plugin/src/CVEAssert/bounds_check.cpp b/llvm-plugin/src/CVEAssert/bounds_check.cpp index 7519f50b..58c3f22e 100644 --- a/llvm-plugin/src/CVEAssert/bounds_check.cpp +++ b/llvm-plugin/src/CVEAssert/bounds_check.cpp @@ -25,6 +25,53 @@ static const bool FIND_PTR_ROOT_DEBUG = true; using namespace llvm; +static FunctionCallee getResolveGep(Module *M) { + auto &Ctx = M->getContext(); + auto ptr_ty = PointerType::get(Ctx, 0); + auto size_ty = Type::getInt64Ty(Ctx); + + return M->getOrInsertFunction( + "resolve_gep", + FunctionType::get(ptr_ty, { ptr_ty, ptr_ty, size_ty }, false) + ); +} + +static FunctionCallee getResolveCheckBounds(Module *M) { + auto &Ctx = M->getContext(); + auto ptr_ty = PointerType::get(Ctx, 0); + auto size_ty = Type::getInt64Ty(Ctx); + + FunctionType *resolveCheckBoundsFnTy = FunctionType::get( + Type::getInt1Ty(Ctx), + { ptr_ty, size_ty }, + false + ); + + return M->getOrInsertFunction( + "resolve_check_bounds", + resolveCheckBoundsFnTy + ); +} + +template +void verifyPointerThen(Module* M, LLVMContext& Ctx, Vulnerability::RemediationStrategies strategy, IRBuilder<>& builder, Function* hostFn, CF condFn, TF thenFn, EF elseFn) { + + BasicBlock *entryBB = BasicBlock::Create(Ctx, "", hostFn); + BasicBlock *normalBB = BasicBlock::Create(Ctx, "", hostFn); + BasicBlock *sanitizeBB = BasicBlock::Create(Ctx, "", hostFn); + + builder.SetInsertPoint(entryBB); + condFn(normalBB, sanitizeBB); + + builder.SetInsertPoint(normalBB); + thenFn(); + + builder.SetInsertPoint(sanitizeBB); + builder.CreateCall(getOrCreateResolveReportSanitizerTriggered(M)); + builder.CreateCall(getOrCreateRemediationBehavior(M, strategy)); + elseFn(); +} + static Function * getOrCreateResolveGepSanitizer(Module *M, LLVMContext &Ctx, Vulnerability::RemediationStrategies strategy) { std::string handlerName = "resolve_gep_sanitized"; @@ -50,46 +97,31 @@ static Function * getOrCreateResolveGepSanitizer(Module *M, LLVMContext &Ctx, Vu M ); - BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", sanitizeGepFn); - BasicBlock *NormalGepBB = BasicBlock::Create(Ctx, "", sanitizeGepFn); - BasicBlock *SanitizeGepBB = BasicBlock::Create(Ctx, "", sanitizeGepFn); - // Call simplified resolve-check-bounds on pointer if not in sobj table call remediation strategy - FunctionType *resolveGepFnTy = FunctionType::get( - ptr_ty, - { ptr_ty, ptr_ty, size_ty }, - false - ); - - FunctionCallee resolveGepFn = M->getOrInsertFunction( - "resolve_gep", - resolveGepFnTy - ); + auto resolveGepFn = getResolveGep(M); Value *basePtr = sanitizeGepFn->getArg(0); Value *derivedPtr = sanitizeGepFn->getArg(1); Value *elSize = sanitizeGepFn->getArg(2); - builder.SetInsertPoint(EntryBB); - Value * gepResult = builder.CreateCall(resolveGepFn, { basePtr, derivedPtr, elSize }); - - // resolve_gep will return the derived pointer if it is allowed - MDBuilder metadata(Ctx); - // By default llvm assumes pointer equality comparisons are unlikely to be true - // in our case it is the opposite and they should be always true unless there is an error. - auto weights = metadata.createBranchWeights(1000000, 1); - Value * withinBounds = builder.CreateICmpEQ(derivedPtr, gepResult); - builder.CreateCondBr(withinBounds, NormalGepBB, SanitizeGepBB, weights); - - // NormalGepBB: Return the loaded value. - builder.SetInsertPoint(NormalGepBB); - builder.CreateRet(derivedPtr); - - // SanitizeGepBB: Apply remediation strategy - builder.SetInsertPoint(SanitizeGepBB); - builder.CreateCall(getOrCreateResolveReportSanitizerTriggered(M)); - builder.CreateCall(getOrCreateRemediationBehavior(M, strategy)); - builder.CreateRet(Constant::getNullValue(ptr_ty)); + verifyPointerThen(M, Ctx, strategy, builder, sanitizeGepFn, + [&](auto normal, auto sanitize) { + Value * gepResult = builder.CreateCall(resolveGepFn, { basePtr, derivedPtr, elSize }); + // resolve_gep will return the derived pointer if it is allowed + MDBuilder metadata(Ctx); + // By default llvm assumes pointer equality comparisons are unlikely to be true + // in our case it is the opposite and they should be always true unless there is an error. + auto weights = metadata.createBranchWeights(1000000, 1); + Value * withinBounds = builder.CreateICmpEQ(derivedPtr, gepResult); + builder.CreateCondBr(withinBounds, normal, sanitize, weights); + }, + [&]() { + builder.CreateRet(derivedPtr); + }, + [&]() { + builder.CreateRet(Constant::getNullValue(ptr_ty)); + } + ); // DEBUGGING raw_ostream &out = errs(); @@ -125,39 +157,24 @@ static Function *getOrCreateBoundsCheckLoadSanitizer(Module *M, LLVMContext &Ctx M ); - BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", sanitizeLoadFn); - BasicBlock *NormalLoadBB = BasicBlock::Create(Ctx, "", sanitizeLoadFn); - BasicBlock *SanitizeLoadBB = BasicBlock::Create(Ctx, "", sanitizeLoadFn); - - // Call simplified resolve-check-bounds on pointer if not in sobj table call remediation strategy - FunctionType *resolveCheckBoundsFnTy = FunctionType::get( - Type::getInt1Ty(Ctx), - { ptr_ty, size_ty }, - false - ); - - FunctionCallee resolveCheckBoundsFn = M->getOrInsertFunction( - "resolve_check_bounds", - resolveCheckBoundsFnTy - ); + auto resolveCheckBoundsFn = getResolveCheckBounds(M); Value *basePtr = sanitizeLoadFn->getArg(0); - builder.SetInsertPoint(EntryBB); - Value *withinBounds = builder.CreateCall(resolveCheckBoundsFn, { basePtr, ConstantExpr::getSizeOf(ty) }); - - builder.CreateCondBr(withinBounds, NormalLoadBB, SanitizeLoadBB); - - // NormalLoadBB: Return the loaded value. - builder.SetInsertPoint(NormalLoadBB); - LoadInst *load = builder.CreateLoad(ty, basePtr); - builder.CreateRet(load); - - // SanitizeLoadBB: Apply remediation strategy - builder.SetInsertPoint(SanitizeLoadBB); - builder.CreateCall(getOrCreateResolveReportSanitizerTriggered(M)); - builder.CreateCall(getOrCreateRemediationBehavior(M, strategy)); - builder.CreateRet(Constant::getNullValue(ty)); + verifyPointerThen(M, Ctx, strategy, builder, sanitizeLoadFn, + [&](auto normal, auto sanitize) { + Value *withinBounds = builder.CreateCall(resolveCheckBoundsFn, { basePtr, ConstantExpr::getSizeOf(ty) }); + + builder.CreateCondBr(withinBounds, normal, sanitize); + }, + [&]() { + LoadInst *load = builder.CreateLoad(ty, basePtr); + builder.CreateRet(load); + }, + [&]() { + builder.CreateRet(Constant::getNullValue(ty)); + } + ); // DEBUGGING raw_ostream &out = errs(); @@ -195,40 +212,25 @@ static Function *getOrCreateBoundsCheckStoreSanitizer(Module *M, LLVMContext &Ct M ); - BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", sanitizeStoreFn); - BasicBlock *NormalStoreBB = BasicBlock::Create(Ctx, "", sanitizeStoreFn); - BasicBlock *SanitizeStoreBB = BasicBlock::Create(Ctx, "", sanitizeStoreFn); - - FunctionType *resolveCheckBoundsFnTy = FunctionType::get( - Type::getInt1Ty(Ctx), - { ptr_ty, size_ty }, - false - ); - - FunctionCallee resolveCheckBoundsFn = M->getOrInsertFunction( - "resolve_check_bounds", - resolveCheckBoundsFnTy - ); + auto resolveCheckBoundsFn = getResolveCheckBounds(M); Value *basePtr = sanitizeStoreFn->getArg(0); Value *storedVal = sanitizeStoreFn->getArg(1); - builder.SetInsertPoint(EntryBB); - - Value *withinBounds = builder.CreateCall(resolveCheckBoundsFn, - { basePtr, ConstantExpr::getSizeOf(ty) }); - - builder.CreateCondBr(withinBounds, NormalStoreBB, SanitizeStoreBB); - // NormalStoreBB: Store value @ addr - builder.SetInsertPoint(NormalStoreBB); - StoreInst *store = builder.CreateStore(storedVal, basePtr); - builder.CreateRetVoid(); - - // SanitizeStoreBB: Apply remediation strategy - builder.SetInsertPoint(SanitizeStoreBB); - builder.CreateCall(getOrCreateResolveReportSanitizerTriggered(M)); - builder.CreateCall(getOrCreateRemediationBehavior(M, strategy)); - builder.CreateRetVoid(); + verifyPointerThen(M, Ctx, strategy, builder, sanitizeStoreFn, + [&](auto normal, auto sanitize) { + Value *withinBounds = builder.CreateCall(resolveCheckBoundsFn, { basePtr, ConstantExpr::getSizeOf(ty) }); + + builder.CreateCondBr(withinBounds, normal, sanitize); + }, + [&]() { + StoreInst *store = builder.CreateStore(storedVal, basePtr); + builder.CreateRetVoid(); + }, + [&]() { + builder.CreateRetVoid(); + } + ); // DEBUGGING raw_ostream &out = errs(); @@ -262,51 +264,34 @@ static Function *getOrCreateBoundsCheckMemcpySanitizer(Module *M, Vulnerability: M ); - BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", sanitizeMemcpyFn); - BasicBlock *NormalBB = BasicBlock::Create(Ctx, "", sanitizeMemcpyFn); - BasicBlock *SanitizeMemcpyBB = BasicBlock::Create(Ctx, "", sanitizeMemcpyFn); - - FunctionType *resolveCheckBoundsFnTy = FunctionType::get( - Type::getInt1Ty(Ctx), - { ptr_ty, size_ty }, - false - ); - - FunctionCallee resolveCheckBoundsFn = M->getOrInsertFunction( - "resolve_check_bounds", - resolveCheckBoundsFnTy - ); - - // EntryBB: Call libresolve check_bounds runtime function - // to verify correct bounds of allocation - builder.SetInsertPoint(EntryBB); + auto resolveCheckBoundsFn = getResolveCheckBounds(M); // Extract dst, src, size arguments from function Value *dst_ptr = sanitizeMemcpyFn->getArg(0); Value *src_ptr = sanitizeMemcpyFn->getArg(1); Value *size_arg = sanitizeMemcpyFn->getArg(2); - Value *check_src_bd = - builder.CreateCall(resolveCheckBoundsFn, { src_ptr, size_arg }); - Value *check_dst_bd = - builder.CreateCall(resolveCheckBoundsFn, { dst_ptr, size_arg}); - - Value *withinBounds = builder.CreateAnd(check_src_bd, check_dst_bd); - builder.CreateCondBr(withinBounds, NormalBB, SanitizeMemcpyBB); - - // NormalBB: Call memcpy and return the ptr - builder.SetInsertPoint(NormalBB); - FunctionCallee memcpyfn = M->getOrInsertFunction( - "memcpy", FunctionType::get(ptr_ty, {ptr_ty, ptr_ty, size_ty}, false)); - Value *memcpy_ptr = - builder.CreateCall(memcpyfn, {dst_ptr, src_ptr, size_arg}); - builder.CreateRet(memcpy_ptr); - - // SanitizeMemcpyBB: Remediate memcpy returns null pointer. - builder.SetInsertPoint(SanitizeMemcpyBB); - builder.CreateCall(getOrCreateResolveReportSanitizerTriggered(M)); - builder.CreateCall(getOrCreateRemediationBehavior(M, strategy)); - builder.CreateRet(dst_ptr); + verifyPointerThen(M, Ctx, strategy, builder, sanitizeMemcpyFn, + [&](auto normal, auto sanitize) { + Value *check_src_bd = + builder.CreateCall(resolveCheckBoundsFn, { src_ptr, size_arg }); + Value *check_dst_bd = + builder.CreateCall(resolveCheckBoundsFn, { dst_ptr, size_arg}); + + Value *withinBounds = builder.CreateAnd(check_src_bd, check_dst_bd); + builder.CreateCondBr(withinBounds, normal, sanitize); + }, + [&]() { + FunctionCallee memcpyfn = M->getOrInsertFunction( + "memcpy", FunctionType::get(ptr_ty, {ptr_ty, ptr_ty, size_ty}, false)); + Value *memcpy_ptr = + builder.CreateCall(memcpyfn, {dst_ptr, src_ptr, size_arg}); + builder.CreateRet(memcpy_ptr); + }, + [&]() { + builder.CreateRet(dst_ptr); + } + ); // DEBUGGING raw_ostream &out = errs(); @@ -690,9 +675,6 @@ void sanitizeLoadStore(Function *F, Vulnerability::RemediationStrategies strateg LLVMContext &Ctx = F->getContext(); IRBuilder<> builder(Ctx); - std::vector loadList; - std::vector storeList; - switch(strategy) { // case Vulnerability::RemediationStrategies::CONTINUE-WRAP: /* TODO: Not yet supported. Implement this remediaion strategy */ // case Vulnerability::RemediationStrategies::CONTINUE-ZERO: /* TODO: Not yet supported. Implement this remediation strategy */ @@ -709,67 +691,57 @@ void sanitizeLoadStore(Function *F, Vulnerability::RemediationStrategies strateg break; } + Module* M = F->getParent(); - for (auto &BB : *F) { - for (auto &I : BB) { - if (auto *load = dyn_cast(&I)) { - loadList.push_back(load); - } else if (auto *store = dyn_cast(&I)) { - storeList.push_back(store); - } + auto handleLoadStore = [&](auto* inst) { + builder.SetInsertPoint(inst); + auto ptr = inst->getPointerOperand(); + Type* value_ty; + constexpr bool is_store = std::is_same_v; + if constexpr (is_store) { + value_ty = inst->getValueOperand()->getType(); + } else { + value_ty = inst->getType(); } - } - - for (auto Inst : loadList) { - builder.SetInsertPoint(Inst); - auto ptr = Inst->getPointerOperand(); - auto valueTy = Inst->getType(); // Skip trivially correct accesses to stack values in this function (i.e., most automatic variables) // Skip if ptr is an alloca and types are the same if (auto *alloca = dyn_cast(ptr)) { - if (alloca->getAllocatedType() == valueTy) continue; + if (alloca->getAllocatedType() == value_ty) { return; } } // If we already checked with `resolve_gep` the pointer will be valid. if (auto *call = dyn_cast(ptr)) { auto called = call->getCalledFunction(); - if (called && called->getName() == "resolve_gep_sanitized") { continue; } + if (called && called->getName() == "resolve_gep_sanitized") { return; } } - auto loadFn = getOrCreateBoundsCheckLoadSanitizer(F->getParent(), - F->getContext(), valueTy, strategy); - - auto sanitizedLoad = builder.CreateCall(loadFn, { ptr }); - Inst->replaceAllUsesWith(sanitizedLoad); - Inst->removeFromParent(); - Inst->deleteValue(); - } - - for (auto Inst: storeList) { - builder.SetInsertPoint(Inst); - auto ptr = Inst->getPointerOperand(); - auto valueTy = Inst->getValueOperand()->getType(); + if constexpr (is_store) { + auto storeFn = getOrCreateBoundsCheckStoreSanitizer( + F->getParent(), Ctx, value_ty, strategy + ); - // Skip trivially correct accesses to stack values in this function (i.e., most automatic variables) - // Skip if ptr is an alloca and types are the same - if (auto *alloca = dyn_cast(ptr)) { - if (alloca->getAllocatedType() == valueTy) continue; - } + auto sanitizedStore = builder.CreateCall(storeFn, { ptr, inst->getValueOperand() }); + } else { + auto loadFn = getOrCreateBoundsCheckLoadSanitizer(F->getParent(), + Ctx, value_ty, strategy); - // If we already checked with `resolve_gep` the pointer will be valid. - if (auto *call= dyn_cast(ptr)) { - auto called = call->getCalledFunction(); - if (called && called->getName() == "resolve_gep_sanitized") { continue; } + auto sanitizedLoad = builder.CreateCall(loadFn, { ptr }); + inst->replaceAllUsesWith(sanitizedLoad); } - auto storeFn = getOrCreateBoundsCheckStoreSanitizer( - F->getParent(), F->getContext(), valueTy, strategy - ); + inst->removeFromParent(); + inst->deleteValue(); + }; - auto sanitizedStore = builder.CreateCall(storeFn, { ptr, Inst->getValueOperand() }); - Inst->removeFromParent(); - Inst->deleteValue(); + for (auto &BB : *F) { + for (auto &I : BB) { + if (auto *load = dyn_cast(&I)) { + handleLoadStore(load); + } else if (auto *store = dyn_cast(&I)) { + handleLoadStore(store); + } + } } } From 28941ac6a948dc4066ec2dc74fc4e10847c78cf8 Mon Sep 17 00:00:00 2001 From: Schuyler Rosefield Date: Mon, 5 Jan 2026 14:28:59 -0500 Subject: [PATCH 3/3] Note on behavior when using SAFE remediation --- llvm-plugin/src/CVEAssert/bounds_check.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/llvm-plugin/src/CVEAssert/bounds_check.cpp b/llvm-plugin/src/CVEAssert/bounds_check.cpp index 58c3f22e..efa34b47 100644 --- a/llvm-plugin/src/CVEAssert/bounds_check.cpp +++ b/llvm-plugin/src/CVEAssert/bounds_check.cpp @@ -693,6 +693,9 @@ void sanitizeLoadStore(Function *F, Vulnerability::RemediationStrategies strateg Module* M = F->getParent(); + std::vector loadInsts; + std::vector storeInsts; + auto handleLoadStore = [&](auto* inst) { builder.SetInsertPoint(inst); auto ptr = inst->getPointerOperand(); @@ -713,6 +716,10 @@ void sanitizeLoadStore(Function *F, Vulnerability::RemediationStrategies strateg // If we already checked with `resolve_gep` the pointer will be valid. if (auto *call = dyn_cast(ptr)) { auto called = call->getCalledFunction(); + // TODO: in the SAFE strategy we want to elide the load/store instruction entirely. + // It should in theory be possible to coalesce the check and access into one call. + // This is somewhat complicated by a ptr being used for multiple loads/stores. + // e.g. %105 = load i8, ptr %104; ... store i8 %105, %104 if (called && called->getName() == "resolve_gep_sanitized") { return; } } @@ -737,12 +744,19 @@ void sanitizeLoadStore(Function *F, Vulnerability::RemediationStrategies strateg for (auto &BB : *F) { for (auto &I : BB) { if (auto *load = dyn_cast(&I)) { - handleLoadStore(load); + loadInsts.push_back(load); } else if (auto *store = dyn_cast(&I)) { - handleLoadStore(store); + storeInsts.push_back(store); } } } + + for (auto& inst: loadInsts) { + handleLoadStore(inst); + } + for (auto& inst: storeInsts) { + handleLoadStore(inst); + } } void sanitizeMemInstBounds(Function *F, Vulnerability::RemediationStrategies strategy) {