diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6fa958b..104975a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,11 +29,11 @@ jobs: # Windows - os: windows-latest build_type: Debug - test_bin_path: Debug\\PolyHook_2.exe + test_bin_path: Debug/PolyHook_2.exe - os: windows-latest build_type: Release - test_bin_path: Release\\PolyHook_2.exe + test_bin_path: Release/PolyHook_2.exe - os: windows-latest bitness: 32 @@ -92,7 +92,7 @@ jobs: # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: > cmake -B ${{ env.BUILD_DIR }} - -S ${{ github.workspace }} + -DPOLYHOOK_BUILD_DLL="ON" -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} @@ -103,15 +103,9 @@ jobs: # the default Windows generator is a multi-config generator (Visual Studio generator). run: cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build_type }} - # TODO: Deduplicate this command - name: Configure CMake for test run: > cmake -B ${{ env.BUILD_DIR }} - -S ${{ github.workspace }} - -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} - -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - ${{ matrix.arch }} -DPOLYHOOK_BUILD_DLL="OFF" - name: Build tests diff --git a/CMakeLists.txt b/CMakeLists.txt index 525a754..7bf0055 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -389,6 +389,7 @@ endif() #Tests if(NOT POLYHOOK_BUILD_DLL) + target_compile_definitions(${PROJECT_NAME} PRIVATE PLH_DIAGNOSTICS) target_sources(${PROJECT_NAME} PRIVATE ${PROJECT_SOURCE_DIR}/MainTests.cpp ${PROJECT_SOURCE_DIR}/UnitTests/${POLYHOOK_OS}/TestDisassembler.cpp diff --git a/UnitTests/TestUtils.hpp b/UnitTests/TestUtils.hpp index 8ed2d17..869eaee 100644 --- a/UnitTests/TestUtils.hpp +++ b/UnitTests/TestUtils.hpp @@ -25,7 +25,7 @@ noexcept(noexcept(std::declval()(std::declval()...))) -> auto { \ PLH::StackCanary canary; \ PLH_STOP_OPTIMIZATIONS(); \ - effects.PeakEffect().trigger(); \ + effects.PeakEffect().trigger(); \ __VA_ARGS__ \ return PLH::FnCast(TRMP, &FUNC)($args...); \ } @@ -47,8 +47,8 @@ * or ReleaseWithDebInfo mode (ReleaseWithDebInfo optimizes _slightly_ less). */ #define PLH_STOP_OPTIMIZATIONS() \ - volatile int i = 0; \ - PH_UNUSED(i) + volatile int i = 0; \ + PH_UNUSED(i) namespace PLH::test { diff --git a/UnitTests/linux/TestDetourTranslationx64.cpp b/UnitTests/linux/TestDetourTranslationx64.cpp index d4e8004..9d0075a 100644 --- a/UnitTests/linux/TestDetourTranslationx64.cpp +++ b/UnitTests/linux/TestDetourTranslationx64.cpp @@ -23,10 +23,13 @@ PLH_TEST_DETOUR_CALLBACK(dlmopen, { TEST_CASE("Testing Detours with Translations", "[Translation][ADetour]") { PLH::test::registerTestLogger(); +// dlmopen may or may not have instructions requiring translations. +// Hence, this test was disabled. We need to construct reliable synthetic tests instead. +#if 0 SECTION("dlmopen (INPLACE)") { PLH::StackCanary canary; - const auto *resultBefore = dlmopen(LM_ID_BASE, LIBM_SO, RTLD_NOW); + const auto* handleBefore = dlmopen(LM_ID_BASE, LIBM_SO, RTLD_NOW); PLH::x64Detour detour((uint64_t)dlmopen, (uint64_t)dlmopen_hooked, &dlmopen_trmp); // Only INPLACE creates conditions for translation, since @@ -36,11 +39,13 @@ TEST_CASE("Testing Detours with Translations", "[Translation][ADetour]") { REQUIRE(detour.hook()); effects.PushEffect(); - const auto *resultAfter = dlmopen(LM_ID_BASE, LIBM_SO, RTLD_NOW); + const auto* handleAfter = dlmopen(LM_ID_BASE, LIBM_SO, RTLD_NOW); + REQUIRE(detour.hasDiagnostic(PLH::Diagnostic::TranslatedInstructions)); REQUIRE(effects.PopEffect().didExecute()); - REQUIRE(resultAfter == resultBefore); + REQUIRE(handleAfter == handleBefore); REQUIRE(detour.unHook()); } +#endif } diff --git a/UnitTests/linux/TestDetourx64.cpp b/UnitTests/linux/TestDetourx64.cpp index 7043447..16a9e66 100644 --- a/UnitTests/linux/TestDetourx64.cpp +++ b/UnitTests/linux/TestDetourx64.cpp @@ -70,7 +70,7 @@ unsigned char hookMe5[] = { 0x48, 0xFF, 0xA0, 0x20, 0x01, 0x00, 0x00 // jmp qword ptr[rax+120h] }; -uint64_t nullTramp = NULL; +uint64_t nullTramp = 0; NOINLINE void h_nullstub() { PLH::StackCanary canary; PLH_STOP_OPTIMIZATIONS(); diff --git a/UnitTests/linux/TestDetourx86.cpp b/UnitTests/linux/TestDetourx86.cpp index 02e463e..845e47f 100644 --- a/UnitTests/linux/TestDetourx86.cpp +++ b/UnitTests/linux/TestDetourx86.cpp @@ -14,6 +14,8 @@ namespace { EffectTracker effects; +constexpr uint8_t JUMP_SIZE = 5; + } NOINLINE int __cdecl hookMe1() { @@ -97,6 +99,31 @@ NOINLINE void PH_ATTR_NAKED hookMeLoop() { "ret;"); } +extern "C" NOINLINE uintptr_t PH_ATTR_NAKED returnESP() { + __asm__ __volatile__ ( + "movl (%esp), %eax\n" + "ret" + ); +} + +NOINLINE uintptr_t PH_ATTR_NAKED readESP() { + __asm__ __volatile__ ( + "call returnESP\n" + "ret" + ); +} +PLH_TEST_DETOUR_CALLBACK(readESP); + +NOINLINE uintptr_t PH_ATTR_NAKED inlineReadESP() { + __asm__ __volatile__ ( + "call 0f\n" + "0: pop %%eax\n" + "ret" + ::: "eax" + ); +} +PLH_TEST_DETOUR_CALLBACK(inlineReadESP); + PLH_TEST_DETOUR_CALLBACK(hookMeLoop); // PLH_TEST_DETOUR_CALLBACK doesn't support variadic functions yet. @@ -114,7 +141,7 @@ NOINLINE int h_hookPrintf(const char *format, ...) { return PLH::FnCast(hookPrintfTramp, &printf)("INTERCEPTED YO:%s", message.c_str()); } -// must specify specific overload of std::pow by assiging to pFn of type +// must specify specific overload of std::pow by assigning to pFn of type const auto &pow_double = std::pow; PLH_TEST_DETOUR_CALLBACK(pow_double); @@ -186,6 +213,35 @@ TEST_CASE("Testing x86 detours", "[x86Detour][ADetour]") { REQUIRE(detour.unHook() == true); } + SECTION("Test #215 (call to routine returning ESP)") { + PLH::x86Detour PLH_TEST_DETOUR(readESP); + REQUIRE(detour.hook() == true); + + effects.PushEffect(); + const auto esp = readESP(); + REQUIRE(esp == (uintptr_t)readESP + JUMP_SIZE); + REQUIRE(detour.hasDiagnostic(PLH::Diagnostic::FixedCallToRoutineReadingSP)); + REQUIRE(effects.PopEffect().didExecute()); + REQUIRE(detour.unHook() == true); + } + + SECTION("Test #217 (inline call to read ESP)") { + PLH::x86Detour PLH_TEST_DETOUR(inlineReadESP); + REQUIRE(detour.hook() == true); + + effects.PushEffect(); + const auto esp = inlineReadESP(); + REQUIRE(esp == (uintptr_t)inlineReadESP + JUMP_SIZE); + REQUIRE(detour.hasDiagnostic(PLH::Diagnostic::FixedInlineCallToReadSP)); + REQUIRE(effects.PopEffect().didExecute()); + REQUIRE(detour.unHook() == true); + } + +#ifndef NDEBUG + // This test is disabled in Release builds due to aggressive optimization of compilers. + // Specifically with clang on Linux the std::pow function is always inlined. + // Hence, the hooked function is never called. + // it's a pun... // ^ what pun? nothing found on the web >.< SECTION("hook pow") { @@ -199,6 +255,7 @@ TEST_CASE("Testing x86 detours", "[x86Detour][ADetour]") { detour.unHook(); REQUIRE(effects.PopEffect().didExecute()); } +#endif SECTION("hook malloc") { PLH::x86Detour PLH_TEST_DETOUR(malloc); @@ -216,8 +273,6 @@ TEST_CASE("Testing x86 detours", "[x86Detour][ADetour]") { REQUIRE(detour.hook() == true); } - // TODO: Fix this. when making jmpToProl, relative displacement can only encode offset up to 0x7FFFFFFF - // But during tests, distance between prologue and trampoline was larger than that, leading to incorrect jump. SECTION("hook printf") { PLH::x86Detour detour((uint64_t)&printf, (uint64_t)h_hookPrintf, &hookPrintfTramp); REQUIRE(detour.hook() == true); diff --git a/UnitTests/linux/TestDisassembler.cpp b/UnitTests/linux/TestDisassembler.cpp index 4b2cd09..d681d4d 100644 --- a/UnitTests/linux/TestDisassembler.cpp +++ b/UnitTests/linux/TestDisassembler.cpp @@ -263,7 +263,7 @@ TEMPLATE_TEST_CASE("Test Disassemblers x86 FF25", "[ZydisDisassembler]", PLH::Zy auto Instructions = disasm.disassemble( (uint64_t)x86ASM_FF25.data(), (uint64_t)x86ASM_FF25.data(), - (uint64_t)(x86ASM_FF25.data() + address_length), + (uint64_t)(jmp_address_ptr + address_length), PLH::MemAccessor() ); diff --git a/UnitTests/windows/TestDetourx64.cpp b/UnitTests/windows/TestDetourx64.cpp index ccfcac1..6b8e0cd 100644 --- a/UnitTests/windows/TestDetourx64.cpp +++ b/UnitTests/windows/TestDetourx64.cpp @@ -9,7 +9,7 @@ #include "polyhook2/PolyHookOsIncludes.hpp" -#include +#include #define WIN32_LEAN_AND_MEAN #include diff --git a/polyhook2/Detour/ADetour.hpp b/polyhook2/Detour/ADetour.hpp index 83a06b7..a4ae837 100644 --- a/polyhook2/Detour/ADetour.hpp +++ b/polyhook2/Detour/ADetour.hpp @@ -65,6 +65,30 @@ class Detour : public PLH::IHook { void setIsFollowCallOnFnAddress(bool value); + + /** + * @return instructions of a routine if the provided instruction is a call instruction that calls a routine + * which returns an address stored SP register, which refers to the address of the next instruction following + * the call instruction. This is used to fix issue #215. + * An example routine looks like this (eax could be any general-purpose register): + * + * 8B 04 24 | mov eax, dword ptr [esp] + * C3 | ret + * + */ + std::optional getRoutineReturningSP(const Instruction& callInst); + + /** + * @return true if the provided instruction is a call instruction which has next address being its destination + * and which is followed by a `pop reg` instruction. This is used to fix issue #217. + * An example inline routine looks like this (ebx could be any general-purpose register): + * + * 0x59d57b24 | e8 00 00 00 00 | call 0x59D57B29 -> 59d57b29 + * 0x59d57b29 | 5b | pop ebx + * + */ + bool isCallInlineReturningSP(const Instruction& callInst); + protected: uint64_t m_fnAddress; uint64_t m_fnCallback; diff --git a/polyhook2/Detour/ILCallback.hpp b/polyhook2/Detour/ILCallback.hpp index e9630e5..103b227 100644 --- a/polyhook2/Detour/ILCallback.hpp +++ b/polyhook2/Detour/ILCallback.hpp @@ -1,76 +1,76 @@ -#ifndef POLYHOOK_2_0_ILCALLBACK_HPP -#define POLYHOOK_2_0_ILCALLBACK_HPP - -#pragma warning(push, 0) -#include -#pragma warning( pop ) - -#pragma warning( disable : 4200) -#include "polyhook2/PolyHookOs.hpp" -#include "polyhook2/ErrorLog.hpp" -#include "polyhook2/Enums.hpp" -#include "polyhook2/MemAccessor.hpp" - -namespace PLH { - class ILCallback : public MemAccessor { - public: - struct Parameters { - template - void setArg(const uint8_t idx, const T val) const { - *(T*)getArgPtr(idx) = val; - } - - template - T getArg(const uint8_t idx) const { - return *(T*)getArgPtr(idx); - } - - // asm depends on this specific type - // we the ILCallback allocates stack space that is set to point here - volatile uint64_t m_arguments; - private: - // must be char* for aliasing rules to work when reading back out - char* getArgPtr(const uint8_t idx) const { - return ((char*)&m_arguments) + sizeof(uint64_t) * idx; - } - }; - - struct ReturnValue { - unsigned char* getRetPtr() const { - return (unsigned char*)&m_retVal; - } - uint64_t m_retVal; - }; - - typedef void(*tUserCallback)(const Parameters* params, const uint8_t count, const ReturnValue* ret); - - ILCallback(); - ~ILCallback(); - - /* Construct a callback given the raw signature at runtime. 'Callback' param is the C stub to transfer to, - where parameters can be modified through a structure which is written back to the parameter slots depending - on calling convention.*/ - uint64_t getJitFunc(const asmjit::FuncSignature& sig, const asmjit::Arch arch, const tUserCallback callback); - - /* Construct a callback given the typedef as a string. Types are any valid C/C++ data type (basic types), and pointers to - anything are just a uintptr_t. Calling convention is defaulted to whatever is typical for the compiler you use, you can override with - stdcall, fastcall, or cdecl (cdecl is default on x86). On x64 those map to the same thing.*/ - uint64_t getJitFunc(const std::string& retType, const std::vector& paramTypes, const asmjit::Arch arch, const tUserCallback callback, std::string callConv = ""); - uint64_t* getTrampolineHolder(); - private: - // does a given type fit in a general purpose register (i.e. is it integer type) - bool isGeneralReg(const asmjit::TypeId typeId) const; - // float, double, simd128 - bool isXmmReg(const asmjit::TypeId typeId) const; - - asmjit::CallConvId getCallConv(const std::string& conv); - asmjit::TypeId getTypeId(const std::string& type); - - uint64_t m_callbackBuf; - asmjit::x86::Mem argsStack; - - // ptr to trampoline allocated by hook, we hold this so user doesn't need to. - uint64_t m_trampolinePtr; - }; -} -#endif // POLYHOOK_2_0_ILCALLBACK_HPP +#ifndef POLYHOOK_2_0_ILCALLBACK_HPP +#define POLYHOOK_2_0_ILCALLBACK_HPP + +#pragma warning(push, 0) +#include +#pragma warning( pop ) + +#pragma warning( disable : 4200) +#include "polyhook2/PolyHookOs.hpp" +#include "polyhook2/ErrorLog.hpp" +#include "polyhook2/Enums.hpp" +#include "polyhook2/MemAccessor.hpp" + +namespace PLH { + class ILCallback : public MemAccessor { + public: + struct Parameters { + template + void setArg(const uint8_t idx, const T val) const { + *(T*)getArgPtr(idx) = val; + } + + template + T getArg(const uint8_t idx) const { + return *(T*)getArgPtr(idx); + } + + // asm depends on this specific type + // we the ILCallback allocates stack space that is set to point here + volatile uint64_t m_arguments; + private: + // must be char* for aliasing rules to work when reading back out + char* getArgPtr(const uint8_t idx) const { + return ((char*)&m_arguments) + sizeof(uint64_t) * idx; + } + }; + + struct ReturnValue { + unsigned char* getRetPtr() const { + return (unsigned char*)&m_retVal; + } + uint64_t m_retVal; + }; + + typedef void(*tUserCallback)(const Parameters* params, const uint8_t count, const ReturnValue* ret); + + ILCallback(); + ~ILCallback(); + + /* Construct a callback given the raw signature at runtime. 'Callback' param is the C stub to transfer to, + where parameters can be modified through a structure which is written back to the parameter slots depending + on calling convention.*/ + uint64_t getJitFunc(const asmjit::FuncSignature& sig, const asmjit::Arch arch, const tUserCallback callback); + + /* Construct a callback given the typedef as a string. Types are any valid C/C++ data type (basic types), and pointers to + anything are just a uintptr_t. Calling convention is defaulted to whatever is typical for the compiler you use, you can override with + stdcall, fastcall, or cdecl (cdecl is default on x86). On x64 those map to the same thing.*/ + uint64_t getJitFunc(const std::string& retType, const std::vector& paramTypes, const asmjit::Arch arch, const tUserCallback callback, std::string callConv = ""); + uint64_t* getTrampolineHolder(); + private: + // does a given type fit in a general purpose register (i.e. is it integer type) + bool isGeneralReg(const asmjit::TypeId typeId) const; + // float, double, simd128 + bool isXmmReg(const asmjit::TypeId typeId) const; + + asmjit::CallConvId getCallConv(const std::string& conv); + asmjit::TypeId getTypeId(const std::string& type); + + uint64_t m_callbackBuf; + asmjit::x86::Mem argsStack; + + // ptr to trampoline allocated by hook, we hold this so user doesn't need to. + uint64_t m_trampolinePtr; + }; +} +#endif // POLYHOOK_2_0_ILCALLBACK_HPP diff --git a/polyhook2/Detour/x64Detour.hpp b/polyhook2/Detour/x64Detour.hpp index 0a45eb6..878c401 100644 --- a/polyhook2/Detour/x64Detour.hpp +++ b/polyhook2/Detour/x64Detour.hpp @@ -9,10 +9,8 @@ #include "polyhook2/Detour/ADetour.hpp" #include "polyhook2/Enums.hpp" #include "polyhook2/Instruction.hpp" -#include "polyhook2/ZydisDisassembler.hpp" -#include "polyhook2/ErrorLog.hpp" #include "polyhook2/RangeAllocator.hpp" -#include +#include namespace PLH { diff --git a/polyhook2/Detour/x86Detour.hpp b/polyhook2/Detour/x86Detour.hpp index f5cf2f3..f81aea7 100644 --- a/polyhook2/Detour/x86Detour.hpp +++ b/polyhook2/Detour/x86Detour.hpp @@ -7,9 +7,6 @@ #include "polyhook2/Detour/ADetour.hpp" #include "polyhook2/Enums.hpp" #include "polyhook2/Instruction.hpp" -#include "polyhook2/ZydisDisassembler.hpp" -#include "polyhook2/ErrorLog.hpp" -#include "polyhook2/MemProtector.hpp" using namespace std::placeholders; @@ -26,6 +23,10 @@ class x86Detour : public Detour { Mode getArchType() const override; protected: + bool fixSpecialCases(insts_t& prologue); + bool fixCallRoutineReturningSP(Instruction& callInst, const insts_t& routine); + bool fixCallInlineReturningSP(Instruction& callInst); + bool makeTrampoline(insts_t& prologue, insts_t& trampolineOut); }; diff --git a/polyhook2/Enums.hpp b/polyhook2/Enums.hpp index 82911c8..416b39a 100644 --- a/polyhook2/Enums.hpp +++ b/polyhook2/Enums.hpp @@ -18,17 +18,6 @@ enum class HookType { }; -//unsafe enum by design to allow binary OR -enum ProtFlag : uint8_t { - UNSET = 0, // Value means this give no information about protection state (un-read) - X = 1 << 1, - R = 1 << 2, - W = 1 << 3, - S = 1 << 4, - P = 1 << 5, - NONE = 1 << 6, //The value equaling the linux flag PROT_UNSET (read the prot, and the prot is unset) - RWX = R | W | X -}; /* Used by detours class only. This doesn't live in instruction because it * only makes sense for specific jump instructions (perhaps re-factor instruction @@ -55,18 +44,46 @@ enum class ErrorLevel { NONE }; -} +//unsafe enum by design to allow binary OR +enum class ProtFlag : uint8_t { + UNSET = 0, // Value means this give no information about protection state (un-read) + X = 1 << 1, + R = 1 << 2, + W = 1 << 3, + S = 1 << 4, + P = 1 << 5, + NONE = 1 << 6, //The value equaling the linux flag PROT_UNSET (read the prot, and the prot is unset) + RWX = R | W | X +}; -inline PLH::ProtFlag operator|(PLH::ProtFlag lhs, PLH::ProtFlag rhs) { - using underlying = typename std::underlying_type::type; - return static_cast ( +inline ProtFlag operator|(const ProtFlag lhs, const ProtFlag rhs) { + using underlying = std::underlying_type::type; + return static_cast ( static_cast(lhs) | static_cast(rhs) ); } -inline bool operator&(PLH::ProtFlag lhs, PLH::ProtFlag rhs) { - using underlying = typename std::underlying_type::type; +inline bool operator&(const ProtFlag lhs, const ProtFlag rhs) { + using underlying = std::underlying_type_t; return static_cast(lhs) & static_cast(rhs); } + +#ifdef PLH_DIAGNOSTICS +enum class Diagnostic : uint32_t { + None = 0, + TranslatedInstructions = 1 << 0, + FixedCallToRoutineReadingSP = 1 << 1, // #215 + FixedInlineCallToReadSP = 1 << 2, // #217 +}; +// Bitwise operators +inline Diagnostic operator|(const uint32_t flags, const Diagnostic diagnostic) { + return static_cast(flags | static_cast(diagnostic)); +} +inline Diagnostic operator&(const uint32_t flags, const Diagnostic diagnostic) { + return static_cast(flags & static_cast(diagnostic)); +} +#endif + +} diff --git a/polyhook2/IHook.hpp b/polyhook2/IHook.hpp index d659519..de8fdf0 100644 --- a/polyhook2/IHook.hpp +++ b/polyhook2/IHook.hpp @@ -70,9 +70,23 @@ class IHook : public MemAccessor { m_debugSet = state; } +#ifdef PLH_DIAGNOSTICS + bool hasDiagnostic(const Diagnostic diagnostic) const { + return static_cast(m_diagnostics & diagnostic); + } + + void setDiagnostic(const Diagnostic diagnostic) { + m_diagnostics = static_cast(m_diagnostics | diagnostic); + } +#endif + protected: bool m_debugSet; bool m_hooked = false; + +#ifdef PLH_DIAGNOSTICS + uint32_t m_diagnostics; +#endif }; // TODO: Move to these to test utils @@ -121,10 +135,10 @@ struct callback_type \ using return_type = Ret; \ }; -#ifndef _MSC_VER -#define __cdecl __attribute__((__cdecl__)) -#define __fastcall __attribute__((__fastcall__)) -#define __stdcall __attribute__((__stdcall__)) +#ifndef POLYHOOK2_OS_WINDOWS +#define __cdecl +#define __fastcall +#define __stdcall #endif #if defined(POLYHOOK2_ARCH_X86) && defined(POLYHOOK2_OS_WINDOWS) diff --git a/polyhook2/Instruction.hpp b/polyhook2/Instruction.hpp index 29bc05b..8278d0e 100644 --- a/polyhook2/Instruction.hpp +++ b/polyhook2/Instruction.hpp @@ -137,6 +137,10 @@ class Instruction { return m_isRelative; } + bool isReadingSP() const { + return m_isReadingSP; + } + /**Check if the instruction is a type with valid displacement**/ bool hasDisplacement() const { return m_hasDisplacement; @@ -178,10 +182,15 @@ class Instruction { return m_mnemonic + " " + m_opStr; } - /** Displacement size in bytes **/ - void setDisplacementSize(uint8_t size){ - m_dispSize = size; - } + /**Get parameters**/ + const std::string& getOperands() const { + return m_opStr; + } + + /** Displacement size in bytes **/ + void setDisplacementSize(uint8_t size) { + m_dispSize = size; + } size_t getDispSize() const { return m_dispSize; @@ -221,6 +230,10 @@ class Instruction { std::memcpy(&m_bytes[getDisplacementOffset()], &m_displacement.Absolute, dispSz); } + void setReadingSP(const bool isReadingSP) { + m_isReadingSP = isReadingSP; + } + long getUID() const { return m_uid.val; } @@ -326,6 +339,7 @@ class Instruction { bool m_isCalling; // Does this instruction is of a CALL type. bool m_isBranching; // Does this instruction jmp/call or otherwise change control flow bool m_isRelative; // Does the displacement need to be added to the address to retrieve where it points too? + bool m_isReadingSP; // Does this instruction read *SP in its second operand? bool m_hasDisplacement; // Does this instruction have the displacement fields filled (only rip/eip relative types are filled) bool m_hasImmediate; // Does this instruction have the immediate field filled? Displacement m_displacement; // Where an instruction points too (valid for jmp + call types, and RIP relative MEM types) @@ -337,7 +351,8 @@ class Instruction { uint8_t m_dispSize; // Size of the displacement, in bytes std::vector m_bytes; // All the raw bytes of this instruction - std::vector m_operands; // Types of all instruction operands + std::vector + m_operands; // Types of all instruction operands std::string m_mnemonic; std::string m_opStr; diff --git a/polyhook2/MemProtector.hpp b/polyhook2/MemProtector.hpp index 735c3ce..3ea9a62 100644 --- a/polyhook2/MemProtector.hpp +++ b/polyhook2/MemProtector.hpp @@ -9,8 +9,6 @@ #include "polyhook2/MemAccessor.hpp" #include "polyhook2/Enums.hpp" -PLH::ProtFlag operator|(PLH::ProtFlag lhs, PLH::ProtFlag rhs); -bool operator&(PLH::ProtFlag lhs, PLH::ProtFlag rhs); std::ostream& operator<<(std::ostream& os, const PLH::ProtFlag v); // prefer enum class over enum diff --git a/sources/ADetour.cpp b/sources/ADetour.cpp index 479392b..dc155da 100644 --- a/sources/ADetour.cpp +++ b/sources/ADetour.cpp @@ -67,23 +67,36 @@ bool Detour::followJmp(insts_t& functionInsts, const uint8_t curDepth) { // NOLI return true; } - if (!m_isFollowCallOnFnAddress) - { + if (!m_isFollowCallOnFnAddress) { Log::log("setting: Do NOT follow CALL on fnAddress", ErrorLevel::INFO); - if (functionInsts.front().isCalling()) - { + if (functionInsts.front().isCalling()) { Log::log("First assembly instruction is CALL", ErrorLevel::INFO); return true; - } + } + } + + const auto& firstInst = functionInsts.front(); + + // Here we know that first instruction is call imm32. + // But we first need to check if it is a special case #215. If it is, then we ignore it. + if (getRoutineReturningSP(firstInst)) { + Log::log("First assembly instruction is CALL, but to a routine reading SP. Skipping.", ErrorLevel::INFO); + return true; + } + + // Then we check for special case #217 + if (isCallInlineReturningSP(firstInst)) { + Log::log("First assembly instruction is CALL, but to an inline routine reading SP. Skipping.", ErrorLevel::INFO); + return true; } // might be a mem type like jmp rax, not supported - if (!functionInsts.front().hasDisplacement()) { + if (!firstInst.hasDisplacement()) { Log::log("Branching instruction without displacement encountered", ErrorLevel::WARN); return false; } - uint64_t dest = functionInsts.front().getDestination(); + const uint64_t dest = firstInst.getDestination(); functionInsts = m_disasm.disassemble(dest, dest, dest + 100, *this); return followJmp(functionInsts, curDepth + 1); // recurse } @@ -187,9 +200,9 @@ bool Detour::unHook() { MemoryProtector prot(m_fnAddress, calcInstsSz(m_originalInsts), ProtFlag::R | ProtFlag::W | ProtFlag::X, *this); ZydisDisassembler::writeEncoding(m_originalInsts, *this); - if (m_trampoline != NULL) { + if (m_trampoline) { delete[](uint8_t*) m_trampoline; - m_trampoline = NULL; + m_trampoline = 0; } // This code requires that m_userTrampVar is static or has global lifetime. @@ -267,4 +280,42 @@ insts_t Detour::make_nops(uint64_t address, uint16_t size) const { return nops; } +std::optional Detour::getRoutineReturningSP(const Instruction& callInst) { + if (callInst.getMnemonic() != "call") { + return std::nullopt; + } + + const uint64_t routineAddress = callInst.getRelativeDestination(); + + if ( // routine must not be const to enable automatic move + auto routine = m_disasm.disassemble(routineAddress, routineAddress, routineAddress + 4, *this); + routine.size() == 2 && + routine[0].getMnemonic() == "mov" && routine[0].hasRegister() && routine[0].isReadingSP() && + routine[1].getMnemonic() == "ret" + ) { + return routine; + } + + return std::nullopt; +} + +bool Detour::isCallInlineReturningSP(const Instruction& callInst) { + if (callInst.getMnemonic() != "call") { + return false; + } + + if (callInst.getDisplacement().Relative != 0) { + return false; + } + + const uint64_t nextAddress = callInst.getRelativeDestination(); + + const auto nextInst = m_disasm.disassemble(nextAddress, nextAddress, nextAddress + 1, *this); + + return + nextInst.size() == 1 && + nextInst[0].getMnemonic() == "pop" && + nextInst[0].hasRegister(); +} + } diff --git a/sources/InternalUtils.hpp b/sources/InternalUtils.hpp new file mode 100644 index 0000000..feccf87 --- /dev/null +++ b/sources/InternalUtils.hpp @@ -0,0 +1,9 @@ +#pragma once + +// This header defines utilities that are not meant to be used by the users of this library + +#ifdef PLH_DIAGNOSTICS +#define PLH_SET_DIAGNOSTIC(DIAGNOSTIC) setDiagnostic(DIAGNOSTIC) +#else +#define PLH_SET_DIAGNOSTIC(DIAGNOSTIC) +#endif diff --git a/sources/ZydisDisassembler.cpp b/sources/ZydisDisassembler.cpp index 234519d..a0db668 100644 --- a/sources/ZydisDisassembler.cpp +++ b/sources/ZydisDisassembler.cpp @@ -40,7 +40,7 @@ PLH::insts_t PLH::ZydisDisassembler::disassemble( insts_t insVec; // m_branchMap.clear(); - uint64_t size = end - start; + int64_t size = end - start; assert(size > 0); if (size <= 0) { return insVec; @@ -139,7 +139,13 @@ void PLH::ZydisDisassembler::setDisplacementFields(PLH::Instruction& inst, const } case ZYDIS_OPERAND_TYPE_UNUSED: break; - case ZYDIS_OPERAND_TYPE_MEMORY: { // Relative to RIP/EIP + case ZYDIS_OPERAND_TYPE_MEMORY: { + if (i == 1 && operand->mem.base == ZYDIS_REGISTER_ESP) { + inst.setReadingSP(true); + break; + } + + // Relative to RIP/EIP inst.addOperandType(Instruction::OperandType::Displacement); if (zydisInst->attributes & ZYDIS_ATTRIB_IS_RELATIVE) { diff --git a/sources/x64Detour.cpp b/sources/x64Detour.cpp index cb0e2bc..51065cc 100644 --- a/sources/x64Detour.cpp +++ b/sources/x64Detour.cpp @@ -2,10 +2,11 @@ // Created by steve on 7/5/17. // #include +#include #include +#include #include #include -#include #include @@ -13,7 +14,7 @@ #include "polyhook2/MemProtector.hpp" #include "polyhook2/Misc.hpp" -#include +#include "./InternalUtils.hpp" namespace PLH { @@ -726,7 +727,7 @@ Instruction makeRelJmpWithAbsDest(const uint64_t address, const uint64_t abs_des bool x64Detour::makeTrampoline(insts_t& prologue, insts_t& outJmpTable) { assert(!prologue.empty()); - assert(m_trampoline == NULL); + assert(!m_trampoline); const uint64_t prolStart = prologue.front().getAddress(); const uint16_t prolSz = calcInstsSz(prologue); @@ -754,7 +755,7 @@ bool x64Detour::makeTrampoline(insts_t& prologue, insts_t& outJmpTable) { // allocate new trampoline before deleting old to increase odds of new mem address auto tmpTrampoline = (uint64_t) new uint8_t[m_trampolineSz]; - if (m_trampoline != NULL) { + if (m_trampoline) { delete[] (uint8_t*) m_trampoline; } @@ -770,6 +771,7 @@ bool x64Detour::makeTrampoline(insts_t& prologue, insts_t& outJmpTable) { } if(!instsNeedingTranslation.empty()) { Log::log("Instructions needing translation:\n" + instsToStr(instsNeedingTranslation) + "\n", ErrorLevel::INFO); + PLH_SET_DIAGNOSTIC(Diagnostic::TranslatedInstructions); } Log::log("Trampoline address: " + int_to_hex(m_trampoline), ErrorLevel::INFO); diff --git a/sources/x86Detour.cpp b/sources/x86Detour.cpp index a9f6f69..ea755a1 100644 --- a/sources/x86Detour.cpp +++ b/sources/x86Detour.cpp @@ -1,12 +1,20 @@ // // Created by steve on 7/5/17. // +#include + +#include +#include + #include "polyhook2/Detour/x86Detour.hpp" +#include "./InternalUtils.hpp" + namespace PLH { x86Detour::x86Detour(const uint64_t fnAddress, const uint64_t fnCallback, uint64_t* userTrampVar) - : Detour(fnAddress, fnCallback, userTrampVar, getArchType()) {} + : Detour(fnAddress, fnCallback, userTrampVar, getArchType()) { +} Mode x86Detour::getArchType() const { return Mode::x86; @@ -16,12 +24,112 @@ uint8_t getJmpSize() { return 5; } +bool x86Detour::fixCallRoutineReturningSP(Instruction& callInst, const insts_t& routine) { + Log::log( + "Fixing special case [call routine returning ESP]:\n" + instsToStr(std::vector{callInst}), + ErrorLevel::INFO + ); + + const auto destReg = routine[0].getOperands().substr(0, 3); + const uint32_t originalAddress = callInst.getAddress(); + const uint32_t originalNextAddress = originalAddress + callInst.size(); + + // AsmTK parses strings for AsmJit, which generates the binary code. + asmjit::CodeHolder code; + asmjit::JitRuntime asmjitRt; + code.init(asmjitRt.environment()); + + asmjit::x86::Assembler assembler(&code); + asmtk::AsmParser parser(&assembler); + + // Parse the instructions via AsmTK + if (const auto error = parser.parse(std::format("mov {}, {:#x}", destReg, originalNextAddress).c_str())) { + Log::log(std::format("AsmTK error: {}", asmjit::DebugUtils::errorAsString(error)), ErrorLevel::SEV); + return false; + } + + // Generate the binary code via AsmJit + uint64_t movAddress = 0; + if (const auto error = asmjitRt.add(&movAddress, &code)) { + Log::log(std::format("AsmJIT error: {}", asmjit::DebugUtils::errorAsString(error)), ErrorLevel::SEV); + return false; + } + + // Replace `call rel32` instruction with `mov reg, imm32`. Both are 5 bytes long. + + callInst = m_disasm.disassemble(movAddress, movAddress, movAddress + callInst.size(), *this)[0]; + callInst.setAddress(originalAddress); + + return true; +} + +bool x86Detour::fixCallInlineReturningSP(Instruction& callInst) { + Log::log( + "Fixing special case [call inline returning ESP]:\n" + instsToStr(std::vector{callInst}), + ErrorLevel::INFO + ); + + const uint32_t originalAddress = callInst.getAddress(); + const uint32_t originalNextAddress = originalAddress + callInst.size(); + + // AsmTK parses strings for AsmJit, which generates the binary code. + asmjit::CodeHolder code; + asmjit::JitRuntime asmjitRt; + code.init(asmjitRt.environment()); + + asmjit::x86::Assembler assembler(&code); + asmtk::AsmParser parser(&assembler); + + // Parse the instructions via AsmTK + if (const auto error = parser.parse(std::format("push {:#x}", originalNextAddress).c_str())) { + Log::log(std::format("AsmTK error: {}", asmjit::DebugUtils::errorAsString(error)), ErrorLevel::SEV); + return false; + } + + // Generate the binary code via AsmJit + uint64_t pushAddress = 0; + if (const auto error = asmjitRt.add(&pushAddress, &code)) { + Log::log(std::format("AsmJIT error: {}", asmjit::DebugUtils::errorAsString(error)), ErrorLevel::SEV); + return false; + } + + // Replace `call rel32` instruction with `push imm32`. Both are 5 bytes long. + + callInst = m_disasm.disassemble(pushAddress, pushAddress, pushAddress + callInst.size(), *this)[0]; + callInst.setAddress(originalAddress); + + return true; +} + +/** + * @param prologue Must be before any relocations/translations were made + */ +bool x86Detour::fixSpecialCases(insts_t& prologue) { + for (auto& instruction: prologue) { + if (const auto routine = getRoutineReturningSP(instruction)) { + // Fix for #215 https://github.com/stevemk14ebr/PolyHook_2_0/issues/215 + if (!fixCallRoutineReturningSP(instruction, *routine)) { + return false; + } + PLH_SET_DIAGNOSTIC(Diagnostic::FixedCallToRoutineReadingSP); + } else if (isCallInlineReturningSP(instruction)) { + // Fix for #217 https://github.com/stevemk14ebr/PolyHook_2_0/issues/217 + if (!fixCallInlineReturningSP(instruction)) { + return false; + } + PLH_SET_DIAGNOSTIC(Diagnostic::FixedInlineCallToReadSP); + } + } + + return true; +} + bool x86Detour::hook() { Log::log("m_fnAddress: " + int_to_hex(m_fnAddress) + "\n", ErrorLevel::INFO); - + insts_t insts = m_disasm.disassemble(m_fnAddress, m_fnAddress, m_fnAddress + 100, *this); Log::log("Original function:\n" + instsToStr(insts) + "\n", ErrorLevel::INFO); - + if (insts.empty()) { Log::log("Disassembler unable to decode any valid instructions", ErrorLevel::SEV); return false; @@ -92,6 +200,11 @@ bool x86Detour::hook() { bool x86Detour::makeTrampoline(insts_t& prologue, insts_t& trampolineOut) { assert(!prologue.empty()); + + if (!fixSpecialCases(prologue)) { + return false; + } + const uint64_t prolStart = prologue.front().getAddress(); const uint16_t prolSz = calcInstsSz(prologue); @@ -99,7 +212,7 @@ bool x86Detour::makeTrampoline(insts_t& prologue, insts_t& trampolineOut) { address will change each attempt, which changes delta, which changes the number of needed entries. So we just try until we hit that lucky number that works. - The relocation could also because of data operations too. But that's specific to the function and can't + The relocation could also fail because of data operations too. But that's specific to the function and can't work again on a retry (same function, duh). Return immediately in that case. **/ uint8_t neededEntryCount = 5; @@ -114,7 +227,7 @@ bool x86Detour::makeTrampoline(insts_t& prologue, insts_t& trampolineOut) { return false; } - if (m_trampoline != NULL) { + if (m_trampoline) { delete[](unsigned char*) m_trampoline; neededEntryCount = (uint8_t) instsNeedingEntry.size(); }