Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Override dlsym instead of dlopen to correctly honour RPATH/RUNPATHS #525

Merged
merged 4 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/525.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug that was causing ``dlopen`` to not load shared libraries that have an RPATH/RUNPATH set.
13 changes: 10 additions & 3 deletions src/memray/_memray/elf_shenanigans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,13 @@ overwrite_elf_table(
const char* symname = symbols.getSymbolNameByIndex(index);
auto symbol_addr = relocation.r_offset + base_addr;
#define FOR_EACH_HOOKED_FUNCTION(hookname) \
if (strcmp(hooks::hookname.d_symbol, symname) == 0) { \
patch_symbol(hooks::hookname, &intercept::hookname, symname, symbol_addr, restore_original); \
if (strcmp(MEMRAY_ORIG(hookname).d_symbol, symname) == 0) { \
patch_symbol( \
MEMRAY_ORIG(hookname), \
&intercept::hookname, \
symname, \
symbol_addr, \
restore_original); \
continue; \
}
MEMRAY_HOOKED_FUNCTIONS
Expand Down Expand Up @@ -166,7 +171,9 @@ phdrs_callback(dl_phdr_info* info, [[maybe_unused]] size_t size, void* data) noe
patched.insert(info->dlpi_name);
}

if (strstr(info->dlpi_name, "/ld-linux") || strstr(info->dlpi_name, "linux-vdso.so.1")) {
if (strstr(info->dlpi_name, "/ld-linux") || strstr(info->dlpi_name, "/ld-musl")
|| strstr(info->dlpi_name, "linux-vdso.so.1"))
{
// Avoid chaos by not overwriting the symbols in the linker.
// TODO: Don't override the symbols in our shared library!
return 0;
Expand Down
133 changes: 95 additions & 38 deletions src/memray/_memray/hooks.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include <cassert>
#include <cstdio>
#include <mutex>
#include <unordered_set>
pablogsal marked this conversation as resolved.
Show resolved Hide resolved

#include "hooks.h"
#include "tracking_api.h"
Expand Down Expand Up @@ -88,14 +90,14 @@ isDeallocator(const Allocator& allocator)
__builtin_unreachable();
}

#define FOR_EACH_HOOKED_FUNCTION(f) SymbolHook<decltype(&::f)> f(#f, &::f);
#define FOR_EACH_HOOKED_FUNCTION(f) SymbolHook<decltype(&::f)> MEMRAY_ORIG_NO_NS(f)(#f, &::f);
MEMRAY_HOOKED_FUNCTIONS
#undef FOR_EACH_HOOKED_FUNCTION

void
ensureAllHooksAreValid()
{
#define FOR_EACH_HOOKED_FUNCTION(f) f.ensureValidOriginalSymbol();
#define FOR_EACH_HOOKED_FUNCTION(f) MEMRAY_ORIG(f).ensureValidOriginalSymbol();
MEMRAY_HOOKED_FUNCTIONS
#undef FOR_EACH_HOOKED_FUNCTION
}
Expand Down Expand Up @@ -164,12 +166,12 @@ pymalloc_free(void* ctx, void* ptr) noexcept
void*
malloc(size_t size) noexcept
{
assert(hooks::malloc);
assert(MEMRAY_ORIG(malloc));

void* ptr;
{
tracking_api::RecursionGuard guard;
ptr = hooks::malloc(size);
ptr = MEMRAY_ORIG(malloc)(size);
}
if (ptr) {
tracking_api::Tracker::trackAllocation(ptr, size, hooks::Allocator::MALLOC);
Expand All @@ -180,7 +182,7 @@ malloc(size_t size) noexcept
void
free(void* ptr) noexcept
{
assert(hooks::free);
assert(MEMRAY_ORIG(free));

// We need to call our API before we call the real free implementation
// to make sure that the pointer is not reused in-between.
Expand All @@ -190,19 +192,19 @@ free(void* ptr) noexcept

{
tracking_api::RecursionGuard guard;
hooks::free(ptr);
MEMRAY_ORIG(free)(ptr);
}
}

void*
realloc(void* ptr, size_t size) noexcept
{
assert(hooks::realloc);
assert(MEMRAY_ORIG(realloc));

void* ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::realloc(ptr, size);
ret = MEMRAY_ORIG(realloc)(ptr, size);
}
if (ret) {
if (ptr != nullptr) {
Expand All @@ -216,12 +218,12 @@ realloc(void* ptr, size_t size) noexcept
void*
calloc(size_t num, size_t size) noexcept
{
assert(hooks::calloc);
assert(MEMRAY_ORIG(calloc));

void* ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::calloc(num, size);
ret = MEMRAY_ORIG(calloc)(num, size);
}
if (ret) {
tracking_api::Tracker::trackAllocation(ret, num * size, hooks::Allocator::CALLOC);
Expand All @@ -232,11 +234,11 @@ calloc(size_t num, size_t size) noexcept
void*
mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) noexcept
{
assert(hooks::mmap);
assert(MEMRAY_ORIG(mmap));
void* ptr;
{
tracking_api::RecursionGuard guard;
ptr = hooks::mmap(addr, length, prot, flags, fd, offset);
ptr = MEMRAY_ORIG(mmap)(addr, length, prot, flags, fd, offset);
}
if (ptr != MAP_FAILED) {
tracking_api::Tracker::trackAllocation(ptr, length, hooks::Allocator::MMAP);
Expand All @@ -248,11 +250,11 @@ mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) noexc
void*
mmap64(void* addr, size_t length, int prot, int flags, int fd, off64_t offset) noexcept
{
assert(hooks::mmap64);
assert(MEMRAY_ORIG(mmap64));
void* ptr;
{
tracking_api::RecursionGuard guard;
ptr = hooks::mmap64(addr, length, prot, flags, fd, offset);
ptr = MEMRAY_ORIG(mmap64)(addr, length, prot, flags, fd, offset);
}
if (ptr != MAP_FAILED) {
tracking_api::Tracker::trackAllocation(ptr, length, hooks::Allocator::MMAP);
Expand All @@ -264,23 +266,23 @@ mmap64(void* addr, size_t length, int prot, int flags, int fd, off64_t offset) n
int
munmap(void* addr, size_t length) noexcept
{
assert(hooks::munmap);
assert(MEMRAY_ORIG(munmap));
tracking_api::Tracker::trackDeallocation(addr, length, hooks::Allocator::MUNMAP);
{
tracking_api::RecursionGuard guard;
return hooks::munmap(addr, length);
return MEMRAY_ORIG(munmap)(addr, length);
}
}

void*
valloc(size_t size) noexcept
{
assert(hooks::valloc);
assert(MEMRAY_ORIG(valloc));

void* ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::valloc(size);
ret = MEMRAY_ORIG(valloc)(size);
}
if (ret) {
tracking_api::Tracker::trackAllocation(ret, size, hooks::Allocator::VALLOC);
Expand All @@ -291,34 +293,88 @@ valloc(size_t size) noexcept
int
posix_memalign(void** memptr, size_t alignment, size_t size) noexcept
{
assert(hooks::posix_memalign);
assert(MEMRAY_ORIG(posix_memalign));

int ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::posix_memalign(memptr, alignment, size);
ret = MEMRAY_ORIG(posix_memalign)(memptr, alignment, size);
}
if (!ret) {
tracking_api::Tracker::trackAllocation(*memptr, size, hooks::Allocator::POSIX_MEMALIGN);
}
return ret;
}

// We need to override dlopen/dlclose to account for new shared libraries being
// loaded in the process memory space. This is needed so we can correctly track
// allocations in those libraries by overriding their PLT entries and also so we
// can properly map the addresses of the symbols in those libraries when we
// resolve later native traces. Unfortunately, we can't just override dlopen
// directly because of the following edge case: when a shared library dlopen's
// another by name (e.g. dlopen("libfoo.so")), the dlopen call will honor the
// RPATH/RUNPATH of the calling library if it's set. Some libraries set an
// RPATH/RUNPATH based on $ORIGIN (the path of the calling library) to load
// dependencies from a relative directory based on the location of the calling
// library. This means that if we override dlopen, we'll end up loading the
// library from the wrong path or more likely, not loading it at all because the
// dynamic loader will think the memray extension is the calling library and
pablogsal marked this conversation as resolved.
Show resolved Hide resolved
// the RPATH of the real calling library will not be honoured.
//
// To work around this, we override dlsym instead and override the symbols in
// the loaded libraries only the first time we have seen a handle passed to
// dlsym. This works because for a symbol from a given dlopen-ed library to
// appear in a call stack, *something* from that library has to be dlsym-ed
// first. The only exceptions to this are static initializers, but we cannot
// track those anyway by overriding dlopen as they run within the dlopen call
// itself.
pablogsal marked this conversation as resolved.
Show resolved Hide resolved
// There's another set of cases we would miss: if library A has a static initializer
// that passes a pointer to one of its functions to library B, and library B stores
// that function pointer, then we could see calls into library A via the function pointer
// held by library B, even though dlsym was never called on library A. This should be
// very rare and will be corrected the next time library B calls dlsym so this should
// not be a problem in practice.

// Records the library handles that have already been passed to dlsym, so
// that per-library work is only done the first time a handle is seen.
// Every access to the underlying set is performed while holding the
// internal mutex.
class DlsymCache
{
  public:
    // Adds `handle` to the set of known handles.
    //
    // Returns whatever std::unordered_set::insert returns: a pair whose
    // `second` member is true only when the handle was not present before.
    // Note that the lock is released on return, so the iterator in `first`
    // is no longer protected against concurrent modification.
    auto insert(const void* handle)
    {
        const std::lock_guard<std::mutex> guard(d_mutex);
        auto outcome = d_handles.insert(handle);
        return outcome;
    }

    // Forgets `handle`; a no-op when the handle was never recorded.
    void erase(const void* handle)
    {
        const std::lock_guard<std::mutex> guard(d_mutex);
        d_handles.erase(handle);
    }

  private:
    mutable std::mutex d_mutex;  // serializes all access to d_handles
    std::unordered_set<const void*> d_handles;  // handles seen so far
};

static DlsymCache dlsym_cache;

void*
dlopen(const char* filename, int flag) noexcept
dlsym(void* handle, const char* symbol) noexcept
{
assert(hooks::dlopen);
assert(MEMRAY_ORIG(dlsym));
void* ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::dlopen(filename, flag);
ret = MEMRAY_ORIG(dlsym)(handle, symbol);
}
if (ret) {
tracking_api::Tracker::invalidate_module_cache();
if (filename
&& (nullptr != strstr(filename, "/_greenlet.") || nullptr != strstr(filename, "/greenlet.")))
{
tracking_api::Tracker::beginTrackingGreenlets();
auto [_, inserted] = dlsym_cache.insert(handle);
if (inserted) {
tracking_api::Tracker::invalidate_module_cache();
if (symbol
godlygeek marked this conversation as resolved.
Show resolved Hide resolved
&& (0 == strcmp(symbol, "PyInit_greenlet") || 0 == strcmp(symbol, "PyInit__greenlet")))
{
tracking_api::Tracker::beginTrackingGreenlets();
}
}
}
return ret;
Expand All @@ -327,13 +383,14 @@ dlopen(const char* filename, int flag) noexcept
int
dlclose(void* handle) noexcept
{
assert(hooks::dlclose);
assert(MEMRAY_ORIG(dlclose));

int ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::dlclose(handle);
ret = MEMRAY_ORIG(dlclose)(handle);
}
dlsym_cache.erase(handle);
tracking_api::NativeTrace::flushCache();
if (!ret) tracking_api::Tracker::invalidate_module_cache();
return ret;
Expand All @@ -342,12 +399,12 @@ dlclose(void* handle) noexcept
void*
aligned_alloc(size_t alignment, size_t size) noexcept
{
assert(hooks::aligned_alloc);
assert(MEMRAY_ORIG(aligned_alloc));

void* ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::aligned_alloc(alignment, size);
ret = MEMRAY_ORIG(aligned_alloc)(alignment, size);
}
if (ret) {
tracking_api::Tracker::trackAllocation(ret, size, hooks::Allocator::ALIGNED_ALLOC);
Expand All @@ -360,12 +417,12 @@ aligned_alloc(size_t alignment, size_t size) noexcept
void*
memalign(size_t alignment, size_t size) noexcept
{
assert(hooks::memalign);
assert(MEMRAY_ORIG(memalign));

void* ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::memalign(alignment, size);
ret = MEMRAY_ORIG(memalign)(alignment, size);
}
if (ret) {
tracking_api::Tracker::trackAllocation(ret, size, hooks::Allocator::MEMALIGN);
Expand All @@ -377,12 +434,12 @@ memalign(size_t alignment, size_t size) noexcept
void*
pvalloc(size_t size) noexcept
{
assert(hooks::pvalloc);
assert(MEMRAY_ORIG(pvalloc));

void* ret;
{
tracking_api::RecursionGuard guard;
ret = hooks::pvalloc(size);
ret = MEMRAY_ORIG(pvalloc)(size);
}
if (ret) {
tracking_api::Tracker::trackAllocation(ret, size, hooks::Allocator::PVALLOC);
Expand All @@ -407,7 +464,7 @@ prctl(int option, ...) noexcept
tracking_api::Tracker::registerThreadName(name);
}

unsigned long ret = hooks::prctl(option, args[0], args[1], args[2], args[3]);
unsigned long ret = MEMRAY_ORIG(prctl)(option, args[0], args[1], args[2], args[3]);

return ret;
}
Expand All @@ -416,7 +473,7 @@ prctl(int option, ...) noexcept
PyGILState_STATE
PyGILState_Ensure() noexcept
{
PyGILState_STATE ret = hooks::PyGILState_Ensure();
PyGILState_STATE ret = MEMRAY_ORIG(PyGILState_Ensure)();
tracking_api::install_trace_function();
return ret;
}
Expand Down
10 changes: 7 additions & 3 deletions src/memray/_memray/hooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
FOR_EACH_HOOKED_FUNCTION(aligned_alloc) \
FOR_EACH_HOOKED_FUNCTION(mmap) \
FOR_EACH_HOOKED_FUNCTION(munmap) \
FOR_EACH_HOOKED_FUNCTION(dlopen) \
FOR_EACH_HOOKED_FUNCTION(dlsym) \
FOR_EACH_HOOKED_FUNCTION(dlclose) \
FOR_EACH_HOOKED_FUNCTION(PyGILState_Ensure) \
MEMRAY_PLATFORM_HOOKED_FUNCTIONS
Expand Down Expand Up @@ -140,7 +140,11 @@ allocatorKind(const Allocator& allocator);
bool
isDeallocator(const Allocator& allocator);

#define FOR_EACH_HOOKED_FUNCTION(f) extern SymbolHook<decltype(&::f)> f;
// Helpers for naming the SymbolHook object that corresponds to a hooked
// function `f`.
//
// MEMRAY_ORIG_concat_helper(x, y) pastes its two arguments into one token.
// Routing the paste through this extra macro level means the argument of
// MEMRAY_ORIG_NO_NS is macro-expanded before it is concatenated.
#define MEMRAY_ORIG_concat_helper(x, y) x##y
// MEMRAY_ORIG_NO_NS(f) yields the unqualified identifier `memray_<f>`, for
// use where the name is being declared or defined.
#define MEMRAY_ORIG_NO_NS(f) MEMRAY_ORIG_concat_helper(memray_, f)
// MEMRAY_ORIG(f) yields the fully qualified name `memray::hooks::memray_<f>`,
// for use at call sites regardless of the enclosing namespace.
#define MEMRAY_ORIG(f) memray::hooks::MEMRAY_ORIG_NO_NS(f)

#define FOR_EACH_HOOKED_FUNCTION(f) extern SymbolHook<decltype(&::f)> MEMRAY_ORIG_NO_NS(f);
MEMRAY_HOOKED_FUNCTIONS
#undef FOR_EACH_HOOKED_FUNCTION

Expand Down Expand Up @@ -175,7 +179,7 @@ void*
pvalloc(size_t size) noexcept;

void*
dlopen(const char* filename, int flag) noexcept;
dlsym(void* handle, const char* symbol) noexcept;

int
dlclose(void* handle) noexcept;
Expand Down
Loading
Loading