diff --git a/cmake/Findlibdatadog.cmake b/cmake/Findlibdatadog.cmake index 254d92069..5c7661f0c 100644 --- a/cmake/Findlibdatadog.cmake +++ b/cmake/Findlibdatadog.cmake @@ -5,7 +5,7 @@ # libdatadog : common profiler imported libraries # https://github.com/DataDog/libdatadog/releases/tag/v7.0.0 set(TAG_LIBDATADOG - "v8.0.0" + "v9.0.0" CACHE STRING "libdatadog github tag") set(Datadog_ROOT ${VENDOR_PATH}/libdatadog-${TAG_LIBDATADOG}) diff --git a/include/crash_reporter.hpp b/include/crash_reporter.hpp new file mode 100644 index 000000000..242423184 --- /dev/null +++ b/include/crash_reporter.hpp @@ -0,0 +1,22 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. + +#pragma once + +#include "ddprof_defs.hpp" +#include "ddres_def.hpp" + +#include "unwind_state.hpp" + +#include +#include + +namespace ddprof { + +struct ExporterInput; + +bool report_crash(pid_t pid, pid_t tid, const ExporterInput &export_input); + +} // namespace ddprof diff --git a/include/crash_tracker.hpp b/include/crash_tracker.hpp new file mode 100644 index 000000000..3304b874d --- /dev/null +++ b/include/crash_tracker.hpp @@ -0,0 +1,16 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. 
+ +#pragma once + +#include +#include + +namespace ddprof { + +bool install_crash_tracker(const std::string &handler_exe, + const std::vector &handler_args); + +} // namespace ddprof diff --git a/include/ddprof_cli.hpp b/include/ddprof_cli.hpp index 44b174217..1c249cb7a 100644 --- a/include/ddprof_cli.hpp +++ b/include/ddprof_cli.hpp @@ -71,6 +71,8 @@ struct DDProfCLI { bool remote_symbolization{false}; bool disable_symbolization{false}; bool reorder_events{false}; // reorder events by timestamp + bool report_crash{false}; + bool track_crashes{false}; int maximum_pids{-1}; std::string socket_path; diff --git a/include/ddprof_context.hpp b/include/ddprof_context.hpp index f59533c25..7aa17499a 100644 --- a/include/ddprof_context.hpp +++ b/include/ddprof_context.hpp @@ -36,6 +36,7 @@ struct DDProfContext { bool remote_symbolization{false}; bool disable_symbolization{false}; bool reorder_events{false}; // reorder events by timestamp + bool report_crash{false}; int maximum_pids{0}; cpu_set_t cpu_affinity{}; diff --git a/include/dumpable.hpp b/include/dumpable.hpp new file mode 100644 index 000000000..5b48ca787 --- /dev/null +++ b/include/dumpable.hpp @@ -0,0 +1,45 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. + +#pragma once + +#include + +namespace ddprof { + +class DumpableRestorer { +public: + DumpableRestorer() : _was_dumpable{prctl(PR_GET_DUMPABLE) > 0} {} + ~DumpableRestorer() { prctl(PR_SET_DUMPABLE, _was_dumpable ? 
1 : 0); } + + DumpableRestorer(const DumpableRestorer &) = delete; + DumpableRestorer operator=(const DumpableRestorer &) = delete; + +private: + bool _was_dumpable; +}; + +class DumpableGuard { +public: + DumpableGuard() : _was_dumpable(prctl(PR_GET_DUMPABLE, 0) > 0) { + if (!_was_dumpable) { + prctl(PR_SET_DUMPABLE, 1); + } + } + + ~DumpableGuard() { + if (!_was_dumpable) { + prctl(PR_SET_DUMPABLE, 0); + } + } + + DumpableGuard(const DumpableGuard &) = delete; + DumpableGuard operator=(const DumpableGuard &) = delete; + +private: + bool _was_dumpable; +}; + +} // namespace ddprof \ No newline at end of file diff --git a/include/exporter/ddprof_exporter.hpp b/include/exporter/ddprof_exporter.hpp index 2ac594d87..41e9586c9 100644 --- a/include/exporter/ddprof_exporter.hpp +++ b/include/exporter/ddprof_exporter.hpp @@ -39,4 +39,6 @@ DDRes ddprof_exporter_export(ddog_prof_Profile *profile, DDRes ddprof_exporter_free(DDProfExporter *exporter); +std::string determine_agent_url(const ExporterInput &exporter_input); + } // namespace ddprof diff --git a/include/perf_archmap.hpp b/include/perf_archmap.hpp index 7c0473c60..461eb7e5b 100644 --- a/include/perf_archmap.hpp +++ b/include/perf_archmap.hpp @@ -88,7 +88,8 @@ enum PERF_ARCHMAP_ARM { PAM_ARM_X28, PAM_ARM_X29, PAM_ARM_FP = PAM_ARM_X29, // For uniformity - PAM_ARM_LR, + PAM_ARM_X30, + PAM_ARM_LR = PAM_ARM_X30, // For uniformity PAM_ARM_SP, PAM_ARM_PC, PAM_ARM_MAX, diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index e0dfdda0c..90f1276e0 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -11,6 +11,7 @@ #include "ddprof_file_info.hpp" #include "ddres_def.hpp" #include "perf_watcher.hpp" +#include "symbolizer.hpp" #include "tags.hpp" #include "unwind_output.hpp" @@ -59,4 +60,10 @@ DDRes pprof_write_profile(const DDProfPProf *pprof, int fd); DDRes pprof_free_profile(DDProfPProf *pprof); +DDRes process_symbolization( + std::span locs, const SymbolHdr 
&symbol_hdr, + const FileInfoVector &file_infos, Symbolizer *symbolizer, + std::array &locations_buff, + Symbolizer::BlazeResultsWrapper &session_results, unsigned &write_index); + } // namespace ddprof diff --git a/include/stack_helper.hpp b/include/stack_helper.hpp index 048685f92..9ced0ac8f 100644 --- a/include/stack_helper.hpp +++ b/include/stack_helper.hpp @@ -8,6 +8,7 @@ #include "ddprof_defs.hpp" namespace ddprof { +struct UnwindState; bool memory_read(ProcessAddress_t addr, ElfWord_t *result, int regno, - void *arg); -} + UnwindState *us); +} // namespace ddprof diff --git a/include/symbol_helper.hpp b/include/symbol_helper.hpp index 3cba2bfc4..fcd5bd70c 100644 --- a/include/symbol_helper.hpp +++ b/include/symbol_helper.hpp @@ -3,6 +3,7 @@ // developed at Datadog (https://www.datadoghq.com/). Copyright 2024-Present // Datadog, Inc. +#include "defer.hpp" #include "unwind_state.hpp" #include @@ -12,8 +13,8 @@ namespace ddprof { // This is a test API. Use the symbolizer to populate pprof structures -std::vector collect_symbols(UnwindState &state, - blaze_symbolizer *symbolizer) { +inline std::vector collect_symbols(UnwindState &state, + blaze_symbolizer *symbolizer) { std::vector demangled_symbols; auto &symbol_table = state.symbol_hdr._symbol_table; for (size_t iloc = 0; iloc < state.output.locs.size(); ++iloc) { @@ -23,7 +24,7 @@ std::vector collect_symbols(UnwindState &state, std::array elf_addr{state.output.locs[iloc].elf_addr}; const auto &file_info_value = state.dso_hdr.get_file_info_value( state.output.locs[iloc].file_info_id); - blaze_symbolize_src_elf src_elf{ + const blaze_symbolize_src_elf src_elf{ .type_size = sizeof(blaze_symbolize_src_elf), .path = file_info_value.get_path().c_str(), .debug_syms = true, diff --git a/include/symbolizer.hpp b/include/symbolizer.hpp index 45f7ea1fe..1253638a1 100644 --- a/include/symbolizer.hpp +++ b/include/symbolizer.hpp @@ -87,6 +87,8 @@ class Symbolizer { int remove_unvisited(); void reset_unvisited_flag(); + 
AddrFormat reported_addr_format() const { return _reported_addr_format; } + private: struct BlazeSymbolizerDeleter { void operator()(blaze_symbolizer *ptr) const { diff --git a/include/syscalls.hpp b/include/syscalls.hpp index b0b0dd26f..a3c2e3bd5 100644 --- a/include/syscalls.hpp +++ b/include/syscalls.hpp @@ -5,13 +5,25 @@ #pragma once +#include #include #include namespace ddprof { -static inline pid_t gettid() { return syscall(SYS_gettid); } +inline pid_t gettid() { return syscall(SYS_gettid); } -static inline int memfd_create(const char *name, unsigned int flags) { +inline int memfd_create(const char *name, unsigned int flags) { return syscall(SYS_memfd_create, name, flags); } + +inline int futex(uint32_t *uaddr, int futex_op, uint32_t val, + const struct timespec *timeout, uint32_t *uaddr2, + uint32_t val3) { + return syscall(SYS_futex, uaddr, futex_op, val, timeout, uaddr2, val3); +} + +inline int rt_tgsigqueueinfo(int tgid, int tid, int sig, siginfo_t *uinfo) { + return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo); +} + } // namespace ddprof diff --git a/include/unwind.hpp b/include/unwind.hpp index dc0cad93f..190587331 100644 --- a/include/unwind.hpp +++ b/include/unwind.hpp @@ -23,7 +23,7 @@ void unwind_init_sample(UnwindState *us, const uint64_t *sample_regs, // Main unwind API DDRes unwindstate_unwind(UnwindState *us); -// Mark a cycle: garbadge collection, stats +// Mark a cycle: garbage collection, stats void unwind_cycle(UnwindState *us); // Clear unwinding structures of this pid diff --git a/include/unwind_state.hpp b/include/unwind_state.hpp index e627e7fea..890a79497 100644 --- a/include/unwind_state.hpp +++ b/include/unwind_state.hpp @@ -17,6 +17,7 @@ #include "symbol_hdr.hpp" #include "unwind_output.hpp" +#include #include #include @@ -63,6 +64,10 @@ struct UnwindState { UnwindOutput output; UniqueElf ref_elf; // reference elf object used to initialize dwfl int maximum_pids; + + using MemoryReadCallback = + std::function; + 
MemoryReadCallback memory_read_callback; // custom memory read callback }; std::optional diff --git a/src/crash_reporter.cc b/src/crash_reporter.cc new file mode 100644 index 000000000..8a7c3341e --- /dev/null +++ b/src/crash_reporter.cc @@ -0,0 +1,355 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. + +#include "crash_reporter.hpp" + +#include "datadog/profiling.h" +#include "ddog_profiling_utils.hpp" +#include "defer.hpp" +#include "exporter/ddprof_exporter.hpp" +#include "exporter_input.hpp" +#include "logger.hpp" +#include "pprof/ddprof_pprof.hpp" +#include "symbolizer.hpp" +#include "unwind.hpp" +#include "unwind_state.hpp" + +#include +#include +#include +#include +#include +#include +#include + +namespace ddprof { + +namespace { + +bool ptrace_attach(pid_t tid) { + if (ptrace(PTRACE_ATTACH, tid, nullptr, nullptr) != 0) { + LG_ERR("Failed to attach to pid %d: %s", tid, strerror(errno)); + return false; + } + + while (true) { + int status; + const int r = waitpid(tid, &status, __WALL); + if (r == -1 && errno == EINTR) { + continue; + } + + if (r == -1) { + LG_ERR("Failed to wait for pid %d: %s", tid, strerror(errno)); + return false; + } + + if (!WIFSTOPPED(status)) { + LG_ERR("Process %d not stopped: %d", tid, status); + return false; + } + return true; + } +} + +bool ptrace_detach(pid_t tid) { + if (ptrace(PTRACE_DETACH, tid, nullptr, nullptr) != 0) { + LG_ERR("Failed to detach from pid %d: %s", tid, strerror(errno)); + return false; + } + return true; +} + +size_t ptrace_get_registers(pid_t tid, user_regs_struct *user_regs) { + iovec iov; + iov.iov_base = user_regs; + iov.iov_len = sizeof(*user_regs); + if (ptrace(PTRACE_GETREGSET, tid, reinterpret_cast(NT_PRSTATUS), + &iov) != 0) { + return 0; + } + return iov.iov_len; +} + +bool get_unwind_registers(pid_t
pid, UnwindRegisters *registers) { + user_regs_struct user_regs; + if (ptrace_get_registers(pid, &user_regs) == 0) { + return false; + } +#ifdef __x86_64__ + registers->regs[REGNAME(RAX)] = user_regs.rax; + registers->regs[REGNAME(RBX)] = user_regs.rbx; + registers->regs[REGNAME(RCX)] = user_regs.rcx; + registers->regs[REGNAME(RDX)] = user_regs.rdx; + registers->regs[REGNAME(RSI)] = user_regs.rsi; + registers->regs[REGNAME(RDI)] = user_regs.rdi; + registers->regs[REGNAME(RBP)] = user_regs.rbp; + registers->regs[REGNAME(RIP)] = user_regs.rip; + registers->regs[REGNAME(RSP)] = user_regs.rsp; + registers->regs[REGNAME(R8)] = user_regs.r8; + registers->regs[REGNAME(R9)] = user_regs.r9; + registers->regs[REGNAME(R10)] = user_regs.r10; + registers->regs[REGNAME(R11)] = user_regs.r11; + registers->regs[REGNAME(R12)] = user_regs.r12; + registers->regs[REGNAME(R13)] = user_regs.r13; + registers->regs[REGNAME(R14)] = user_regs.r14; + registers->regs[REGNAME(R15)] = user_regs.r15; +#else + // NOLINTBEGIN(readability-magic-numbers) + registers->regs[REGNAME(X0)] = user_regs.regs[0]; + registers->regs[REGNAME(X1)] = user_regs.regs[1]; + registers->regs[REGNAME(X2)] = user_regs.regs[2]; + registers->regs[REGNAME(X3)] = user_regs.regs[3]; + registers->regs[REGNAME(X4)] = user_regs.regs[4]; + registers->regs[REGNAME(X5)] = user_regs.regs[5]; + registers->regs[REGNAME(X6)] = user_regs.regs[6]; + registers->regs[REGNAME(X7)] = user_regs.regs[7]; + registers->regs[REGNAME(X8)] = user_regs.regs[8]; + registers->regs[REGNAME(X9)] = user_regs.regs[9]; + registers->regs[REGNAME(X10)] = user_regs.regs[10]; + registers->regs[REGNAME(X11)] = user_regs.regs[11]; + registers->regs[REGNAME(X12)] = user_regs.regs[12]; + registers->regs[REGNAME(X13)] = user_regs.regs[13]; + registers->regs[REGNAME(X14)] = user_regs.regs[14]; + registers->regs[REGNAME(X15)] = user_regs.regs[15]; + registers->regs[REGNAME(X16)] = user_regs.regs[16]; + registers->regs[REGNAME(X17)] = user_regs.regs[17]; +
registers->regs[REGNAME(X18)] = user_regs.regs[18]; + registers->regs[REGNAME(X19)] = user_regs.regs[19]; + registers->regs[REGNAME(X20)] = user_regs.regs[20]; + registers->regs[REGNAME(X21)] = user_regs.regs[21]; + registers->regs[REGNAME(X22)] = user_regs.regs[22]; + registers->regs[REGNAME(X23)] = user_regs.regs[23]; + registers->regs[REGNAME(X24)] = user_regs.regs[24]; + registers->regs[REGNAME(X25)] = user_regs.regs[25]; + registers->regs[REGNAME(X26)] = user_regs.regs[26]; + registers->regs[REGNAME(X27)] = user_regs.regs[27]; + registers->regs[REGNAME(X28)] = user_regs.regs[28]; + registers->regs[REGNAME(X29)] = user_regs.regs[29]; + registers->regs[REGNAME(X30)] = user_regs.regs[30]; + // NOLINTEND(readability-magic-numbers) + registers->regs[REGNAME(PC)] = user_regs.pc; + registers->regs[REGNAME(SP)] = user_regs.sp; +#endif + return true; +} + +ddog_prof_Endpoint create_endpoint(const ExporterInput &exporter_input, + std::string *agent_url) { + if (exporter_input.url.starts_with("file://")) { + return ddog_Endpoint_file(to_CharSlice(exporter_input.url)); + } + + // Keep the URL in caller-owned storage: the endpoint is built from a slice + // of it and is used after this function returns. + *agent_url = determine_agent_url(exporter_input); + return ddog_prof_Endpoint_agent(to_CharSlice(*agent_url)); +} + +bool report_crash_to_agent(const ExporterInput &exp_input, + ddog_prof_CrashInfo *crashinfo) { + + auto res = ddog_crashinfo_set_timestamp_to_now(crashinfo); + if (res.tag != DDOG_PROF_CRASHTRACKER_RESULT_OK) { + LG_ERR("Failed to set timestamp: %.*s", (int)res.err.message.len, + res.err.message.ptr); + ddog_Error_drop(&res.err); + return false; + } + + auto tags = ddog_Vec_Tag_new(); + defer { ddog_Vec_Tag_drop(tags); }; + (void)ddog_Vec_Tag_push(&tags, to_CharSlice("service"), + to_CharSlice(exp_input.service)); + (void)ddog_Vec_Tag_push(&tags, to_CharSlice("environment"), + to_CharSlice(exp_input.environment)); + // \fixme{nsavoire}: add more tags + + const ddog_prof_CrashtrackerMetadata metadata = { + .profiling_library_name = to_CharSlice("ddprof"), + .profiling_library_version = to_CharSlice(exp_input.profiler_version), +
.family = to_CharSlice(exp_input.family), + .tags = &tags, + }; + + res = ddog_crashinfo_set_metadata(crashinfo, metadata); + if (res.tag != DDOG_PROF_CRASHTRACKER_RESULT_OK) { + LG_ERR("Failed to set metadata: %.*s", (int)res.err.message.len, + res.err.message.ptr); + // Release the error carried by the result before returning. + ddog_Error_drop(&res.err); + return false; + } + + const auto timeout_secs = 5; + std::string agent_url; // backing storage for the endpoint URL slice + const ddog_prof_CrashtrackerConfiguration config = { + .endpoint = create_endpoint(exp_input, &agent_url), + .timeout_secs = timeout_secs, + }; + + res = ddog_crashinfo_upload_to_endpoint(crashinfo, config); + if (res.tag != DDOG_PROF_CRASHTRACKER_RESULT_OK) { + LG_ERR("Failed to upload crashinfo: %.*s", (int)res.err.message.len, + res.err.message.ptr); + ddog_Error_drop(&res.err); + return false; + } + + return true; +} + +DDRes unwind_once(pid_t pid, const UnwindRegisters &registers, + UnwindState::MemoryReadCallback read_memory_callback, + std::vector *locations, + Symbolizer::BlazeResultsWrapper &session_results) { + unwind_init(); + + auto unwind_state = create_unwind_state(); + if (!unwind_state) { + LG_ERR("Failed to create unwind state"); + return ddres_error(DD_WHAT_UW_ERROR); + } + + unwind_state->memory_read_callback = std::move(read_memory_callback); + + unwind_init_sample(&*unwind_state, registers.regs, pid, 0, nullptr); + unwindstate_unwind(&*unwind_state); + + unsigned write_index = 0; + const std::unique_ptr symbolizer = + std::make_unique(true, false, Symbolizer::k_process); + std::array locs; + + process_symbolization(unwind_state->output.locs, unwind_state->symbol_hdr, + unwind_state->dso_hdr.get_file_info_vector(), + symbolizer.get(), locs, session_results, write_index); + + for (unsigned i = 0; i < write_index; ++i) { + locations->push_back(locs[i]); + } + + return {}; +} + +} // namespace + +bool report_crash(pid_t /*pid*/, pid_t tid, const ExporterInput &export_input) { + + if (!ptrace_attach(tid)) { + return false; + } + + defer { ptrace_detach(tid); }; + + UnwindRegisters regs; + if (!get_unwind_registers(tid, &regs)) { + return false; + } + +
Symbolizer::BlazeResultsWrapper session_results; + auto crashinfo_new_result = ddog_crashinfo_new(); + if (crashinfo_new_result.tag != DDOG_PROF_CRASH_INFO_NEW_RESULT_OK) { + LG_ERR("Failed to make new crashinfo: %.*s", + (int)crashinfo_new_result.err.message.len, + crashinfo_new_result.err.message.ptr); + ddog_Error_drop(&crashinfo_new_result.err); + return false; + } + + auto *crashinfo = &crashinfo_new_result.ok; + defer { ddog_crashinfo_drop(crashinfo); }; + + std::vector locations; + unwind_once( + tid, regs, + [tid](ProcessAddress_t addr, ElfWord_t *result, int /*regno*/) { + errno = 0; + const long res = ptrace(PTRACE_PEEKDATA, tid, addr, nullptr); + if (errno == 0) { + *result = res; + return true; + } + return false; + }, + &locations, session_results); + + std::vector stackFrames(locations.size()); + std::vector stackFrameNames(locations.size()); + + printf("Crash stack trace:\n"); + int idx = 0; + for (const auto &loc : locations) { + std::string buf; + + std::string_view function_name{loc.function.name.ptr, + loc.function.name.len}; + std::string_view source_file{loc.function.filename.ptr, + loc.function.filename.len}; + + absl::StrAppendFormat(&buf, "#%d ", idx); + absl::StrAppendFormat(&buf, "%#x", loc.address); + + if (!function_name.empty()) { + function_name = function_name.substr(0, function_name.find('(')); + // Append the function name, trimming at the first '(' if present. + absl::StrAppendFormat(&buf, " in %s", function_name); + } + if (!source_file.empty()) { + // Append the file name, showing only the file and not the full path. + auto pos = source_file.rfind('/'); + if (pos != std::string_view::npos) { + source_file = source_file.substr(pos + 1); + } + absl::StrAppendFormat(&buf, " at %s", source_file); + } + // Include line number if available and greater than zero.
+ if (loc.line > 0) { + absl::StrAppendFormat(&buf, ":%d", loc.line); + } + + stackFrameNames[idx] = ddog_prof_StackFrameNames{ + .colno = {DDOG_PROF_OPTION_U32_NONE_U32, {0}}, + .filename = to_CharSlice(source_file), + .lineno = {loc.line > 0 ? DDOG_PROF_OPTION_U32_SOME_U32 + : DDOG_PROF_OPTION_U32_NONE_U32, + {static_cast(loc.line)}}, + .name = to_CharSlice(function_name)}; + + stackFrames[idx] = ddog_prof_StackFrame{ + .ip = loc.address, + .module_base_address = 0, + .names{ + .ptr = &stackFrameNames[idx], + .len = 1, + }, + .sp = 0, + .symbol_address = 0, + }; + + ++idx; + + printf("%s\n", buf.c_str()); + } + + const ddog_prof_Slice_StackFrame stacktrace{ + .ptr = stackFrames.data(), + .len = stackFrames.size(), + }; + + auto res = ddog_crashinfo_set_stacktrace( + crashinfo, to_CharSlice(std::to_string(tid)), stacktrace); + if (res.tag != DDOG_PROF_CRASHTRACKER_RESULT_OK) { + LG_ERR("Failed to set stacktrace: %.*s", (int)res.err.message.len, + res.err.message.ptr); + // Release the error carried by the result before returning. + ddog_Error_drop(&res.err); + return false; + } + + return report_crash_to_agent(export_input, crashinfo); +} + +} // namespace ddprof diff --git a/src/crash_tracker.cc b/src/crash_tracker.cc new file mode 100644 index 000000000..e82ba31bf --- /dev/null +++ b/src/crash_tracker.cc @@ -0,0 +1,239 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc.
+ +#include "crash_tracker.hpp" + +#include "dumpable.hpp" +#include "logger.hpp" +#include "syscalls.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ddprof { + +namespace { + +using SignalSpan = std::span; +using OldActions = std::array; +using SignalSet = std::unordered_set; +using SignalHandler = void (*)(int, siginfo_t *, void *); + +constexpr std::array kFatalSignals = { + SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGQUIT, + SIGSEGV, SIGSYS, SIGTRAP, SIGXCPU, SIGXFSZ, +}; + +// convert pid to string without allocation (assuming out has enough space) +void pid_to_string(pid_t pid, std::string *out) { + constexpr int base = 10; + + if (pid == 0) { + out->assign("0"); + return; + } + + int ndigits = 0; + // count number of digits + for (pid_t tmp = pid; tmp > 0; tmp /= base) { + ++ndigits; + } + + out->resize(ndigits); + for (int i = ndigits - 1; i >= 0; --i) { + (*out)[i] = '0' + (pid % base); + pid /= base; + } +} + +bool install_signal_handler(int sig, SignalHandler handler, int flags, + struct sigaction *old_action) { + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = flags | SA_SIGINFO; + sa.sa_sigaction = handler; + if (sigaction(sig, &sa, old_action) != 0) { + LG_ERR("Failed to install signal handler for signal %d: %s", sig, + strerror(errno)); + return false; + } + + return true; +} + +bool install_signal_handlers(SignalSpan signals, SignalHandler handler, + int flags = 0, OldActions *old_actions = nullptr, + const SignalSet *signals_to_ignore = nullptr) { + return std::ranges::all_of(signals, [&](int signal) { + if (signals_to_ignore && + signals_to_ignore->find(signal) != signals_to_ignore->end()) { + return true; + } + return install_signal_handler(signal, handler, flags, + old_actions ? 
&(*old_actions)[signal] + : nullptr); + }); +} + +void restore_signal_handler_and_raise_signal( + siginfo_t *siginfo, const struct sigaction *old_action) { + bool restore_default_action = old_action == nullptr; + + if (!restore_default_action) { + if (sigaction(siginfo->si_signo, old_action, nullptr) != 0) { + restore_default_action = true; + } + } + if (restore_default_action) { + struct sigaction default_action; + sigemptyset(&default_action.sa_mask); + default_action.sa_flags = 0; + default_action.sa_handler = SIG_DFL; + if (sigaction(siginfo->si_signo, &default_action, nullptr) != 0) { + return; + } + } + + const int ret = + rt_tgsigqueueinfo(getpid(), gettid(), siginfo->si_signo, siginfo); + if (ret == 0) { + return; + } + + raise(siginfo->si_signo); +} + +class CrashTracker { +public: + static CrashTracker *instance() { + static auto *instance = new CrashTracker(); + return instance; + } + + bool init(std::vector argv); + + ~CrashTracker() = delete; + CrashTracker(const CrashTracker &) = delete; + CrashTracker &operator=(const CrashTracker &) = delete; + +private: + static void handle_signal(int signo, siginfo_t *siginfo, void *ucontext) { + auto *tracker = CrashTracker::instance(); + tracker->track_crash(signo, siginfo, ucontext); + } + + void wait_for_crash_tracking_done() { + const int timeout_sec = 5; + timespec timeout; + timeout.tv_sec = timeout_sec; + timeout.tv_nsec = 0; + futex(&_crash_tracking_done, FUTEX_WAIT_PRIVATE, 0, &timeout, nullptr, 0); + } + + void wake_crash_tracking_done() { + _crash_tracking_done = 1; + futex(&_crash_tracking_done, FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, + 0); + } + + void track_crash_impl() { + const pid_t tid = gettid(); + pid_to_string(tid, &_argv.back()); + + const DumpableGuard dumpableGuard; + const pid_t pid = fork(); + if (pid < 0) { + return; + } + + if (pid == 0) { + execv(_cargv[0], _cargv.data()); + _exit(EXIT_FAILURE); + } + + // \fixme{nsavoire}: race condition here, we should synchronize with the + 
// child to make sure set ptracer is done before child does a ptrace attach. + // We could use a socketpair / pipe to synchronize with the child. + prctl(PR_SET_PTRACER, pid, 0, 0, 0); + + int status; + waitpid(pid, &status, 0); + } + + void track_crash(int signo, siginfo_t *siginfo, void * /*ucontext*/) { + if (!_crash_tracked.test_and_set()) { + track_crash_impl(); + wake_crash_tracking_done(); + } else { + wait_for_crash_tracking_done(); + } + + restore_signal_handler_and_raise_signal(siginfo, &_old_actions[signo]); + } + + CrashTracker() = default; + + std::vector _argv; + std::vector _cargv; + std::array _old_actions; + std::atomic_flag _crash_tracked; + uint32_t _crash_tracking_done = 0; + bool _initialized = false; +}; + +bool CrashTracker::init(std::vector argv) { + if (_initialized) { + return false; + } + + _argv = std::move(argv); + + _cargv.clear(); + std::ranges::transform( + _argv, std::back_inserter(_cargv), + // cppcheck is completely wrong about `arg` that could be declared const + // cppcheck-suppress constParameterReference + [](std::string &arg) { return arg.data(); }); + _cargv.push_back(nullptr); + + _initialized = true; + + // \fixme{nsavoire}: set up alternate stack (alternate stack needs to be set + // up for each thread) + return install_signal_handlers(kFatalSignals, handle_signal, SA_ONSTACK, + &_old_actions); +} + +std::vector +get_handler_argv(const std::string &handler_exe, + const std::vector &handler_args) { + std::vector argv; + argv.push_back(handler_exe); + std::copy(handler_args.begin(), handler_args.end(), std::back_inserter(argv)); + + argv.emplace_back("--pid"); + // reserve space for tid + argv.emplace_back(std::numeric_limits::digits10, '0'); + return argv; +} + +} // namespace + +bool install_crash_tracker(const std::string &handler_exe, + const std::vector &handler_args) { + auto *crash_handler = CrashTracker::instance(); + return crash_handler->init(get_handler_argv(handler_exe, handler_args)); +} + +} // namespace 
ddprof diff --git a/src/ddprof.cc b/src/ddprof.cc index e3d659911..563c8d2c9 100644 --- a/src/ddprof.cc +++ b/src/ddprof.cc @@ -6,6 +6,7 @@ #include "ddprof.hpp" #include "cap_display.hpp" +#include "crash_tracker.hpp" #include "ddprof_cmdline.hpp" #include "ddprof_context.hpp" #include "ddprof_context_lib.hpp" @@ -33,26 +34,6 @@ namespace ddprof { namespace { -/***************************** SIGSEGV Handler *******************************/ -void sigsegv_handler(int sig, siginfo_t *si, void *uc) { - // TODO this really shouldn't call printf-family functions... - (void)uc; -#ifdef __GLIBC__ - constexpr size_t k_stacktrace_buffer_size = 4096; - static void *buf[k_stacktrace_buffer_size] = {}; - size_t const sz = backtrace(buf, std::size(buf)); -#endif - (void)fprintf( - stderr, "ddprof[%d]: <%.*s> has encountered an error and will exit\n", - getpid(), static_cast(str_version().size()), str_version().data()); - if (sig == SIGSEGV) { - printf("[DDPROF] Fault address: %p\n", si->si_addr); - } -#ifdef __GLIBC__ - backtrace_symbols_fd(buf, sz, STDERR_FILENO); -#endif - exit(-1); -} void display_system_info() { @@ -92,6 +73,25 @@ DDRes get_process_threads(pid_t pid, std::vector &threads) { return {}; } +void configure_crash_tracker(const DDProfContext &ctx) { + std::vector args = {"--service", ctx.exp_input.service}; + if (!ctx.exp_input.url.empty()) { + args.emplace_back("--url"); + args.emplace_back(ctx.exp_input.url); + } + if (!ctx.exp_input.host.empty()) { + args.emplace_back("--host"); + args.emplace_back(ctx.exp_input.host); + } + if (!ctx.exp_input.port.empty()) { + args.emplace_back("--port"); + args.emplace_back(ctx.exp_input.port); + } + + args.emplace_back("--report-crash"); + install_crash_tracker("/proc/self/exe", args); +} + } // namespace DDRes ddprof_setup(DDProfContext &ctx) { @@ -115,10 +115,7 @@ DDRes ddprof_setup(DDProfContext &ctx) { // Setup signal handler if defined if (ctx.params.fault_info) { - struct sigaction sigaction_handlers = {}; - 
sigaction_handlers.sa_sigaction = sigsegv_handler; - sigaction_handlers.sa_flags = SA_SIGINFO; - sigaction(SIGSEGV, &(sigaction_handlers), nullptr); + configure_crash_tracker(ctx); } // Set the nice level, but only if it was overridden because 0 is valid diff --git a/src/ddprof_cli.cc b/src/ddprof_cli.cc index c28d98afc..c4bf4f2e5 100644 --- a/src/ddprof_cli.cc +++ b/src/ddprof_cli.cc @@ -398,6 +398,12 @@ int DDProfCLI::parse(int argc, const char *argv[]) { ->default_val(k_default_max_profiled_pids) ->envname("DD_PROFILING_MAXIMUM_PIDS") ->group("")); + + extended_options.push_back( + app.add_flag("--report-crash", report_crash, "Report crash to Datadog") + ->default_val(false) + ->group("")); + // Parse CLI11_PARSE(app, argc, argv); diff --git a/src/ddprof_context_lib.cc b/src/ddprof_context_lib.cc index 6cef5c100..c7ca85ca7 100644 --- a/src/ddprof_context_lib.cc +++ b/src/ddprof_context_lib.cc @@ -90,6 +90,7 @@ void copy_cli_values(const DDProfCLI &ddprof_cli, DDProfContext &ctx) { ctx.params.show_samples = ddprof_cli.show_samples; ctx.params.timeline = ddprof_cli.timeline; ctx.params.fault_info = ddprof_cli.fault_info; + ctx.params.report_crash = ddprof_cli.report_crash; ctx.params.remote_symbolization = ddprof_cli.remote_symbolization; ctx.params.disable_symbolization = ddprof_cli.disable_symbolization; ctx.params.reorder_events = ddprof_cli.reorder_events; diff --git a/src/dwfl_thread_callbacks.cc b/src/dwfl_thread_callbacks.cc index eeffe0bef..cf1ea871d 100644 --- a/src/dwfl_thread_callbacks.cc +++ b/src/dwfl_thread_callbacks.cc @@ -51,7 +51,11 @@ bool set_initial_registers(Dwfl_Thread *thread, void *arg) { bool memory_read_dwfl(Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, int regno, void *arg) { (void)dwfl; - return memory_read(addr, result, regno, arg); + auto *us = static_cast(arg); + if (us->memory_read_callback) { + return us->memory_read_callback(addr, result, regno); + } + return memory_read(addr, result, regno, us); } } // namespace ddprof diff 
--git a/src/exe/main.cc b/src/exe/main.cc index 1f5f3ce52..531d8b716 100644 --- a/src/exe/main.cc +++ b/src/exe/main.cc @@ -4,6 +4,7 @@ // Datadog, Inc. #include "constants.hpp" +#include "crash_reporter.hpp" #include "daemonize.hpp" #include "ddprof.hpp" #include "ddprof_cli.hpp" @@ -409,6 +410,11 @@ int main(int argc, char *argv[]) { } // cli is destroyed here (prevents forks from having an instance of CLI + if (ctx->params.report_crash) { + report_crash(getppid(), ctx->params.pid, ctx->exp_input); + return 0; + } + // Save switch_user since ctx will be destroyed after call to start_profiler std::string const switch_user = ctx->params.switch_user; diff --git a/src/exporter/ddprof_exporter.cc b/src/exporter/ddprof_exporter.cc index a2dcb7d1a..b7ff8154a 100644 --- a/src/exporter/ddprof_exporter.cc +++ b/src/exporter/ddprof_exporter.cc @@ -186,6 +186,35 @@ DDRes fill_cycle_tags(const Tags &additional_tags, uint32_t profile_seq, } // namespace +std::string determine_agent_url(const ExporterInput &exporter_input) { + std::string port_str = exporter_input.port; + std::string agent_url; + + if (!exporter_input.url.empty()) { + // uds or already port -> no port + if (!strncasecmp(exporter_input.url.c_str(), "unix", 4) || + contains_port(exporter_input.url)) { + port_str = {}; + } + // check if schema is already available + if (strstr(exporter_input.url.c_str(), "://") != nullptr) { + agent_url = alloc_url_agent("", exporter_input.url, port_str); + } else if (exporter_input.url[0] == '/') { + // Starts with a '/', assume unix domain socket + agent_url = alloc_url_agent("unix://", exporter_input.url, {}); + } else { + // not available, assume http + agent_url = alloc_url_agent("http://", exporter_input.url, port_str); + } + } else { + // no url, use default host and port settings + agent_url = + alloc_url_agent("http://", exporter_input.host, exporter_input.port); + } + + return agent_url; +} + DDRes ddprof_exporter_init(const ExporterInput &exporter_input, DDProfExporter 
*exporter) { exporter->_input = exporter_input; @@ -202,30 +231,7 @@ DDRes ddprof_exporter_init(const ExporterInput &exporter_input, } if (exporter->_agent) { - std::string port_str = exporter_input.port; - - if (!exporter_input.url.empty()) { - // uds or already port -> no port - if (!strncasecmp(exporter_input.url.c_str(), "unix", 4) || - contains_port(exporter_input.url)) { - port_str = {}; - } - // check if schema is already available - if (strstr(exporter_input.url.c_str(), "://") != nullptr) { - exporter->_url = alloc_url_agent("", exporter_input.url, port_str); - } else if (exporter_input.url[0] == '/') { - // Starts with a '/', assume unix domain socket - exporter->_url = alloc_url_agent("unix://", exporter_input.url, {}); - } else { - // not available, assume http - exporter->_url = - alloc_url_agent("http://", exporter_input.url, port_str); - } - } else { - // no url, use default host and port settings - exporter->_url = - alloc_url_agent("http://", exporter_input.host, exporter_input.port); - } + exporter->_url = determine_agent_url(exporter_input); } else { // agentless mode if (!exporter->_input.url.empty()) { diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 3aabdbaf8..ba15eb0ee 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -290,14 +290,17 @@ std::span adjust_locations(const PerfWatcher *watcher, return locs; } +} // namespace + DDRes process_symbolization( std::span locs, const SymbolHdr &symbol_hdr, const FileInfoVector &file_infos, Symbolizer *symbolizer, - DDProfPProf *pprof, std::array &locations_buff, Symbolizer::BlazeResultsWrapper &session_results, unsigned &write_index) { unsigned index = 0; + const bool use_process_addresses = + symbolizer->reported_addr_format() == Symbolizer::k_process; const ddprof::SymbolTable &symbol_table = symbol_hdr._symbol_table; const ddprof::MapInfoTable &mapinfo_table = symbol_hdr._mapinfo_table; @@ -309,9 +312,9 @@ DDRes process_symbolization( if 
(locs[index].symbol_idx != k_symbol_idx_null) { // Location already symbolized const FunLoc &loc = locs[index]; - write_location( - loc, mapinfo_table[loc.map_info_idx], symbol_table[loc.symbol_idx], - &locations_buff[write_index++], pprof->use_process_adresses); + write_location(loc, mapinfo_table[loc.map_info_idx], + symbol_table[loc.symbol_idx], + &locations_buff[write_index++], use_process_addresses); ++index; continue; } @@ -369,13 +372,11 @@ DDRes process_symbolization( const FunLoc &loc = locs.back(); write_location(loc, mapinfo_table[loc.map_info_idx], symbol_table[loc.symbol_idx], &locations_buff[write_index++], - pprof->use_process_adresses); + use_process_addresses); } return {}; } -} // namespace - DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx) { size_t const num_watchers = ctx.watchers.size(); @@ -520,7 +521,7 @@ DDRes pprof_aggregate(const UnwindOutput *uw_output, Symbolizer::BlazeResultsWrapper session_results; unsigned write_index = 0; DDRES_CHECK_FWD(process_symbolization(locs, symbol_hdr, file_infos, - symbolizer, pprof, locations_buff, + symbolizer, locations_buff, session_results, write_index)); std::array labels{}; // Create the labels for the sample. 
Two samples are the same only when diff --git a/src/stack_helper.cc b/src/stack_helper.cc index 0a58970f5..bcbd88c6c 100644 --- a/src/stack_helper.cc +++ b/src/stack_helper.cc @@ -8,9 +8,8 @@ namespace ddprof { // read a word from the given stack bool memory_read(ProcessAddress_t addr, ElfWord_t *result, int regno, - void *arg) { + UnwindState *us) { *result = 0; - auto *us = static_cast(arg); constexpr uint64_t k_zero_page_limit = 4096; if (addr < k_zero_page_limit) { diff --git a/src/user_override.cc b/src/user_override.cc index e466408ff..7e4eba1d3 100644 --- a/src/user_override.cc +++ b/src/user_override.cc @@ -9,13 +9,13 @@ #include "user_override.hpp" +#include "dumpable.hpp" #include "logger.hpp" #include #include #include #include -#include #include #include @@ -25,18 +25,6 @@ namespace { constexpr const char *const s_user_nobody = "nobody"; constexpr uid_t s_root_user = 0; -struct DumpableRestorer { -public: - DumpableRestorer() : _dumpable{prctl(PR_GET_DUMPABLE)} {} - ~DumpableRestorer() { prctl(PR_SET_DUMPABLE, _dumpable); } - - DumpableRestorer(const DumpableRestorer &) = delete; - DumpableRestorer operator=(const DumpableRestorer &) = delete; - -private: - int _dumpable; -}; - } // namespace bool is_root() { return getuid() == s_root_user; } diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 648b3217c..c4f6029a5 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -5,6 +5,7 @@ #include "exporter/ddprof_exporter.hpp" +#include "defer.hpp" #include "loghandle.hpp" #include "pevent_lib_mocks.hpp" #include "pprof/ddprof_pprof.hpp" @@ -159,6 +160,9 @@ TEST(DDProfExporter, simple) { MapInfoTable &mapinfo_table = symbol_hdr._mapinfo_table; fill_unwind_symbols(table, mapinfo_table, mock_output); DDProfContext ctx = {}; + ctx.worker_ctx.symbolizer = new Symbolizer(); + defer { delete ctx.worker_ctx.symbolizer; }; + ctx.watchers.push_back(*ewatcher_from_str("sCPU")); res = pprof_create_profile(&pprofs, ctx); 
EXPECT_TRUE(IsDDResOK(res)); diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index 517c21ec2..6658e1cdf 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -8,6 +8,7 @@ #include "ddog_profiling_utils.hpp" #include "ddprof_cmdline.hpp" #include "ddprof_cmdline_watcher.hpp" +#include "defer.hpp" #include "loghandle.hpp" #include "pevent_lib_mocks.hpp" #include "symbol_hdr.hpp" @@ -66,6 +67,8 @@ TEST(DDProfPProf, aggregate) { fill_unwind_symbols(table, mapinfo_table, mock_output); DDProfPProf pprof; DDProfContext ctx = {}; + ctx.worker_ctx.symbolizer = new Symbolizer(); + defer { delete ctx.worker_ctx.symbolizer; }; bool ok = watchers_from_str("sCPU", ctx.watchers); EXPECT_TRUE(ok); @@ -94,6 +97,9 @@ TEST(DDProfPProf, just_live) { fill_unwind_symbols(table, mapinfo_table, mock_output); DDProfPProf pprof; DDProfContext ctx = {}; + ctx.worker_ctx.symbolizer = new Symbolizer(); + defer { delete ctx.worker_ctx.symbolizer; }; + { bool ok = watchers_from_str("sDUM", ctx.watchers); EXPECT_TRUE(ok); diff --git a/tools/libdatadog_checksums.txt b/tools/libdatadog_checksums.txt index 83c4e1a09..ccac8c6e0 100644 --- a/tools/libdatadog_checksums.txt +++ b/tools/libdatadog_checksums.txt @@ -1,4 +1,4 @@ -09640a86f6beb5069c25ff7772f8b1a336d68f33dd94469711be9b98388fee0b libdatadog-aarch64-alpine-linux-musl.tar.gz -ff3c3f4650ac45d5700917026a84953b1d52c07652b834a12ada2f80c4cda47e libdatadog-aarch64-unknown-linux-gnu.tar.gz -17360251a8585450d8499ae5e323161a1486e487937b257571adc4c06a2a3688 libdatadog-x86_64-alpine-linux-musl.tar.gz -1856e9b209f3a414536930eea1a8d4c854539ccfbf395ba1740f0d8f71bc2938 libdatadog-x86_64-unknown-linux-gnu.tar.gz +c2c6c32f612e8c1682131e72bd50492d809bac973f516e56e163e797435eef75 libdatadog-aarch64-alpine-linux-musl.tar.gz +97c4fc46f92580b8929e8fcc3f51b47226836e29bce0b57ac8d3387a27a81ce1 libdatadog-aarch64-unknown-linux-gnu.tar.gz +68e67c5e87616f830289bc85626d2062277bef54694cc6dbb445105c66fe8885 
libdatadog-x86_64-alpine-linux-musl.tar.gz +cd89cbe480db0b828a43afd161ddd83e57319dbe3d412fa4a2d096daae244595 libdatadog-x86_64-unknown-linux-gnu.tar.gz \ No newline at end of file