Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
9cd3145
Import Nick's code
r1viollet Sep 22, 2022
8e567d9
Add a small test for dwarf unwinding
r1viollet Sep 22, 2022
050df2d
Compare between remote vs local unwinding
r1viollet Sep 26, 2022
8b097fc
Minor update on symbols
r1viollet Sep 26, 2022
7c7ca0a
wip
r1viollet Nov 16, 2022
d165250
Merge branch 'main' of github.com:DataDog/ddprof into r1viollet/nick_…
r1viollet Nov 16, 2022
a295903
WIP
r1viollet Nov 16, 2022
e81e633
Refactor and add a test on async profiler unwinding
r1viollet Nov 17, 2022
ed168f4
Minor notes on steps to take to improve unwinding
r1viollet Nov 17, 2022
fcda4a5
Create a remote unwinding test
r1viollet Nov 18, 2022
e4cff89
Add an async prof library
r1viollet Nov 19, 2022
e61cb47
Hacky version using the async profiler's unwinding
r1viollet Nov 19, 2022
78e0957
Adding a benchmark for the async profiler
r1viollet Nov 21, 2022
c0047e0
Prevent tail call optimisation
r1viollet Nov 22, 2022
6af2003
Minor change in comment
r1viollet Nov 23, 2022
15f3093
Remove the save context from the benchmark operation
r1viollet Nov 25, 2022
81b9d2d
Minor env fixes
r1viollet Nov 29, 2022
f562ce9
Minor fix for zsh
r1viollet Nov 29, 2022
e8989ab
Version allowing to run async profiler with ddprof
r1viollet Nov 30, 2022
6765810
Add a small loader tool to compare async profiler's load vs remote load.
r1viollet Dec 1, 2022
cfd7bd5
Work in progress
r1viollet Jan 4, 2023
a5cbca1
Async profiler
r1viollet Jan 6, 2023
495d0d3
Adjust the offset to the eh_frame section
r1viollet Jan 6, 2023
e8ce231
Add the show frames to debug unwinding issues
r1viollet Jan 9, 2023
a589266
Adjust max unwinding depth
r1viollet Jan 9, 2023
5554831
Add in binary information
r1viollet Jan 10, 2023
9d38887
Implement a basic red zone optim fix
r1viollet Jan 10, 2023
82f40ad
Lost sample throttling
r1viollet Jan 11, 2023
3052627
Add pthread to the async profiler compilation
r1viollet Jan 18, 2023
4b394f7
Ensure in whole host we are able to run using the /proc/<pid>/root li…
r1viollet Jan 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@ endif()

# ---- Unit tests ----

# Gather the source files found in src/async-profiler into ASYNC_PROFILER_SRC
# and store the path of the matching header directory in ASYNC_PROFILER_INCLUDE.
aux_source_directory(${CMAKE_SOURCE_DIR}/src/async-profiler ASYNC_PROFILER_SRC)
set(ASYNC_PROFILER_INCLUDE ${CMAKE_SOURCE_DIR}/include/async-profiler)

# Unit tests Add infrastructure for enabling tests
option(BUILD_DDPROF_TESTING "Enable tests" ON)
if(${BUILD_DDPROF_TESTING})
Expand Down
139 changes: 139 additions & 0 deletions include/async-profiler/arch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef _ARCH_H
#define _ARCH_H


// Short aliases for the unsigned integer types used by this header.
// NOTE(review): the names suggest 8/16/32/64-bit widths, but they alias the
// plain built-in types, so the actual widths depend on the target ABI.
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;

// Atomically adds `increment` (1 when omitted) to `var`.
// Returns the value `var` held immediately before the addition.
static inline u64 atomicInc(volatile u64& var, u64 increment = 1) {
    const u64 previous = __sync_fetch_and_add(&var, increment);
    return previous;
}

// Atomically adds `increment` (1 when omitted) to `var`.
// Returns the value `var` held immediately before the addition.
static inline int atomicInc(volatile int& var, int increment = 1) {
    const int previous = __sync_fetch_and_add(&var, increment);
    return previous;
}

// Reads `var` with acquire memory ordering and returns the value read.
static inline u64 loadAcquire(u64& var) {
    const u64 observed = __atomic_load_n(&var, __ATOMIC_ACQUIRE);
    return observed;
}

// Writes `value` into `var` with release memory ordering.
static inline void storeRelease(u64& var, u64 value) {
    __atomic_store_n(&var, value, __ATOMIC_RELEASE);
}


// Architecture-specific constants and low-level primitives.
//
// Every supported branch below defines the same set of names:
//   instruction_t, BREAKPOINT, BREAKPOINT_OFFSET, SYSCALL_SIZE, FRAME_PC_SLOT,
//   ADJUST_RET, PLT_HEADER_SIZE, PLT_ENTRY_SIZE, PERF_REG_PC,
//   spinPause(), rmb(), flushCache(addr)
// The 32-bit ARM branch additionally defines BREAKPOINT_THUMB.
// Building for any other target stops with an #error.

// x86-64 and 32-bit x86
#if defined(__x86_64__) || defined(__i386__)

typedef unsigned char instruction_t;
const instruction_t BREAKPOINT = 0xcc;
const int BREAKPOINT_OFFSET = 0;

const int SYSCALL_SIZE = 2;
const int FRAME_PC_SLOT = 1;
const int ADJUST_RET = 1;
const int PLT_HEADER_SIZE = 16;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 8; // PERF_REG_X86_IP

#define spinPause() asm volatile("pause")
#define rmb() asm volatile("lfence" : : : "memory")
#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r" (addr) : "memory")

// 32-bit ARM (ARM and Thumb instruction sets)
#elif defined(__arm__) || defined(__thumb__)

typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0xe7f001f0;
const instruction_t BREAKPOINT_THUMB = 0xde01de01;
const int BREAKPOINT_OFFSET = 0;

const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1;
const int ADJUST_RET = 0;
const int PLT_HEADER_SIZE = 20;
const int PLT_ENTRY_SIZE = 12;
const int PERF_REG_PC = 15; // PERF_REG_ARM_PC

#define spinPause() asm volatile("yield")
#define rmb() asm volatile("dmb ish" : : : "memory")
// Clears the instruction cache for the single instruction_t located at addr.
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))

// 64-bit ARM
#elif defined(__aarch64__)

typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0xd4200000;
const int BREAKPOINT_OFFSET = 0;

const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 1;
const int ADJUST_RET = 0;
const int PLT_HEADER_SIZE = 32;
const int PLT_ENTRY_SIZE = 16;
const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC

#define spinPause() asm volatile("isb")
#define rmb() asm volatile("dmb ish" : : : "memory")
// Clears the instruction cache for the single instruction_t located at addr.
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))

// 64-bit PowerPC, little-endian only
#elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)

typedef unsigned int instruction_t;
const instruction_t BREAKPOINT = 0x7fe00008;
// We place the break point in the third instruction slot on PPCLE as the first two are skipped if
// the call comes from within the same compilation unit according to the LE ABI.
const int BREAKPOINT_OFFSET = 8;

const int SYSCALL_SIZE = sizeof(instruction_t);
const int FRAME_PC_SLOT = 2;
const int ADJUST_RET = 0;
const int PLT_HEADER_SIZE = 24;
const int PLT_ENTRY_SIZE = 24;
const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP

#define spinPause() asm volatile("yield") // does nothing, but using or 1,1,1 would lead to other problems
#define rmb() asm volatile ("sync" : : : "memory") // lwsync would do but better safe than sorry
// Clears the instruction cache for the single instruction_t located at addr.
#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t))

#else

#error "Compiling on unsupported arch"

#endif


// Return address signing support.
// Apple M1 has 47 bit virtual addresses.
#if defined(__aarch64__) && defined(__APPLE__)
#  define ADDRESS_BITS 47
#  define WX_MEMORY true
#else
#  define WX_MEMORY false
#endif

// stripPointer(p): when ADDRESS_BITS is defined, returns p with every bit
// above the low ADDRESS_BITS bits cleared; otherwise it expands to p unchanged.
#ifdef ADDRESS_BITS
static inline const void* stripPointer(const void* p) {
    const unsigned long address_mask = (1UL << ADDRESS_BITS) - 1;
    return (const void*) ((unsigned long)p & address_mask);
}
#else
#  define stripPointer(p) (p)
#endif


#endif // _ARCH_H
190 changes: 190 additions & 0 deletions include/async-profiler/codeCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef _CODECACHE_H
#define _CODECACHE_H

//#include <jvmti.h>


// Sentinel bounds used as the default min/max arguments of the CodeCache
// constructor. The "min" sentinel is the all-ones address and the "max"
// sentinel is the null address, so a cache still holding both defaults
// contains no address at all (see CodeCache::contains).
#define NO_MIN_ADDRESS ((const void*)-1)
#define NO_MAX_ADDRESS ((const void*)0)

// Callback taking a name string and returning a bool; accepted by CodeCache::mark().
typedef bool (*NamePredicate)(const char* name);

// NOTE(review): presumably the initial size of CodeCache's blob array - the
// constant is not used anywhere in this header; confirm in the implementation.
const int INITIAL_CODE_CACHE_CAPACITY = 1000;
// Number of slots in CodeCacheArray's fixed-size library table.
const int MAX_NATIVE_LIBS = 2048;


// Small header that sits directly in front of a name string.
//
// Every accessor takes the pointer to the name characters and steps back
// sizeof(NativeFunc) bytes to reach the header fields.
// NOTE(review): `name` is presumably always a pointer obtained from create();
// create()/destroy() are only declared here - confirm in the implementation.
class NativeFunc {
  private:
    short _lib_index;
    char _mark;
    char _reserved;
    char _name[0];  // zero-length trailing array: adds no storage of its own

    // Maps a name pointer back to the header located just before it.
    static NativeFunc* from(const char* name) {
        const char* header = name - sizeof(NativeFunc);
        return (NativeFunc*)header;
    }

  public:
    static char* create(const char* name, short lib_index);
    static void destroy(char* name);

    // Returns the library index stored in the header of `name`.
    static short libIndex(const char* name) {
        const NativeFunc* func = from(name);
        return func->_lib_index;
    }

    // Returns true when the mark byte in the header of `name` is non-zero.
    static bool isMarked(const char* name) {
        const NativeFunc* func = from(name);
        return !(func->_mark == 0);
    }

    // Sets the mark byte in the header of `name` to 1.
    static void mark(const char* name) {
        NativeFunc* func = from(name);
        func->_mark = 1;
    }
};


// One named address range: [_start, _end) plus the name attached to it.
class CodeBlob {
  public:
    const void* _start;
    const void* _end;
    char* _name;

    // Three-way comparison of two CodeBlob objects passed as untyped pointers
    // (the signature matches what qsort()-style routines expect).
    //
    // Ordering: ascending by _start; for equal _start the blob with the
    // larger _end sorts first. Returns -1, 0 or 1.
    static int comparator(const void* c1, const void* c2) {
        const CodeBlob* lhs = (const CodeBlob*)c1;
        const CodeBlob* rhs = (const CodeBlob*)c2;

        if (lhs->_start != rhs->_start) {
            return lhs->_start < rhs->_start ? -1 : 1;
        }
        if (lhs->_end != rhs->_end) {
            // Same start: the wider range goes first.
            return lhs->_end > rhs->_end ? -1 : 1;
        }
        return 0;
    }
};


class FrameDesc;

// Per-library record: a name, a library index, the address bounds covered,
// a text base pointer, a global-offset-table range, a table of FrameDesc
// entries and a growable array of CodeBlob entries.
//
// Only the trivial accessors are defined inline; everything else is declared
// here and implemented elsewhere, so the notes on those members are limited
// to what the declarations themselves show.
class CodeCache {
  protected:
    char* _name;
    short _lib_index;
    const void* _min_address;   // inclusive lower bound used by contains()
    const void* _max_address;   // exclusive upper bound used by contains()
    const char* _text_base;

    void** _got_start;
    void** _got_end;
    bool _got_patchable;

    FrameDesc* _dwarf_table;
    int _dwarf_table_length;

    int _capacity;
    int _count;
    CodeBlob* _blobs;

    void expand();

  public:
    // With the default bounds (NO_MIN_ADDRESS / NO_MAX_ADDRESS) the cache
    // starts out containing no address.
    CodeCache(const char* name,
              short lib_index = -1,
              const void* min_address = NO_MIN_ADDRESS,
              const void* max_address = NO_MAX_ADDRESS);

    ~CodeCache();

    const char* name() const {
        return _name;
    }

    const void* minAddress() const {
        return _min_address;
    }

    const void* maxAddress() const {
        return _max_address;
    }

    // True when `address` lies in the half-open range [minAddress(), maxAddress()).
    bool contains(const void* address) const {
        return address >= _min_address && address < _max_address;
    }

    void setTextBase(const char* text_base) {
        _text_base = text_base;
    }

    // Returns the pointer last stored with setTextBase().
    // Declared const, like the other read-only accessors, so it can also be
    // called through a const CodeCache.
    const char* getTextBase() const {
        return _text_base;
    }

    void** gotStart() const {
        return _got_start;
    }

    void** gotEnd() const {
        return _got_end;
    }

    void add(const void* start, int length, const char* name, bool update_bounds = false);
    void updateBounds(const void* start, const void* end);
    void sort();
    void mark(NamePredicate predicate);

    CodeBlob* find(const void* address);
    const char* binarySearch(const void* address);
    const void* findSymbol(const char* name);
    const void* findSymbolByPrefix(const char* prefix);
    const void* findSymbolByPrefix(const char* prefix, int prefix_len);

    void setGlobalOffsetTable(void** start, void** end, bool patchable);
    void** findGlobalOffsetEntry(void* address);
    void makeGotPatchable();

    void setDwarfTable(FrameDesc* table, int length);
    FrameDesc* findFrameDesc(const void* pc);
};


class CodeCacheArray {
private:
CodeCache* _libs[MAX_NATIVE_LIBS];
int _count;

public:
CodeCacheArray() : _count(0) {
}

CodeCache* operator[](int index) {
return _libs[index];
}

int count() {
return __atomic_load_n(&_count, __ATOMIC_ACQUIRE);
}

void add(CodeCache* lib) {
int index = __atomic_load_n(&_count, __ATOMIC_ACQUIRE);
_libs[index] = lib;
__atomic_store_n(&_count, index + 1, __ATOMIC_RELEASE);
}
};

#endif // _CODECACHE_H
Loading