From 9cd314577bf70bda12a8231f194e1bed207386d0 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 22 Sep 2022 11:34:49 +0200 Subject: [PATCH 01/29] Import Nick's code --- CMakeLists.txt | 3 + include/async-profiler/arch.h | 139 ++++++ include/async-profiler/codeCache.h | 190 ++++++++ include/async-profiler/dwarf.h | 160 +++++++ include/async-profiler/mutex.h | 60 +++ include/async-profiler/os.h | 31 ++ include/async-profiler/safeAccess.h | 35 ++ include/async-profiler/stackFrame.h | 73 +++ include/async-profiler/stackWalker.h | 44 ++ include/async-profiler/symbols.h | 38 ++ src/async-profiler/LICENSE | 201 ++++++++ src/async-profiler/README.md | 13 + src/async-profiler/cgotraceback.cpp | 210 ++++++++ src/async-profiler/cgotraceback.go | 25 + src/async-profiler/codeCache.cpp | 228 +++++++++ src/async-profiler/dwarf.cpp | 351 ++++++++++++++ src/async-profiler/mutex.cpp | 46 ++ src/async-profiler/os.cpp | 6 + src/async-profiler/safeAccess.cpp | 78 +++ src/async-profiler/stackFrame_aarch64.cpp | 134 ++++++ src/async-profiler/stackFrame_arm.cpp | 104 ++++ src/async-profiler/stackFrame_i386.cpp | 116 +++++ src/async-profiler/stackFrame_ppc64.cpp | 133 ++++++ src/async-profiler/stackFrame_x64.cpp | 72 +++ src/async-profiler/stackWalker.cpp | 118 +++++ src/async-profiler/symbols_darwin.cpp | 158 ++++++ src/async-profiler/symbols_linux.cpp | 554 ++++++++++++++++++++++ src/perf.cc | 1 - test/CMakeLists.txt | 6 + 29 files changed, 3326 insertions(+), 1 deletion(-) create mode 100644 include/async-profiler/arch.h create mode 100644 include/async-profiler/codeCache.h create mode 100644 include/async-profiler/dwarf.h create mode 100644 include/async-profiler/mutex.h create mode 100644 include/async-profiler/os.h create mode 100644 include/async-profiler/safeAccess.h create mode 100644 include/async-profiler/stackFrame.h create mode 100644 include/async-profiler/stackWalker.h create mode 100644 include/async-profiler/symbols.h create mode 100644 src/async-profiler/LICENSE create 
mode 100644 src/async-profiler/README.md create mode 100644 src/async-profiler/cgotraceback.cpp create mode 100644 src/async-profiler/cgotraceback.go create mode 100644 src/async-profiler/codeCache.cpp create mode 100644 src/async-profiler/dwarf.cpp create mode 100644 src/async-profiler/mutex.cpp create mode 100644 src/async-profiler/os.cpp create mode 100644 src/async-profiler/safeAccess.cpp create mode 100644 src/async-profiler/stackFrame_aarch64.cpp create mode 100644 src/async-profiler/stackFrame_arm.cpp create mode 100644 src/async-profiler/stackFrame_i386.cpp create mode 100644 src/async-profiler/stackFrame_ppc64.cpp create mode 100644 src/async-profiler/stackFrame_x64.cpp create mode 100644 src/async-profiler/stackWalker.cpp create mode 100644 src/async-profiler/symbols_darwin.cpp create mode 100644 src/async-profiler/symbols_linux.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 84b522390..708fae8ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -311,6 +311,9 @@ endif() # ---- Unit tests ---- +aux_source_directory(${CMAKE_SOURCE_DIR}/src/async-profiler ASYNC_PROFILER_SRC) +set(ASYNC_PROFILER_INCLUDE ${CMAKE_SOURCE_DIR}/include/async-profiler) + # Unit tests Add infrastructure for enabling tests option(BUILD_DDPROF_TESTING "Enable tests" ON) if(${BUILD_DDPROF_TESTING}) diff --git a/include/async-profiler/arch.h b/include/async-profiler/arch.h new file mode 100644 index 000000000..5cedcbfd9 --- /dev/null +++ b/include/async-profiler/arch.h @@ -0,0 +1,139 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ARCH_H +#define _ARCH_H + + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +static inline u64 atomicInc(volatile u64& var, u64 increment = 1) { + return __sync_fetch_and_add(&var, increment); +} + +static inline int atomicInc(volatile int& var, int increment = 1) { + return __sync_fetch_and_add(&var, increment); +} + +static inline u64 loadAcquire(u64& var) { + return __atomic_load_n(&var, __ATOMIC_ACQUIRE); +} + +static inline void storeRelease(u64& var, u64 value) { + return __atomic_store_n(&var, value, __ATOMIC_RELEASE); +} + + +#if defined(__x86_64__) || defined(__i386__) + +typedef unsigned char instruction_t; +const instruction_t BREAKPOINT = 0xcc; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = 2; +const int FRAME_PC_SLOT = 1; +const int ADJUST_RET = 1; +const int PLT_HEADER_SIZE = 16; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 8; // PERF_REG_X86_IP + +#define spinPause() asm volatile("pause") +#define rmb() asm volatile("lfence" : : : "memory") +#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r" (addr) : "memory") + +#elif defined(__arm__) || defined(__thumb__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0xe7f001f0; +const instruction_t BREAKPOINT_THUMB = 0xde01de01; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int ADJUST_RET = 0; +const int PLT_HEADER_SIZE = 20; +const int PLT_ENTRY_SIZE = 12; +const int PERF_REG_PC = 15; // PERF_REG_ARM_PC + +#define spinPause() asm volatile("yield") +#define rmb() asm volatile("dmb ish" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#elif defined(__aarch64__) + +typedef unsigned int instruction_t; 
+const instruction_t BREAKPOINT = 0xd4200000; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int ADJUST_RET = 0; +const int PLT_HEADER_SIZE = 32; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC + +#define spinPause() asm volatile("isb") +#define rmb() asm volatile("dmb ish" : : : "memory") +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0x7fe00008; +// We place the break point in the third instruction slot on PPCLE as the first two are skipped if +// the call comes from within the same compilation unit according to the LE ABI. +const int BREAKPOINT_OFFSET = 8; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 2; +const int ADJUST_RET = 0; +const int PLT_HEADER_SIZE = 24; +const int PLT_ENTRY_SIZE = 24; +const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP + +#define spinPause() asm volatile("yield") // does nothing, but using or 1,1,1 would lead to other problems +#define rmb() asm volatile ("sync" : : : "memory") // lwsync would do but better safe than sorry +#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) + +#else + +#error "Compiling on unsupported arch" + +#endif + + +// Return address signing support. +// Apple M1 has 47 bit virtual addresses. 
+#if defined(__aarch64__) && defined(__APPLE__) +# define ADDRESS_BITS 47 +# define WX_MEMORY true +#else +# define WX_MEMORY false +#endif + +#ifdef ADDRESS_BITS +static inline const void* stripPointer(const void* p) { + return (const void*) ((unsigned long)p & ((1UL << ADDRESS_BITS) - 1)); +} +#else +# define stripPointer(p) (p) +#endif + + +#endif // _ARCH_H diff --git a/include/async-profiler/codeCache.h b/include/async-profiler/codeCache.h new file mode 100644 index 000000000..83d8b6f21 --- /dev/null +++ b/include/async-profiler/codeCache.h @@ -0,0 +1,190 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _CODECACHE_H +#define _CODECACHE_H + +//#include + + +#define NO_MIN_ADDRESS ((const void*)-1) +#define NO_MAX_ADDRESS ((const void*)0) + +typedef bool (*NamePredicate)(const char* name); + +const int INITIAL_CODE_CACHE_CAPACITY = 1000; +const int MAX_NATIVE_LIBS = 2048; + + +class NativeFunc { + private: + short _lib_index; + char _mark; + char _reserved; + char _name[0]; + + static NativeFunc* from(const char* name) { + return (NativeFunc*)(name - sizeof(NativeFunc)); + } + + public: + static char* create(const char* name, short lib_index); + static void destroy(char* name); + + static short libIndex(const char* name) { + return from(name)->_lib_index; + } + + static bool isMarked(const char* name) { + return from(name)->_mark != 0; + } + + static void mark(const char* name) { + from(name)->_mark = 1; + } +}; + + +class CodeBlob { + public: + const void* _start; + const void* _end; + char* _name; + + static int comparator(const void* c1, const void* c2) { + CodeBlob* cb1 = (CodeBlob*)c1; + CodeBlob* cb2 = (CodeBlob*)c2; + if (cb1->_start < cb2->_start) { + return -1; + } else if (cb1->_start > cb2->_start) { + return 1; + } else if (cb1->_end == cb2->_end) { + return 0; + } else { + return cb1->_end > cb2->_end ? 
-1 : 1; + } + } +}; + + +class FrameDesc; + +class CodeCache { + protected: + char* _name; + short _lib_index; + const void* _min_address; + const void* _max_address; + const char* _text_base; + + void** _got_start; + void** _got_end; + bool _got_patchable; + + FrameDesc* _dwarf_table; + int _dwarf_table_length; + + int _capacity; + int _count; + CodeBlob* _blobs; + + void expand(); + + public: + CodeCache(const char* name, + short lib_index = -1, + const void* min_address = NO_MIN_ADDRESS, + const void* max_address = NO_MAX_ADDRESS); + + ~CodeCache(); + + const char* name() const { + return _name; + } + + const void* minAddress() const { + return _min_address; + } + + const void* maxAddress() const { + return _max_address; + } + + bool contains(const void* address) const { + return address >= _min_address && address < _max_address; + } + + void setTextBase(const char* text_base) { + _text_base = text_base; + } + + const char *getTextBase() { + return _text_base; + } + + void** gotStart() const { + return _got_start; + } + + void** gotEnd() const { + return _got_end; + } + + void add(const void* start, int length, const char* name, bool update_bounds = false); + void updateBounds(const void* start, const void* end); + void sort(); + void mark(NamePredicate predicate); + + CodeBlob* find(const void* address); + const char* binarySearch(const void* address); + const void* findSymbol(const char* name); + const void* findSymbolByPrefix(const char* prefix); + const void* findSymbolByPrefix(const char* prefix, int prefix_len); + + void setGlobalOffsetTable(void** start, void** end, bool patchable); + void** findGlobalOffsetEntry(void* address); + void makeGotPatchable(); + + void setDwarfTable(FrameDesc* table, int length); + FrameDesc* findFrameDesc(const void* pc); +}; + + +class CodeCacheArray { + private: + CodeCache* _libs[MAX_NATIVE_LIBS]; + int _count; + + public: + CodeCacheArray() : _count(0) { + } + + CodeCache* operator[](int index) { + return _libs[index]; + 
} + + int count() { + return __atomic_load_n(&_count, __ATOMIC_ACQUIRE); + } + + void add(CodeCache* lib) { + int index = __atomic_load_n(&_count, __ATOMIC_ACQUIRE); + _libs[index] = lib; + __atomic_store_n(&_count, index + 1, __ATOMIC_RELEASE); + } +}; + +#endif // _CODECACHE_H diff --git a/include/async-profiler/dwarf.h b/include/async-profiler/dwarf.h new file mode 100644 index 000000000..85956aea0 --- /dev/null +++ b/include/async-profiler/dwarf.h @@ -0,0 +1,160 @@ +/* + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef _DWARF_H
+#define _DWARF_H
+
+#include <stddef.h>
+#include "arch.h"
+
+
+#if defined(__x86_64__)
+
+#define DWARF_SUPPORTED true
+
+const int DW_REG_FP = 6;
+const int DW_REG_SP = 7;
+const int DW_REG_PC = 16;
+
+#elif defined(__i386__)
+
+#define DWARF_SUPPORTED true
+
+const int DW_REG_FP = 5;
+const int DW_REG_SP = 4;
+const int DW_REG_PC = 8;
+
+#else
+
+#define DWARF_SUPPORTED false
+
+const int DW_REG_FP = 0;
+const int DW_REG_SP = 1;
+const int DW_REG_PC = 2;
+
+#endif
+
+const int DW_REG_PLT = 128;  // denotes special rule for PLT entries
+const int DW_REG_INVALID = 255;  // denotes unsupported configuration
+
+const int DW_PC_OFFSET = 1;
+const int DW_SAME_FP = 0x80000000;
+const int DW_STACK_SLOT = sizeof(void*);
+
+
+struct FrameDesc {
+    u32 loc;
+    int cfa;
+    int fp_off;
+
+    static FrameDesc default_frame;
+
+    static int comparator(const void* p1, const void* p2) {
+        FrameDesc* fd1 = (FrameDesc*)p1;
+        FrameDesc* fd2 = (FrameDesc*)p2;
+        return (fd1->loc > fd2->loc) - (fd1->loc < fd2->loc);  // three-way compare; (int)(u32 - u32) flips sign once locs differ by >= 2^31
+    }
+};
+
+
+class DwarfParser {
+  private:
+    const char* _name;
+    const char* _image_base;
+    const char* _ptr;
+
+    int _capacity;
+    int _count;
+    FrameDesc* _table;
+    FrameDesc* _prev;
+
+    u32 _code_align;
+    int _data_align;
+
+    const char* add(size_t size) {
+        const char* ptr = _ptr;
+        _ptr = ptr + size;
+        return ptr;
+    }
+
+    u8 get8() {
+        return *_ptr++;
+    }
+
+    u16 get16() {
+        return *(u16*)add(2);
+    }
+
+    u32 get32() {
+        return *(u32*)add(4);
+    }
+
+    u32 getLeb() {
+        u32 result = 0;
+        for (u32 shift = 0; ; shift += 7) {
+            u8 b = *_ptr++;
+            result |= (b & 0x7f) << shift;
+            if ((b & 0x80) == 0) {
+                return result;
+            }
+        }
+    }
+
+    int getSLeb() {
+        int result = 0;
+        for (u32 shift = 0; ; shift += 7) {
+            u8 b = *_ptr++;
+            result |= (b & 0x7f) << shift;
+            if ((b & 0x80) == 0) {
+                if ((b & 0x40) != 0 && (shift += 7) < 32) {
+                    result |= (int)(~0U << shift);  // sign-extend via unsigned shift; left-shifting -1 is undefined before C++20
+                }
+                return result;
+            }
+        }
+    }
+
+    void skipLeb() {
+        while (*_ptr++ & 0x80) {}
+    }
+
+    const char* getPtr() {
+        const char* ptr = _ptr;
+        return ptr + *(int*)add(4);
+    }
+
+    void parse(const char* eh_frame_hdr);
+    void parseCie();
+    void parseFde();
+    void parseInstructions(u32 loc, const char* end);
+    int parseExpression();
+
+    void addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off);
+    FrameDesc* addRecordRaw(u32 loc, int cfa, int fp_off);
+
+  public:
+    DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr);
+
+    FrameDesc* table() const {
+        return _table;
+    }
+
+    int count() const {
+        return _count;
+    }
+};
+
+#endif // _DWARF_H
diff --git a/include/async-profiler/mutex.h b/include/async-profiler/mutex.h
new file mode 100644
index 000000000..48e69205c
--- /dev/null
+++ b/include/async-profiler/mutex.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _MUTEX_H
+#define _MUTEX_H
+
+#include <pthread.h>
+#include "arch.h"
+
+
+class Mutex {
+  protected:
+    pthread_mutex_t _mutex;
+
+  public:
+    Mutex();
+
+    void lock();
+    void unlock();
+};
+
+class WaitableMutex : public Mutex {
+  protected:
+    pthread_cond_t _cond;
+
+  public:
+    WaitableMutex();
+
+    bool waitUntil(u64 wall_time);
+    void notify();
+};
+
+class MutexLocker {
+  private:
+    Mutex* _mutex;
+
+  public:
+    MutexLocker(Mutex& mutex) : _mutex(&mutex) {
+        _mutex->lock();
+    }
+
+    ~MutexLocker() {
+        _mutex->unlock();
+    }
+};
+
+#endif // _MUTEX_H
diff --git a/include/async-profiler/os.h b/include/async-profiler/os.h
new file mode 100644
index 000000000..ad35dc8fa
--- /dev/null
+++ b/include/async-profiler/os.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Nick Ripley
+ * Copyright 2018 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modified by Nick Ripley to extract components needed for call stack unwinding
+ */
+
+#ifndef _OS_H
+#define _OS_H
+
+#include <stddef.h>
+
+class OS {
+  public:
+    static const size_t page_size;
+    static const size_t page_mask;
+};
+
+#endif // _OS_H
diff --git a/include/async-profiler/safeAccess.h b/include/async-profiler/safeAccess.h
new file mode 100644
index 000000000..8afe71571
--- /dev/null
+++ b/include/async-profiler/safeAccess.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2021 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _SAFEACCESS_H
+#define _SAFEACCESS_H
+
+#include <stdint.h>
+#include "arch.h"
+
+#ifdef __clang__
+# define NOINLINE __attribute__((noinline))
+#else
+# define NOINLINE __attribute__((noinline,noclone))
+#endif
+
+namespace SafeAccess {
+
+NOINLINE __attribute__((aligned(16))) void* load(void** ptr);
+
+}
+
+#endif // _SAFEACCESS_H
diff --git a/include/async-profiler/stackFrame.h b/include/async-profiler/stackFrame.h
new file mode 100644
index 000000000..b658e10fe
--- /dev/null
+++ b/include/async-profiler/stackFrame.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2017 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _STACKFRAME_H
+#define _STACKFRAME_H
+
+#include <stdint.h>
+#include <ucontext.h>
+#include "arch.h"
+
+
+class StackFrame {
+  private:
+    ucontext_t* _ucontext;
+
+    static bool withinCurrentStack(uintptr_t address) {
+        // Check that the address is not too far from the stack pointer of current context
+        void* real_sp;
+        return address - (uintptr_t)&real_sp <= 0xffff;
+    }
+
+  public:
+    StackFrame(void* ucontext) {
+        _ucontext = (ucontext_t*)ucontext;
+    }
+
+    void restore(uintptr_t saved_pc, uintptr_t saved_sp, uintptr_t saved_fp) {
+        if (_ucontext != NULL) {
+            pc() = saved_pc;
+            sp() = saved_sp;
+            fp() = saved_fp;
+        }
+    }
+
+    uintptr_t stackAt(int slot) {
+        return ((uintptr_t*)sp())[slot];
+    }
+
+    uintptr_t& pc();
+    uintptr_t& sp();
+    uintptr_t& fp();
+
+    uintptr_t& retval();
+    uintptr_t arg0();
+    uintptr_t arg1();
+    uintptr_t arg2();
+    uintptr_t arg3();
+
+    void ret();
+
+    bool popStub(instruction_t* entry, const char* name);
+    bool popMethod(instruction_t* entry);
+
+    bool checkInterruptedSyscall();
+
+    // Check if PC points to a syscall instruction
+    static bool isSyscall(instruction_t* pc);
+};
+
+#endif // _STACKFRAME_H
diff --git a/include/async-profiler/stackWalker.h b/include/async-profiler/stackWalker.h
new file mode 100644
index 000000000..a16e1f48f
--- /dev/null
+++ b/include/async-profiler/stackWalker.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2022 Nick Ripley
+ * Copyright 2021 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modified by Nick Ripley to extract components needed for call stack unwinding
+ */
+
+#ifndef _STACKWALKER_H
+#define _STACKWALKER_H
+
+#include <stdint.h>
+
+#include "codeCache.h"
+
+struct StackContext {
+    const void* pc;
+    uintptr_t sp;
+    uintptr_t fp;
+
+    void set(const void* pc, uintptr_t sp, uintptr_t fp) {
+        this->pc = pc;
+        this->sp = sp;
+        this->fp = fp;
+    }
+};
+
+class StackWalker {
+  public:
+    static int walkDwarf(CodeCacheArray *cache, void* ucontext, const void** callchain, int max_depth, int skip);
+};
+
+#endif // _STACKWALKER_H
diff --git a/include/async-profiler/symbols.h b/include/async-profiler/symbols.h
new file mode 100644
index 000000000..084376c2d
--- /dev/null
+++ b/include/async-profiler/symbols.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2017 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef _SYMBOLS_H +#define _SYMBOLS_H + +#include "codeCache.h" +#include "mutex.h" + + +class Symbols { + private: + static Mutex _parse_lock; + static bool _have_kernel_symbols; + + public: + static void parseKernelSymbols(CodeCache* cc); + static void parseLibraries(CodeCacheArray* array, bool kernel_symbols); + + static bool haveKernelSymbols() { + return _have_kernel_symbols; + } +}; + +#endif // _SYMBOLS_H diff --git a/src/async-profiler/LICENSE b/src/async-profiler/LICENSE new file mode 100644 index 000000000..8dada3eda --- /dev/null +++ b/src/async-profiler/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/async-profiler/README.md b/src/async-profiler/README.md new file mode 100644 index 000000000..56d70eb80 --- /dev/null +++ b/src/async-profiler/README.md @@ -0,0 +1,13 @@ +This directory contains a modified subset of the code from the +[async-profiler](https://github.com/jvm-profiling-tools/async-profiler) project +needed to implement async signal-safe DWARF call stack unwinding. This code +came from commit [56ae519224ed9a9b081fd8c384326784326fae43](https://github.com/jvm-profiling-tools/async-profiler/commit/56ae519224ed9a9b081fd8c384326784326fae43) + +The following changes have been made to the original code: + +* Anything not directly related to DWARF call stack unwinding has been removed. +* Java-related components of the call stack unwinding code have been removed. +* The `Profiler` class has been removed and its `getNativeFrames` method has + been extracted to a stand-alone `async_profiler_backtrace` function. Its + `CodeCacheArray` has been made into a global variable (wrapped by a singleton). +* The SEGV handler functionality is not used. 
\ No newline at end of file diff --git a/src/async-profiler/cgotraceback.cpp b/src/async-profiler/cgotraceback.cpp new file mode 100644 index 000000000..511cc86a1 --- /dev/null +++ b/src/async-profiler/cgotraceback.cpp @@ -0,0 +1,210 @@ +#include +#include + +#include "codeCache.h" +#include "stackWalker.h" +#include "symbols.h" + +struct CodeCacheArraySingleton { + static CodeCacheArray *getInstance(); + static CodeCacheArray *instance; +}; + +CodeCacheArray *CodeCacheArraySingleton::instance = nullptr; +CodeCacheArray *CodeCacheArraySingleton::getInstance() { + // XXX(nick): I don't know that I need to care about concurrency. This + // should be read-only once init() is called. + if (instance == nullptr) { + instance = new CodeCacheArray(); + } + return instance; +} + +static CodeBlob *asmcgocall_bounds = nullptr; +static uintptr_t asmcgocall_base = 0; + +static __attribute__((constructor)) void init(void) { + auto a = CodeCacheArraySingleton::getInstance(); + Symbols::parseLibraries(a, false); + + int count = a->count(); + for (int i = 0; i < count; i++) { + CodeCache *c = a->operator[](i); + const void *p = NULL; + p = c->findSymbol("runtime.asmcgocall.abi0"); + if (p == nullptr) { + // asmcgocall name has "abi0" suffix on more recent Go versions + // but not on older versions + p = c->findSymbol("runtime.asmcgocall"); + } + if (p == nullptr) { + continue; + } + auto cb = c->find(p); + if (cb != nullptr) { + asmcgocall_bounds = cb; + asmcgocall_base = (uintptr_t) c->getTextBase(); + } + } +} + +void populateStackContext(StackContext &sc, void *ucontext); +int stackWalk(CodeCacheArray *cache, StackContext &sc, const void** callchain, int max_depth, int skip); +bool stepStackContext(StackContext &sc, CodeCacheArray *cache); + +extern "C" { + +static int enabled = 1; + +// for benchmarking +void async_cgo_traceback_internal_set_enabled(int value) { + enabled = value; +} + +#define STACK_MAX 32 + +struct cgo_context { + const void *pc; + uintptr_t sp; + uintptr_t 
fp; + uintptr_t stack[STACK_MAX]; + int cached; + int inuse; +}; + +// There may be multiple C->Go transitions for a single C thread, so we have a +// per-thread free list of contexts. +// +// Thread-local storage for the context list is safe. A context will be taken +// from the list when a C thread transitions to Go, and that context will be +// released as soon as the Go call returns. Thus the thread that the context +// came from will be alive the entire time the context is in use. +#define cgo_contexts_length 256 +static __thread struct cgo_context cgo_contexts[cgo_contexts_length]; + +// XXX: The runtime.SetCgoTraceback docs claim that cgo_context can be called +// from a signal handler. I know in practice that doesn't happen but maybe it +// could in the future. If so, can we make sure that accessing this list of +// cgo_contexts is signal safe? + +static struct cgo_context *cgo_context_get(void) { + for (int i = 0; i < cgo_contexts_length; i++) { + if (cgo_contexts[i].inuse == 0) { + cgo_contexts[i].inuse = 1; + cgo_contexts[i].cached = 0; + return &cgo_contexts[i]; + } + } + return NULL; +} + +static void cgo_context_release(struct cgo_context *c) { + c->inuse = 0; +} + +// truncate_asmcgocall truncates a call stack after asmcgocall, if asmcgocall is +// present in the stack. This function is the first function in the C call stack +// for a Go -> C call, and it is not the responsibility of this library to +// unwind past that function. +static void truncate_asmcgocall(void **stack, int size) { + if (asmcgocall_bounds == nullptr) { + return; + } + for (int i = 0; i < size; i++) { + uintptr_t a = (uintptr_t) stack[i]; + a += asmcgocall_base; + if ((a >= (uintptr_t) asmcgocall_bounds->_start) && (a <= (uintptr_t) asmcgocall_bounds->_end)) { + if ((i + 1) < size) { + // zero out the thing AFTER asmcgocall. 
We want to stop at + // asmcgocall since that's the "top" of the C stack in a + // Go -> C (-> Go) call + stack[i + 1] = 0; + return; + } + } + } +} + +struct cgo_context_arg { + uintptr_t p; +}; + +void async_cgo_context(void *p) { + if (enabled == 0) { + return; + } + + cgo_context_arg *arg = (cgo_context_arg *)p; + struct cgo_context *ctx = (struct cgo_context *) arg->p; + if (ctx != NULL) { + cgo_context_release(ctx); + return; + } + ctx = cgo_context_get(); + if (ctx == NULL) { + return; + } + StackContext sc; + populateStackContext(sc, nullptr); + CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); + // There are two frames in the call stack we should skip. The first is this + // function, and the second is _cgo_wait_runtime_init_done, which calls this + // function to save the C call stack context before calling into Go code. + // The next frame after that is the exported C->Go function, which is where + // unwinding should begin for this context in the traceback function. + stepStackContext(sc, cache); + stepStackContext(sc, cache); + ctx->pc = sc.pc; + ctx->sp = sc.sp; + ctx->fp = sc.fp; + arg->p = (uintptr_t) ctx; + return; +} + +struct cgo_traceback_arg { + uintptr_t context; + uintptr_t sig_context; + uintptr_t* buf; + uintptr_t max; +}; + +void async_cgo_traceback(void *p) { + if (enabled == 0) { + return; + } + + struct cgo_traceback_arg *arg = (struct cgo_traceback_arg *)p; + struct cgo_context *ctx = NULL; + StackContext sc; + + // If we had a previous context, then we're being called to unwind some + // previous C portion of a mixed C/Go call stack. We use the call stack + // information saved in the context. 
+ if (arg->context != 0) { + ctx = (struct cgo_context *) arg->context; + if (ctx->cached == 0) { + CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); + sc.pc = ctx->pc; + sc.sp = ctx->sp; + sc.fp = ctx->fp; + int n = stackWalk(cache, sc, (const void **) ctx->stack, STACK_MAX, 0); + truncate_asmcgocall((void **) ctx->stack, n); + ctx->cached = 1; + } + uintptr_t n = (arg->max < STACK_MAX) ? arg->max : STACK_MAX; + memcpy(arg->buf, ctx->stack, n * sizeof(uintptr_t)); + return; + } + + populateStackContext(sc, (void *) arg->sig_context); + CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); + int n = stackWalk(cache, sc, (const void **) arg->buf, arg->max, 0); + if (n < arg->max) { + arg->buf[n] = 0; + } + truncate_asmcgocall((void **) arg->buf, n); + + return; +} + +} // extern "C" \ No newline at end of file diff --git a/src/async-profiler/cgotraceback.go b/src/async-profiler/cgotraceback.go new file mode 100644 index 000000000..966312db0 --- /dev/null +++ b/src/async-profiler/cgotraceback.go @@ -0,0 +1,25 @@ +package asyncprofiler + +/* +#cgo CXXFLAGS: -fno-omit-frame-pointer -g -O2 -std=c++11 +#cgo darwin CXXFLAGS: -D_XOPEN_SOURCE + +extern void async_cgo_context(void *); +extern void async_cgo_traceback(void *); +extern void async_cgo_traceback_internal_set_enabled(int); +*/ +import "C" +import "unsafe" + +var ( + CgoContext = unsafe.Pointer(C.async_cgo_context) + CgoTraceback = unsafe.Pointer(C.async_cgo_traceback) +) + +func SetEnabled(status bool) { + var enabled C.int + if status { + enabled = 1 + } + C.async_cgo_traceback_internal_set_enabled(enabled) +} diff --git a/src/async-profiler/codeCache.cpp b/src/async-profiler/codeCache.cpp new file mode 100644 index 000000000..c1cec91f9 --- /dev/null +++ b/src/async-profiler/codeCache.cpp @@ -0,0 +1,228 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2016 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#include +#include +#include +#include +#include "codeCache.h" +#include "dwarf.h" +#include "os.h" + + +char* NativeFunc::create(const char* name, short lib_index) { + NativeFunc* f = (NativeFunc*)malloc(sizeof(NativeFunc) + 1 + strlen(name)); + f->_lib_index = lib_index; + f->_mark = 0; + return strcpy(f->_name, name); +} + +void NativeFunc::destroy(char* name) { + free(from(name)); +} + + +CodeCache::CodeCache(const char* name, short lib_index, const void* min_address, const void* max_address) { + _name = NativeFunc::create(name, -1); + _lib_index = lib_index; + _min_address = min_address; + _max_address = max_address; + _text_base = NULL; + + _got_start = NULL; + _got_end = NULL; + _got_patchable = false; + + _dwarf_table = NULL; + _dwarf_table_length = 0; + + _capacity = INITIAL_CODE_CACHE_CAPACITY; + _count = 0; + _blobs = new CodeBlob[_capacity]; +} + +CodeCache::~CodeCache() { + for (int i = 0; i < _count; i++) { + NativeFunc::destroy(_blobs[i]._name); + } + NativeFunc::destroy(_name); + delete[] _blobs; + free(_dwarf_table); +} + +void CodeCache::expand() { + CodeBlob* old_blobs = _blobs; + CodeBlob* new_blobs = new CodeBlob[_capacity * 2]; + + memcpy(new_blobs, old_blobs, _count * sizeof(CodeBlob)); + + _capacity *= 2; + _blobs = new_blobs; + delete[] old_blobs; +} + +void CodeCache::add(const void* start, int length, const char* name, bool update_bounds) { + char* name_copy = NativeFunc::create(name, _lib_index); + 
// Replace non-printable characters + for (char* s = name_copy; *s != 0; s++) { + if (*s < ' ') *s = '?'; + } + + if (_count >= _capacity) { + expand(); + } + + const void* end = (const char*)start + length; + _blobs[_count]._start = start; + _blobs[_count]._end = end; + _blobs[_count]._name = name_copy; + _count++; + + if (update_bounds) { + updateBounds(start, end); + } +} + +void CodeCache::updateBounds(const void* start, const void* end) { + if (start < _min_address) _min_address = start; + if (end > _max_address) _max_address = end; +} + +void CodeCache::sort() { + if (_count == 0) return; + + qsort(_blobs, _count, sizeof(CodeBlob), CodeBlob::comparator); + + if (_min_address == NO_MIN_ADDRESS) _min_address = _blobs[0]._start; + if (_max_address == NO_MAX_ADDRESS) _max_address = _blobs[_count - 1]._end; +} + +void CodeCache::mark(NamePredicate predicate) { + for (int i = 0; i < _count; i++) { + const char* blob_name = _blobs[i]._name; + if (blob_name != NULL && predicate(blob_name)) { + NativeFunc::mark(blob_name); + } + } +} + +CodeBlob* CodeCache::find(const void* address) { + for (int i = 0; i < _count; i++) { + if (address >= _blobs[i]._start && address < _blobs[i]._end) { + return &_blobs[i]; + } + } + return NULL; +} + +const char* CodeCache::binarySearch(const void* address) { + int low = 0; + int high = _count - 1; + + while (low <= high) { + int mid = (unsigned int)(low + high) >> 1; + if (_blobs[mid]._end <= address) { + low = mid + 1; + } else if (_blobs[mid]._start > address) { + high = mid - 1; + } else { + return _blobs[mid]._name; + } + } + + // Symbols with zero size can be valid functions: e.g. ASM entry points or kernel code. + // Also, in some cases (endless loop) the return address may point beyond the function. 
+ if (low > 0 && (_blobs[low - 1]._start == _blobs[low - 1]._end || _blobs[low - 1]._end == address)) { + return _blobs[low - 1]._name; + } + return _name; +} + +const void* CodeCache::findSymbol(const char* name) { + for (int i = 0; i < _count; i++) { + const char* blob_name = _blobs[i]._name; + if (blob_name != NULL && strcmp(blob_name, name) == 0) { + return _blobs[i]._start; + } + } + return NULL; +} + +const void* CodeCache::findSymbolByPrefix(const char* prefix) { + return findSymbolByPrefix(prefix, strlen(prefix)); +} + +const void* CodeCache::findSymbolByPrefix(const char* prefix, int prefix_len) { + for (int i = 0; i < _count; i++) { + const char* blob_name = _blobs[i]._name; + if (blob_name != NULL && strncmp(blob_name, prefix, prefix_len) == 0) { + return _blobs[i]._start; + } + } + return NULL; +} + +void CodeCache::setGlobalOffsetTable(void** start, void** end, bool patchable) { + _got_start = start; + _got_end = end; + _got_patchable = patchable; +} + +void** CodeCache::findGlobalOffsetEntry(void* address) { + for (void** entry = _got_start; entry < _got_end; entry++) { + if (*entry == address) { + makeGotPatchable(); + return entry; + } + } + return NULL; +} + +void CodeCache::makeGotPatchable() { + if (!_got_patchable) { + uintptr_t got_start = (uintptr_t)_got_start & ~OS::page_mask; + uintptr_t got_size = ((uintptr_t)_got_end - got_start + OS::page_mask) & ~OS::page_mask; + mprotect((void*)got_start, got_size, PROT_READ | PROT_WRITE); + _got_patchable = true; + } +} + +void CodeCache::setDwarfTable(FrameDesc* table, int length) { + _dwarf_table = table; + _dwarf_table_length = length; +} + +FrameDesc* CodeCache::findFrameDesc(const void* pc) { + u32 target_loc = (const char*)pc - _text_base; + int low = 0; + int high = _dwarf_table_length - 1; + + while (low <= high) { + int mid = (unsigned int)(low + high) >> 1; + if (_dwarf_table[mid].loc < target_loc) { + low = mid + 1; + } else if (_dwarf_table[mid].loc > target_loc) { + high = mid - 1; + } 
else { + return &_dwarf_table[mid]; + } + } + + return low > 0 ? &_dwarf_table[low - 1] : NULL; +} diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp new file mode 100644 index 000000000..7fb22d0d5 --- /dev/null +++ b/src/async-profiler/dwarf.cpp @@ -0,0 +1,351 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#include +#include "dwarf.h" + + +enum { + DW_CFA_nop = 0x0, + DW_CFA_set_loc = 0x1, + DW_CFA_advance_loc1 = 0x2, + DW_CFA_advance_loc2 = 0x3, + DW_CFA_advance_loc4 = 0x4, + DW_CFA_offset_extended = 0x5, + DW_CFA_restore_extended = 0x6, + DW_CFA_undefined = 0x7, + DW_CFA_same_value = 0x8, + DW_CFA_register = 0x9, + DW_CFA_remember_state = 0xa, + DW_CFA_restore_state = 0xb, + DW_CFA_def_cfa = 0xc, + DW_CFA_def_cfa_register = 0xd, + DW_CFA_def_cfa_offset = 0xe, + DW_CFA_def_cfa_expression = 0xf, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_GNU_args_size = 0x2e, + + DW_CFA_advance_loc = 0x1, + DW_CFA_offset = 0x2, + DW_CFA_restore = 0x3, +}; + +enum { + DW_OP_breg_pc = 0x70 + DW_REG_PC, + DW_OP_const1u = 0x08, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + 
DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_minus = 0x1c, + DW_OP_plus = 0x22, +}; + + +FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | (2 * DW_STACK_SLOT) << 8, -2 * DW_STACK_SLOT}; + + +DwarfParser::DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr) { + _name = name; + _image_base = image_base; + + _capacity = 128; + _count = 0; + _table = (FrameDesc*)malloc(_capacity * sizeof(FrameDesc)); + _prev = NULL; + + _code_align = sizeof(instruction_t); + _data_align = -(int)sizeof(void*); + + parse(eh_frame_hdr); +} + +void DwarfParser::parse(const char* eh_frame_hdr) { + u8 version = eh_frame_hdr[0]; + u8 eh_frame_ptr_enc = eh_frame_hdr[1]; + u8 fde_count_enc = eh_frame_hdr[2]; + u8 table_enc = eh_frame_hdr[3]; + + if (version != 1 || (eh_frame_ptr_enc & 0x7) != 0x3 || (fde_count_enc & 0x7) != 0x3 || (table_enc & 0xf7) != 0x33) { + return; + } + + int fde_count = *(int*)(eh_frame_hdr + 8); + int* table = (int*)(eh_frame_hdr + 16); + for (int i = 0; i < fde_count; i++) { + _ptr = eh_frame_hdr + table[i * 2]; + parseFde(); + } +} + +void DwarfParser::parseCie() { + u32 cie_len = get32(); + if (cie_len == 0 || cie_len == 0xffffffff) { + return; + } + + const char* cie_start = _ptr; + _ptr += 5; + while (*_ptr++) {} + _code_align = getLeb(); + _data_align = getSLeb(); + _ptr = cie_start + cie_len; +} + +void DwarfParser::parseFde() { + u32 fde_len = get32(); + if (fde_len == 0 || fde_len == 0xffffffff) { + return; + } + + const char* fde_start = _ptr; + u32 cie_offset = get32(); + if (_count == 0) { + _ptr = fde_start - cie_offset; + parseCie(); + _ptr = fde_start + 4; + } + + u32 range_start = getPtr() - _image_base; + u32 range_len = get32(); + _ptr += getLeb(); + parseInstructions(range_start, fde_start + fde_len); + addRecord(range_start + range_len, DW_REG_SP, DW_STACK_SLOT, DW_SAME_FP); +} + +void DwarfParser::parseInstructions(u32 loc, const char* end) { + const u32 code_align 
= _code_align; + const int data_align = _data_align; + + u32 cfa_reg = DW_REG_SP; + int cfa_off = DW_STACK_SLOT; + int fp_off = DW_SAME_FP; + int pc_off = -DW_STACK_SLOT; + + u32 rem_cfa_reg; + int rem_cfa_off; + int rem_fp_off; + int rem_pc_off; + + while (_ptr < end) { + u8 op = get8(); + switch (op >> 6) { + case 0: + switch (op) { + case DW_CFA_nop: + case DW_CFA_set_loc: + _ptr = end; + break; + case DW_CFA_advance_loc1: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += get8() * code_align; + break; + case DW_CFA_advance_loc2: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += get16() * code_align; + break; + case DW_CFA_advance_loc4: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += get32() * code_align; + break; + case DW_CFA_offset_extended: + switch (getLeb()) { + case DW_REG_FP: fp_off = getLeb() * data_align; break; + case DW_REG_PC: pc_off = getLeb() * data_align; break; + default: skipLeb(); + } + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + skipLeb(); + break; + case DW_CFA_register: + skipLeb(); + skipLeb(); + break; + case DW_CFA_remember_state: + rem_cfa_reg = cfa_reg; + rem_cfa_off = cfa_off; + rem_fp_off = fp_off; + rem_pc_off = pc_off; + break; + case DW_CFA_restore_state: + cfa_reg = rem_cfa_reg; + cfa_off = rem_cfa_off; + fp_off = rem_fp_off; + pc_off = rem_pc_off; + break; + case DW_CFA_def_cfa: + cfa_reg = getLeb(); + cfa_off = getLeb(); + break; + case DW_CFA_def_cfa_register: + cfa_reg = getLeb(); + break; + case DW_CFA_def_cfa_offset: + cfa_off = getLeb(); + break; + case DW_CFA_def_cfa_expression: { + u32 len = getLeb(); + cfa_reg = len == 11 ? 
DW_REG_PLT : DW_REG_INVALID; + cfa_off = DW_STACK_SLOT; + _ptr += len; + break; + } + case DW_CFA_expression: + skipLeb(); + _ptr += getLeb(); + break; + case DW_CFA_offset_extended_sf: + switch (getLeb()) { + case DW_REG_FP: fp_off = getSLeb() * data_align; break; + case DW_REG_PC: pc_off = getSLeb() * data_align; break; + default: skipLeb(); + } + break; + case DW_CFA_def_cfa_sf: + cfa_reg = getLeb(); + cfa_off = getSLeb() * data_align; + break; + case DW_CFA_def_cfa_offset_sf: + cfa_off = getSLeb() * data_align; + break; + case DW_CFA_val_offset: + case DW_CFA_val_offset_sf: + skipLeb(); + skipLeb(); + break; + case DW_CFA_val_expression: + if (getLeb() == DW_REG_PC) { + int pc_off = parseExpression(); + if (pc_off != 0) { + fp_off = DW_PC_OFFSET | (pc_off << 1); + } + } else { + _ptr += getLeb(); + } + break; + case DW_CFA_GNU_args_size: + skipLeb(); + break; + default: + return; + } + break; + case DW_CFA_advance_loc: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += (op & 0x3f) * code_align; + break; + case DW_CFA_offset: + switch (op & 0x3f) { + case DW_REG_FP: fp_off = getLeb() * data_align; break; + case DW_REG_PC: pc_off = getLeb() * data_align; break; + default: skipLeb(); + } + break; + case DW_CFA_restore: + break; + } + } + + addRecord(loc, cfa_reg, cfa_off, fp_off); +} + +// Parse a limited subset of DWARF expressions, which is used in DW_CFA_val_expression +// to point to the previous PC relative to the current PC. +// Returns the offset of the previous PC from the current PC. 
+int DwarfParser::parseExpression() { + int pc_off = 0; + int tos = 0; + + u32 len = getLeb(); + const char* end = _ptr + len; + + while (_ptr < end) { + u8 op = get8(); + switch (op) { + case DW_OP_breg_pc: + pc_off = getSLeb(); + break; + case DW_OP_const1u: + tos = get8(); + break; + case DW_OP_const1s: + tos = (signed char)get8(); + break; + case DW_OP_const2u: + tos = get16(); + break; + case DW_OP_const2s: + tos = (short)get16(); + break; + case DW_OP_const4u: + case DW_OP_const4s: + tos = get32(); + break; + case DW_OP_constu: + tos = getLeb(); + break; + case DW_OP_consts: + tos = getSLeb(); + break; + case DW_OP_minus: + pc_off -= tos; + break; + case DW_OP_plus: + pc_off += tos; + break; + default: + _ptr = end; + return 0; + } + } + + return pc_off; +} + +void DwarfParser::addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off) { + int cfa = cfa_reg | cfa_off << 8; + if (_prev == NULL || (_prev->loc == loc && --_count >= 0) || _prev->cfa != cfa || _prev->fp_off != fp_off) { + _prev = addRecordRaw(loc, cfa, fp_off); + } +} + +FrameDesc* DwarfParser::addRecordRaw(u32 loc, int cfa, int fp_off) { + if (_count >= _capacity) { + _capacity *= 2; + _table = (FrameDesc*)realloc(_table, _capacity * sizeof(FrameDesc)); + } + + FrameDesc* f = &_table[_count++]; + f->loc = loc; + f->cfa = cfa; + f->fp_off = fp_off; + return f; +} diff --git a/src/async-profiler/mutex.cpp b/src/async-profiler/mutex.cpp new file mode 100644 index 000000000..b15f23b2f --- /dev/null +++ b/src/async-profiler/mutex.cpp @@ -0,0 +1,46 @@ +/* + * Copyright 2018 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mutex.h" + + +Mutex::Mutex() { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&_mutex, &attr); +} + +void Mutex::lock() { + pthread_mutex_lock(&_mutex); +} + +void Mutex::unlock() { + pthread_mutex_unlock(&_mutex); +} + +WaitableMutex::WaitableMutex() : Mutex() { + pthread_cond_init(&_cond, NULL); +} + +bool WaitableMutex::waitUntil(u64 wall_time) { + struct timespec ts = {(time_t)(wall_time / 1000000), (long)(wall_time % 1000000) * 1000}; + return pthread_cond_timedwait(&_cond, &_mutex, &ts) != 0; +} + +void WaitableMutex::notify() { + pthread_cond_signal(&_cond); +} diff --git a/src/async-profiler/os.cpp b/src/async-profiler/os.cpp new file mode 100644 index 000000000..d6b2510cc --- /dev/null +++ b/src/async-profiler/os.cpp @@ -0,0 +1,6 @@ +#include + +#include "os.h" + +const size_t OS::page_size = sysconf(_SC_PAGESIZE); +const size_t OS::page_mask = OS::page_size - 1; \ No newline at end of file diff --git a/src/async-profiler/safeAccess.cpp b/src/async-profiler/safeAccess.cpp new file mode 100644 index 000000000..d0858aa1f --- /dev/null +++ b/src/async-profiler/safeAccess.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +#include "safeAccess.h" +#include "stackFrame.h" + +static struct sigaction oldact; + +namespace SafeAccess { + +NOINLINE __attribute__((aligned(16))) void* load(void** ptr) { + return *ptr; +} + +// skipFaultInstruction returns the number of bytes by which pc must be +// advanced (0 when the load is not recognized) to step past the 
given instruction. pc is assumed to point to the same kind of +// load that SafeAccess::load would use +static uintptr_t skipFaultInstruction(uintptr_t pc) { +#if defined(__x86_64__) + return *(u16*)pc == 0x8b48 ? 3 : 0; // mov rax, [reg] +#elif defined(__i386__) + return *(u8*)pc == 0x8b ? 2 : 0; // mov eax, [reg] +#elif defined(__arm__) || defined(__thumb__) + return (*(instruction_t*)pc & 0x0e50f000) == 0x04100000 ? 4 : 0; // ldr r0, [reg] +#elif defined(__aarch64__) + return (*(instruction_t*)pc & 0xffc0001f) == 0xf9400000 ? 4 : 0; // ldr x0, [reg] +#else + return sizeof(instruction_t); +#endif +} + +} + +static void segv_handler(int sig, siginfo_t *si, void *ucontext) { + ucontext_t *uc = (ucontext_t *)ucontext; + StackFrame frame(uc); + + // If we segfault in the SafeAccess::load, skip past the bad access and + // set the return value to 0. + // + // We have to check if we are *near* the beginning of load, since there will + // be a few instructions (for frame pointer setup) before the actual bad + // access + if ((frame.pc() - (uintptr_t) SafeAccess::load) < 16) { + uintptr_t instructionEncodedLength = SafeAccess::skipFaultInstruction(frame.pc()); + frame.pc() += instructionEncodedLength; + frame.retval() = 0x0; + return; + } + + // fall back otherwise + if (oldact.sa_sigaction != nullptr) { + oldact.sa_sigaction(sig, si, ucontext); + } else if (oldact.sa_handler != nullptr) { + oldact.sa_handler(sig); + } else { + // If there wasn't a fallback, re-set to the default handler + // (which just aborts the program) and re-raise the signal + struct sigaction sa; + memset(&sa, 0, sizeof(struct sigaction)); + sa.sa_handler = SIG_DFL; + sigaction(sig, &sa, nullptr); + raise(sig); + } +} + +__attribute__ ((constructor)) static void init(void) { + struct sigaction sa; + memset(&oldact, 0, sizeof(struct sigaction)); + memset(&sa, 0, sizeof(struct sigaction)); + sa.sa_sigaction = segv_handler; + sa.sa_flags = SA_SIGINFO; + + sigaction(SIGSEGV, &sa, &oldact); +} \ No 
newline at end of file diff --git a/src/async-profiler/stackFrame_aarch64.cpp b/src/async-profiler/stackFrame_aarch64.cpp new file mode 100644 index 000000000..1054691be --- /dev/null +++ b/src/async-profiler/stackFrame_aarch64.cpp @@ -0,0 +1,134 @@ +/* + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __aarch64__ + +#include +#include +#include +#include "stackFrame.h" + + +#ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +#else +# define REG(l, m) _ucontext->uc_mcontext.l +#endif + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)REG(pc, pc); +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(sp, sp); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(regs[29], fp); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(regs[0], x[0]); +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(regs[0], x[0]); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(regs[1], x[1]); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(regs[2], x[2]); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(regs[3], x[3]); +} + +void StackFrame::ret() { + pc() = REG(regs[30], lr); +} + + +bool StackFrame::popStub(instruction_t* entry, const char* name) { + instruction_t* ip = (instruction_t*)pc(); + if (ip == entry || *ip == 0xd65f03c0 + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strncmp(name, 
"compare_long_string_", 20) == 0 + || strcmp(name, "zero_blocks") == 0 + || strcmp(name, "forward_copy_longs") == 0 + || strcmp(name, "backward_copy_longs") == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + ret(); + return true; + } else if (entry != NULL && entry[0] == 0xa9bf7bfd) { + // The stub begins with + // stp x29, x30, [sp, #-16]! + // mov x29, sp + if (ip == entry + 1) { + sp() += 16; + ret(); + return true; + } else if (entry[1] == 0x910003fd && withinCurrentStack(fp())) { + sp() = fp() + 16; + fp() = stackAt(-2); + pc() = stackAt(-1); + return true; + } + } + return false; +} + +bool StackFrame::popMethod(instruction_t* entry) { + instruction_t* ip = (instruction_t*)pc(); + if ((*ip & 0xffe07fff) == 0xa9007bfd) { + // stp x29, x30, [sp, #offset] + // SP has been adjusted, but FP not yet stored in a new frame + unsigned int offset = (*ip >> 12) & 0x1f8; + sp() += offset + 16; + } + ret(); + return true; +} + +bool StackFrame::checkInterruptedSyscall() { +#ifdef __APPLE__ + // We are not interested in syscalls that do not check error code, e.g. semaphore_wait_trap + if (*(instruction_t*)pc() == 0xd65f03c0) { + return true; + } + // If carry flag is set, the error code is in low byte of x0 + if (REG(pstate, cpsr) & (1 << 29)) { + return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT; + } else { + return retval() == (uintptr_t)-EINTR; + } +#else + return retval() == (uintptr_t)-EINTR; +#endif +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // svc #0 or svc #80 + return (*pc & 0xffffefff) == 0xd4000001; +} + +#endif // __aarch64__ diff --git a/src/async-profiler/stackFrame_arm.cpp b/src/async-profiler/stackFrame_arm.cpp new file mode 100644 index 000000000..1012b9c96 --- /dev/null +++ b/src/async-profiler/stackFrame_arm.cpp @@ -0,0 +1,104 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined(__arm__) || defined(__thumb__) + +#include +#include +#include "stackFrame.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_pc; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_sp; +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_fp; +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.arm_r0; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r0; +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r1; +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r2; +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r3; +} + +void StackFrame::ret() { + _ucontext->uc_mcontext.arm_pc = _ucontext->uc_mcontext.arm_lr; +} + +bool StackFrame::popStub(instruction_t* entry, const char* name) { + instruction_t* ip = (instruction_t*)pc(); + if (ip == entry || *ip == 0xe12fff1e + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + ret(); + return true; + } + return false; +} + +bool StackFrame::popMethod(instruction_t* entry) { + instruction_t* ip = (instruction_t*)pc(); + if (ip > entry && ip <= entry + 4 && (*ip & 0xffffff00) == 0xe24dd000) { + // push {r11, lr} + // mov r11, sp (optional) + // -> sub sp, sp, #offs + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; + return true; + } else if (*ip == 
0xe8bd4800) { + // add sp, sp, #offs + // -> pop {r11, lr} + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; + return true; + } + ret(); + return true; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // swi #0 + return *pc == 0xef000000; +} + +#endif // defined(__arm__) || defined(__thumb__) diff --git a/src/async-profiler/stackFrame_i386.cpp b/src/async-profiler/stackFrame_i386.cpp new file mode 100644 index 000000000..8c8a3dc48 --- /dev/null +++ b/src/async-profiler/stackFrame_i386.cpp @@ -0,0 +1,116 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef __i386__ + +#include +#include +#include "stackFrame.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EIP]; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_ESP]; +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EBP]; +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EAX]; +} + +uintptr_t StackFrame::arg0() { + return stackAt(1); +} + +uintptr_t StackFrame::arg1() { + return stackAt(2); +} + +uintptr_t StackFrame::arg2() { + return stackAt(3); +} + +uintptr_t StackFrame::arg3() { + return stackAt(4); +} + +void StackFrame::ret() { + pc() = stackAt(0); + sp() += 4; +} + +bool StackFrame::popStub(instruction_t* entry, const char* name) { + instruction_t* ip = (instruction_t*)pc(); + if (ip == entry || *ip == 0xc3 + || strncmp(name, "itable", 6) == 0 + || strncmp(name, "vtable", 6) == 0 + || strcmp(name, "InlineCacheBuffer") == 0) + { + pc() = stackAt(0); + sp() += 4; + return true; + } else if (entry != NULL && entry[0] == 0x55 && entry[1] == 0x8b && entry[2] == 0xec) { + // The stub begins with + // push ebp + // mov ebp, esp + if (ip == entry + 1) { + pc() = stackAt(1); + sp() += 8; + return true; + } else if (withinCurrentStack(fp())) { + sp() = fp() + 8; + fp() = stackAt(-2); + pc() = stackAt(-1); + return true; + } + } + return false; +} + +bool StackFrame::popMethod(instruction_t* entry) { + instruction_t* ip = (instruction_t*)pc(); + if (ip <= entry || *ip == 0xc3 || *ip == 0x55 // ret or push ebp + || (((uintptr_t)ip & 0xfff) && ip[-1] == 0x5d)) // after pop ebp + { + pc() = stackAt(0); + sp() += 4; + return true; + } else if (*ip == 0x5d) { + // pop ebp + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; + return true; + } + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* 
pc) { + // int 0x80 + return pc[0] == 0xcd && pc[1] == 0x80; +} + +#endif // __i386__ diff --git a/src/async-profiler/stackFrame_ppc64.cpp b/src/async-profiler/stackFrame_ppc64.cpp new file mode 100644 index 000000000..2929767ca --- /dev/null +++ b/src/async-profiler/stackFrame_ppc64.cpp @@ -0,0 +1,133 @@ +/* + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Authors: Andrei Pangin and Gunter Haug + */ + +#if defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +#include +#include +#include "stackFrame.h" + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->nip; +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->gpr[1]; +} + +uintptr_t& StackFrame::fp() { + return *((uintptr_t*)_ucontext->uc_mcontext.regs->gpr[1]); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)_ucontext->uc_mcontext.regs->gpr[3]; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[3]; +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[4]; +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[5]; +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[6]; +} + +void StackFrame::ret() { + _ucontext->uc_mcontext.regs->nip = _ucontext->uc_mcontext.regs->link; +} + +static inline bool inC1EpilogueCrit(uintptr_t pc) { + 
if (!(pc & 0xfff)) { + // Make sure we are not at the page boundary, so that reading [pc - 1] is safe + return false; + } + // C1 epilogue and critical section (posX) + // 3821**** add r1,r1,xx + // pos3 xxxxxxxx + // pos2 1000e1eb ld r31,16(r1) + // pos1 a603e87f mtlr r31 + // xxxxxxxx + // 2000804e blr + instruction_t* inst = (instruction_t*)pc; + if (inst[ 1] == 0xebe10010 && inst[2] == 0x7fe803a6 || + inst[ 0] == 0xebe10010 && inst[1] == 0x7fe803a6 || + inst[-1] == 0xebe10010 && inst[0] == 0x7fe803a6) { + return true; + } + + return false; // not in critical section +} + +static inline bool inC2PrologueCrit(uintptr_t pc) { + // C2 prologue and critical section + // f821**** stdu r1, (xx)r1 + // pos1 fa950010 std r20,16(r21) + instruction_t* inst = (instruction_t*)pc; + if (inst[0] == 0xfa950010 && (inst[-1] & 0xffff0000) == 0xf8210000) { + return true; + } + + return false; // not in critical section +} + + +bool StackFrame::popStub(instruction_t* entry, const char* name) { + pc() = _ucontext->uc_mcontext.regs->link; + return true; +} + +bool StackFrame::popMethod(instruction_t* entry) { + // On PPC there is a valid back link to the previous frame at all times. The callee stores + // the return address in the caller's frame before it constructs its own frame. After it + // has destroyed its frame it restores the link register and returns. A problematic sequence + // is the prologue/epilogue of a compiled method before/after frame construction/destruction. + // Therefore popping the frame would not help here, as it is not yet/anymore present, rather + // more adjusting the pc to the callers pc does the trick. There are two exceptions to this, + // One in the prologue of C2 compiled methods and one in the epilogue of C1 compiled methods. 
+ if (inC1EpilogueCrit(pc())) { + // lr not yet set: use the value stored in the frame + pc() = stackAt(2); + } else if (inC2PrologueCrit(pc())) { + // frame constructed but lr not yet stored in it: just do it here + *(((unsigned long *) _ucontext->uc_mcontext.regs->gpr[21]) + 2) = (unsigned long) _ucontext->uc_mcontext.regs->gpr[20]; + } else { + // most probably caller's framer is still on top but pc is already in callee: use caller's pc + pc() = _ucontext->uc_mcontext.regs->link; + } + + return true; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t* pc) { + // sc/svc + return (*pc & 0x1f) == 17; +} + +#endif // defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) diff --git a/src/async-profiler/stackFrame_x64.cpp b/src/async-profiler/stackFrame_x64.cpp new file mode 100644 index 000000000..4816a6f68 --- /dev/null +++ b/src/async-profiler/stackFrame_x64.cpp @@ -0,0 +1,72 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#ifdef __x86_64__ + +#include +#include +#include +#include "stackFrame.h" + + +#ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +#else +# define REG(l, m) _ucontext->uc_mcontext.gregs[REG_##l] +#endif + + +uintptr_t& StackFrame::pc() { + return (uintptr_t&)REG(RIP, rip); +} + +uintptr_t& StackFrame::sp() { + return (uintptr_t&)REG(RSP, rsp); +} + +uintptr_t& StackFrame::fp() { + return (uintptr_t&)REG(RBP, rbp); +} + +uintptr_t& StackFrame::retval() { + return (uintptr_t&)REG(RAX, rax); +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)REG(RDI, rdi); +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)REG(RSI, rsi); +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)REG(RDX, rdx); +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)REG(RCX, rcx); +} + +void StackFrame::ret() { + pc() = stackAt(0); + sp() += 8; +} + +#endif // __x86_64__ diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp new file mode 100644 index 000000000..8db6f4c3c --- /dev/null +++ b/src/async-profiler/stackWalker.cpp @@ -0,0 +1,118 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ +#include "codeCache.h" +#include "stackWalker.h" +#include "dwarf.h" +#include "safeAccess.h" +#include "stackFrame.h" + +const intptr_t MIN_VALID_PC = 0x1000; +const intptr_t MAX_WALK_SIZE = 0x100000; +const intptr_t MAX_FRAME_SIZE = 0x40000; + +static CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void* address) { + const int native_lib_count = cache->count(); + for (int i = 0; i < native_lib_count; i++) { + if (cache->operator[](i)->contains(address)) { + return cache->operator[](i); + } + } + return NULL; +} + +bool stepStackContext(StackContext &sc, FrameDesc *f); +bool stepStackContext(StackContext &sc, CodeCacheArray *cache) { + FrameDesc* f; + CodeCache* cc = findLibraryByAddress(cache, sc.pc); + if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) { + f = &FrameDesc::default_frame; + } + return stepStackContext(sc, f); +} + +bool stepStackContext(StackContext &sc, FrameDesc *f) { + uintptr_t bottom = sc.sp + MAX_WALK_SIZE; + uintptr_t prev_sp = sc.sp; + + u8 cfa_reg = (u8)f->cfa; + int cfa_off = f->cfa >> 8; + if (cfa_reg == DW_REG_SP) { + sc.sp = sc.sp + cfa_off; + } else if (cfa_reg == DW_REG_FP) { + sc.sp = sc.fp + cfa_off; + } else if (cfa_reg == DW_REG_PLT) { + sc.sp += ((uintptr_t)sc.pc & 15) >= 11 ? 
cfa_off * 2 : cfa_off; + } else { + return false; + } + + // Check if the next frame is below on the current stack + if (sc.sp < prev_sp || sc.sp >= prev_sp + MAX_FRAME_SIZE || sc.sp >= bottom) { + return false; + } + + // Stack pointer must be word aligned + if ((sc.sp & (sizeof(uintptr_t) - 1)) != 0) { + return false; + } + + if (f->fp_off & DW_PC_OFFSET) { + sc.pc = (const char*)sc.pc + (f->fp_off >> 1); + } else { + if (f->fp_off != DW_SAME_FP && f->fp_off < MAX_FRAME_SIZE && f->fp_off > -MAX_FRAME_SIZE) { + sc.fp = (uintptr_t)SafeAccess::load((void**)(sc.sp + f->fp_off)); + } + sc.pc = stripPointer(SafeAccess::load((void**)sc.sp - 1)); + } + + if (sc.pc < (const void*)MIN_VALID_PC || sc.pc > (const void*)-MIN_VALID_PC) { + return false; + } + return true; +} + +void populateStackContext(StackContext &sc, void *ucontext) { + if (ucontext == NULL) { + sc.pc = __builtin_return_address(0); + sc.fp = (uintptr_t)__builtin_frame_address(1); // XXX(nick): this isn't safe.... + sc.sp = (uintptr_t)__builtin_frame_address(0); + } else { + StackFrame frame(ucontext); + sc.pc = (const void*)frame.pc(); + sc.fp = frame.fp(); + sc.sp = frame.sp(); + } +} + +int stackWalk(CodeCacheArray *cache, StackContext &sc, const void** callchain, int max_depth, int skip) { + int depth = -skip; + + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + int d = depth++; + if (d >= 0) { + callchain[d] = sc.pc; + } + if (!stepStackContext(sc, cache)) { + break; + } + } + + return depth; +} diff --git a/src/async-profiler/symbols_darwin.cpp b/src/async-profiler/symbols_darwin.cpp new file mode 100644 index 000000000..32b2d0b76 --- /dev/null +++ b/src/async-profiler/symbols_darwin.cpp @@ -0,0 +1,158 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#ifdef __APPLE__ + +#include +#include +#include +#include +#include +#include +#include "symbols.h" + + +class MachOParser { + private: + CodeCache* _cc; + const mach_header* _image_base; + + static const char* add(const void* base, uint64_t offset) { + return (const char*)base + offset; + } + + void findGlobalOffsetTable(const segment_command_64* sc) { + const section_64* section = (const section_64*)add(sc, sizeof(segment_command_64)); + for (uint32_t i = 0; i < sc->nsects; i++) { + if (strcmp(section->sectname, "__la_symbol_ptr") == 0) { + const char* got_start = add(_image_base, section->addr); + _cc->setGlobalOffsetTable((void**)got_start, (void**)(got_start + section->size), true); + break; + } + section++; + } + } + + void loadSymbols(const symtab_command* symtab, const char* text_base, const char* link_base) { + const nlist_64* sym = (const nlist_64*)add(link_base, symtab->symoff); + const char* str_table = add(link_base, symtab->stroff); + + for (uint32_t i = 0; i < symtab->nsyms; i++) { + if ((sym->n_type & 0xee) == 0x0e && sym->n_value != 0) { + const char* addr = text_base + sym->n_value; + const char* name = str_table + sym->n_un.n_strx; + if (name[0] == '_') name++; + _cc->add(addr, 0, name); + } + sym++; + } + } + + public: + MachOParser(CodeCache* cc, const mach_header* image_base) : _cc(cc), _image_base(image_base) { + } + + bool parse() { + if (_image_base->magic != MH_MAGIC_64) { + return false; + } + + const mach_header_64* header = 
(const mach_header_64*)_image_base; + const load_command* lc = (const load_command*)(header + 1); + + const char* UNDEFINED = (const char*)-1; + const char* text_base = UNDEFINED; + const char* link_base = UNDEFINED; + + for (uint32_t i = 0; i < header->ncmds; i++) { + if (lc->cmd == LC_SEGMENT_64) { + const segment_command_64* sc = (const segment_command_64*)lc; + if ((sc->initprot & 4) != 0) { + if (text_base == UNDEFINED || strcmp(sc->segname, "__TEXT") == 0) { + text_base = (const char*)_image_base - sc->vmaddr; + _cc->setTextBase(text_base); + _cc->updateBounds(_image_base, add(_image_base, sc->vmsize)); + } + } else if ((sc->initprot & 7) == 1) { + if (link_base == UNDEFINED || strcmp(sc->segname, "__LINKEDIT") == 0) { + link_base = text_base + sc->vmaddr - sc->fileoff; + } + } else if ((sc->initprot & 2) != 0) { + if (strcmp(sc->segname, "__DATA") == 0) { + findGlobalOffsetTable(sc); + } + } + } else if (lc->cmd == LC_SYMTAB) { + if (text_base == UNDEFINED || link_base == UNDEFINED) { + return false; + } + loadSymbols((const symtab_command*)lc, text_base, link_base); + break; + } + lc = (const load_command*)add(lc, lc->cmdsize); + } + + return true; + } +}; + + +Mutex Symbols::_parse_lock; +bool Symbols::_have_kernel_symbols = false; + +void Symbols::parseKernelSymbols(CodeCache* cc) { +} + +void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) { + static std::set _parsed_libraries; + MutexLocker ml(_parse_lock); + uint32_t images = _dyld_image_count(); + + for (uint32_t i = 0; i < images; i++) { + const mach_header* image_base = _dyld_get_image_header(i); + if (image_base == NULL || !_parsed_libraries.insert(image_base).second) { + continue; // the library was already parsed + } + + int count = array->count(); + if (count >= MAX_NATIVE_LIBS) { + break; + } + + const char* path = _dyld_get_image_name(i); + + // Protect the library from unloading while parsing symbols + void* handle = dlopen(path, RTLD_LAZY | RTLD_NOLOAD); + if (handle == 
NULL) { + continue; + } + + CodeCache* cc = new CodeCache(path, count); + MachOParser parser(cc, image_base); + if (!parser.parse()) { + //Log::warn("Could not parse symbols from %s", path); + } + dlclose(handle); + + cc->sort(); + array->add(cc); + } +} + +#endif // __APPLE__ diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp new file mode 100644 index 000000000..4c62a63c6 --- /dev/null +++ b/src/async-profiler/symbols_linux.cpp @@ -0,0 +1,554 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#ifdef __linux__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "symbols.h" +#include "dwarf.h" + + +class SymbolDesc { + private: + const char* _addr; + const char* _type; + + public: + SymbolDesc(const char* s) { + _addr = s; + _type = strchr(_addr, ' ') + 1; + } + + const char* addr() { return (const char*)strtoul(_addr, NULL, 16); } + char type() { return _type[0]; } + const char* name() { return _type + 2; } +}; + +class MemoryMapDesc { + private: + const char* _addr; + const char* _end; + const char* _perm; + const char* _offs; + const char* _dev; + const char* _inode; + const char* _file; + + public: + MemoryMapDesc(const char* s) { + _addr = s; + _end = strchr(_addr, '-') + 1; + _perm = strchr(_end, ' ') + 1; + _offs = strchr(_perm, ' ') + 1; + _dev = strchr(_offs, ' ') + 1; + _inode = strchr(_dev, ' ') + 1; + _file = strchr(_inode, ' '); + + if (_file != NULL) { + while (*_file == ' ') _file++; + } + } + + const char* file() { return _file; } + bool isReadable() { return _perm[0] == 'r'; } + bool isExecutable() { return _perm[2] == 'x'; } + const char* addr() { return (const char*)strtoul(_addr, NULL, 16); } + const char* end() { return (const char*)strtoul(_end, NULL, 16); } + unsigned long offs() { return strtoul(_offs, NULL, 16); } + unsigned long dev() { return strtoul(_dev, NULL, 16) << 8 | strtoul(_dev + 3, NULL, 16); } + unsigned long inode() { return strtoul(_inode, NULL, 10); } +}; + + +#ifdef __LP64__ +const unsigned char ELFCLASS_SUPPORTED = ELFCLASS64; +typedef Elf64_Ehdr ElfHeader; +typedef Elf64_Shdr ElfSection; +typedef Elf64_Phdr ElfProgramHeader; +typedef Elf64_Nhdr ElfNote; +typedef Elf64_Sym ElfSymbol; +typedef Elf64_Rel ElfRelocation; +typedef Elf64_Dyn ElfDyn; +#define ELF_R_TYPE ELF64_R_TYPE +#define ELF_R_SYM ELF64_R_SYM +#else +const unsigned char 
ELFCLASS_SUPPORTED = ELFCLASS32; +typedef Elf32_Ehdr ElfHeader; +typedef Elf32_Shdr ElfSection; +typedef Elf32_Phdr ElfProgramHeader; +typedef Elf32_Nhdr ElfNote; +typedef Elf32_Sym ElfSymbol; +typedef Elf32_Rel ElfRelocation; +typedef Elf32_Dyn ElfDyn; +#define ELF_R_TYPE ELF32_R_TYPE +#define ELF_R_SYM ELF32_R_SYM +#endif // __LP64__ + +#if defined(__x86_64__) +# define R_GLOB_DAT R_X86_64_GLOB_DAT +#elif defined(__i386__) +# define R_GLOB_DAT R_386_GLOB_DAT +#elif defined(__arm__) || defined(__thumb__) +# define R_GLOB_DAT R_ARM_GLOB_DAT +#elif defined(__aarch64__) +# define R_GLOB_DAT R_AARCH64_GLOB_DAT +#elif defined(__PPC64__) +# define R_GLOB_DAT R_PPC64_GLOB_DAT +#else +# error "Compiling on unsupported arch" +#endif + +// GNU dynamic linker relocates pointers in the dynamic section, while musl doesn't. +// A tricky case is when we attach to a musl container from a glibc host. +#ifdef __musl__ +# define DYN_PTR(ptr) (_base + (ptr)) +#else +# define DYN_PTR(ptr) ((char*)(ptr) >= _base ? 
(char*)(ptr) : _base + (ptr)) +#endif // __musl__ + + +class ElfParser { + private: + CodeCache* _cc; + const char* _base; + const char* _file_name; + ElfHeader* _header; + const char* _sections; + + ElfParser(CodeCache* cc, const char* base, const void* addr, const char* file_name = NULL) { + _cc = cc; + _base = base; + _file_name = file_name; + _header = (ElfHeader*)addr; + _sections = (const char*)addr + _header->e_shoff; + } + + bool validHeader() { + unsigned char* ident = _header->e_ident; + return ident[0] == 0x7f && ident[1] == 'E' && ident[2] == 'L' && ident[3] == 'F' + && ident[4] == ELFCLASS_SUPPORTED && ident[5] == ELFDATA2LSB && ident[6] == EV_CURRENT + && _header->e_shstrndx != SHN_UNDEF; + } + + ElfSection* section(int index) { + return (ElfSection*)(_sections + index * _header->e_shentsize); + } + + const char* at(ElfSection* section) { + return (const char*)_header + section->sh_offset; + } + + const char* at(ElfProgramHeader* pheader) { + return _header->e_type == ET_EXEC ? 
(const char*)pheader->p_vaddr : (const char*)_header + pheader->p_vaddr; + } + + ElfSection* findSection(uint32_t type, const char* name); + ElfProgramHeader* findProgramHeader(uint32_t type); + + void parseDynamicSection(); + void parseDwarfInfo(); + void loadSymbols(bool use_debug); + bool loadSymbolsUsingBuildId(); + bool loadSymbolsUsingDebugLink(); + void loadSymbolTable(ElfSection* symtab); + void addRelocationSymbols(ElfSection* reltab, const char* plt); + + public: + static void parseProgramHeaders(CodeCache* cc, const char* base); + static bool parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug); + static void parseMem(CodeCache* cc, const char* base); +}; + + +ElfSection* ElfParser::findSection(uint32_t type, const char* name) { + const char* strtab = at(section(_header->e_shstrndx)); + + for (int i = 0; i < _header->e_shnum; i++) { + ElfSection* section = this->section(i); + if (section->sh_type == type && section->sh_name != 0) { + if (strcmp(strtab + section->sh_name, name) == 0) { + return section; + } + } + } + + return NULL; +} + +ElfProgramHeader* ElfParser::findProgramHeader(uint32_t type) { + const char* pheaders = (const char*)_header + _header->e_phoff; + + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader* pheader = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); + if (pheader->p_type == type) { + return pheader; + } + } + + return NULL; +} + +bool ElfParser::parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug) { + int fd = open(file_name, O_RDONLY); + if (fd == -1) { + return false; + } + + size_t length = (size_t)lseek64(fd, 0, SEEK_END); + void* addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (addr == MAP_FAILED) { + //Log::warn("Could not parse symbols from %s: %s", file_name, strerror(errno)); + } else { + ElfParser elf(cc, base, addr, file_name); + if (elf.validHeader()) { + elf.loadSymbols(use_debug); + } + munmap(addr, 
length); + } + return true; +} + +void ElfParser::parseMem(CodeCache* cc, const char* base) { + ElfParser elf(cc, base, base); + if (elf.validHeader()) { + elf.loadSymbols(false); + } +} + +void ElfParser::parseProgramHeaders(CodeCache* cc, const char* base) { + ElfParser elf(cc, base, base); + if (elf.validHeader()) { + cc->setTextBase(base); + elf.parseDynamicSection(); + elf.parseDwarfInfo(); + } +} + +void ElfParser::parseDynamicSection() { + ElfProgramHeader* dynamic = findProgramHeader(PT_DYNAMIC); + if (dynamic != NULL) { + void** got_start = NULL; + size_t pltrelsz = 0; + char* rel = NULL; + size_t relsz = 0; + size_t relent = 0; + size_t relcount = 0; + + const char* dyn_start = at(dynamic); + const char* dyn_end = dyn_start + dynamic->p_memsz; + for (ElfDyn* dyn = (ElfDyn*)dyn_start; dyn < (ElfDyn*)dyn_end; dyn++) { + switch (dyn->d_tag) { + case DT_PLTGOT: + got_start = (void**)DYN_PTR(dyn->d_un.d_ptr) + 3; + break; + case DT_PLTRELSZ: + pltrelsz = dyn->d_un.d_val; + break; + case DT_RELA: + case DT_REL: + rel = (char*)DYN_PTR(dyn->d_un.d_ptr); + break; + case DT_RELASZ: + case DT_RELSZ: + relsz = dyn->d_un.d_val; + break; + case DT_RELAENT: + case DT_RELENT: + relent = dyn->d_un.d_val; + break; + case DT_RELACOUNT: + case DT_RELCOUNT: + relcount = dyn->d_un.d_val; + break; + } + } + + if (relent != 0) { + if (pltrelsz != 0 && got_start != NULL) { + // The number of entries in .got.plt section matches the number of entries in .rela.plt + _cc->setGlobalOffsetTable(got_start, got_start + pltrelsz / relent, false); + } else if (rel != NULL && relsz != 0) { + // RELRO technique: .got.plt has been merged into .got and made read-only. + // Find .got end from the highest relocation address. 
+ void** min_addr = (void**)-1; + void** max_addr = (void**)0; + for (size_t offs = relcount * relent; offs < relsz; offs += relent) { + ElfRelocation* r = (ElfRelocation*)(rel + offs); + if (ELF_R_TYPE(r->r_info) == R_GLOB_DAT) { + void** addr = (void**)(_base + r->r_offset); + if (addr < min_addr) min_addr = addr; + if (addr > max_addr) max_addr = addr; + } + } + + if (got_start == NULL) { + got_start = (void**)min_addr; + } + + if (max_addr >= got_start) { + _cc->setGlobalOffsetTable(got_start, max_addr + 1, false); + } + } + } + } +} + +void ElfParser::parseDwarfInfo() { + if (!DWARF_SUPPORTED) return; + + ElfProgramHeader* eh_frame_hdr = findProgramHeader(PT_GNU_EH_FRAME); + if (eh_frame_hdr != NULL) { + DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); + _cc->setDwarfTable(dwarf.table(), dwarf.count()); + } +} + +void ElfParser::loadSymbols(bool use_debug) { + // Look for debug symbols in the original .so + ElfSection* section = findSection(SHT_SYMTAB, ".symtab"); + if (section != NULL) { + loadSymbolTable(section); + goto loaded; + } + + // Try to load symbols from an external debuginfo library + if (use_debug) { + if (loadSymbolsUsingBuildId() || loadSymbolsUsingDebugLink()) { + goto loaded; + } + } + + // If everything else fails, load only exported symbols + section = findSection(SHT_DYNSYM, ".dynsym"); + if (section != NULL) { + loadSymbolTable(section); + } + +loaded: + if (use_debug) { + // Synthesize names for PLT stubs + ElfSection* plt = findSection(SHT_PROGBITS, ".plt"); + ElfSection* reltab = findSection(SHT_RELA, ".rela.plt"); + if (reltab == NULL) { + reltab = findSection(SHT_REL, ".rel.plt"); + } + if (plt != NULL && reltab != NULL) { + addRelocationSymbols(reltab, _base + plt->sh_offset + PLT_HEADER_SIZE); + } + } +} + +// Load symbols from /usr/lib/debug/.build-id/ab/cdef1234.debug, where abcdef1234 is Build ID +bool ElfParser::loadSymbolsUsingBuildId() { + ElfSection* section = findSection(SHT_NOTE, ".note.gnu.build-id"); + if 
(section == NULL || section->sh_size <= 16) { + return false; + } + + ElfNote* note = (ElfNote*)at(section); + if (note->n_namesz != 4 || note->n_descsz < 2 || note->n_descsz > 64) { + return false; + } + + const char* build_id = (const char*)note + sizeof(*note) + 4; + int build_id_len = note->n_descsz; + + char path[PATH_MAX]; + char* p = path + sprintf(path, "/usr/lib/debug/.build-id/%02hhx/", build_id[0]); + for (int i = 1; i < build_id_len; i++) { + p += sprintf(p, "%02hhx", build_id[i]); + } + strcpy(p, ".debug"); + + return parseFile(_cc, _base, path, false); +} + +// Look for debuginfo file specified in .gnu_debuglink section +bool ElfParser::loadSymbolsUsingDebugLink() { + ElfSection* section = findSection(SHT_PROGBITS, ".gnu_debuglink"); + if (section == NULL || section->sh_size <= 4) { + return false; + } + + const char* basename = strrchr(_file_name, '/'); + if (basename == NULL) { + return false; + } + + char* dirname = strndup(_file_name, basename - _file_name); + if (dirname == NULL) { + return false; + } + + const char* debuglink = at(section); + char path[PATH_MAX]; + bool result = false; + + // 1. /path/to/libjvm.so.debug + if (strcmp(debuglink, basename + 1) != 0 && + snprintf(path, PATH_MAX, "%s/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 2. /path/to/.debug/libjvm.so.debug + if (!result && snprintf(path, PATH_MAX, "%s/.debug/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 3. 
/usr/lib/debug/path/to/libjvm.so.debug + if (!result && snprintf(path, PATH_MAX, "/usr/lib/debug%s/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + free(dirname); + return result; +} + +void ElfParser::loadSymbolTable(ElfSection* symtab) { + ElfSection* strtab = section(symtab->sh_link); + const char* strings = at(strtab); + + const char* symbols = at(symtab); + const char* symbols_end = symbols + symtab->sh_size; + for (; symbols < symbols_end; symbols += symtab->sh_entsize) { + ElfSymbol* sym = (ElfSymbol*)symbols; + if (sym->st_name != 0 && sym->st_value != 0) { + // Skip special AArch64 mapping symbols: $x and $d + if (sym->st_size != 0 || sym->st_info != 0 || strings[sym->st_name] != '$') { + _cc->add(_base + sym->st_value, (int)sym->st_size, strings + sym->st_name); + } + } + } +} + +void ElfParser::addRelocationSymbols(ElfSection* reltab, const char* plt) { + ElfSection* symtab = section(reltab->sh_link); + const char* symbols = at(symtab); + + ElfSection* strtab = section(symtab->sh_link); + const char* strings = at(strtab); + + const char* relocations = at(reltab); + const char* relocations_end = relocations + reltab->sh_size; + for (; relocations < relocations_end; relocations += reltab->sh_entsize) { + ElfRelocation* r = (ElfRelocation*)relocations; + ElfSymbol* sym = (ElfSymbol*)(symbols + ELF_R_SYM(r->r_info) * symtab->sh_entsize); + + char name[256]; + if (sym->st_name == 0) { + strcpy(name, "@plt"); + } else { + const char* sym_name = strings + sym->st_name; + snprintf(name, sizeof(name), "%s%cplt", sym_name, sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' 
// Scans /proc/self/maps and adds one CodeCache per executable, file-backed
// mapping to `array`.
//
// array:          receives the newly created CodeCache objects (via add()).
// kernel_symbols: not read anywhere in this body.
//
// The whole scan runs under _parse_lock. Returns silently if
// /proc/self/maps cannot be opened, and stops scanning once `array`
// already holds MAX_NATIVE_LIBS entries.
void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) {
    // we can't use static global sets due to undefined initialization order stuff
    // (see https://stackoverflow.com/questions/27145617/segfault-when-adding-an-element-to-a-stdmap)
    // I'm not sure why this original code even worked?
    // Because these sets are locals, they start empty on every call: the
    // "already parsed" checks below only de-duplicate within a single scan.
    // NOTE(review): the template arguments of these two std::set declarations
    // appear to have been lost in this copy of the patch - confirm against
    // the real file.
    std::set parsed_libraries;
    std::set parsed_inodes;
    MutexLocker ml(_parse_lock);

    FILE* f = fopen("/proc/self/maps", "r");
    if (f == NULL) {
        return;
    }

    // last_readable_base: start of the current run of adjacent readable
    // mappings (reset whenever a mapping does not begin where the previous
    // accepted one ended).
    const char* last_readable_base = NULL;
    const char* image_end = NULL;
    // Line buffer owned by getline(); released with free() at the end.
    char* str = NULL;
    size_t str_size = 0;
    ssize_t len;

    while ((len = getline(&str, &str_size, f)) > 0) {
        // Overwrite the last character of the line (presumably the trailing
        // newline) with a terminator.
        str[len - 1] = 0;

        MemoryMapDesc map(str);
        // Only readable mappings that carry a non-empty file name are of interest.
        if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) {
            continue;
        }

        const char* image_base = map.addr();
        if (image_base != image_end) last_readable_base = image_base;
        image_end = map.end();

        if (map.isExecutable()) {
            if (!parsed_libraries.insert(image_base).second) {
                continue;  // the library was already parsed
            }

            int count = array->count();
            if (count >= MAX_NATIVE_LIBS) {
                break;
            }

            CodeCache* cc = new CodeCache(map.file(), count, image_base, image_end);

            unsigned long inode = map.inode();
            if (inode != 0) {
                // Do not parse the same executable twice, e.g. on Alpine Linux
                if (parsed_inodes.insert(map.dev() | inode << 16).second) {
                    // Be careful: executable file is not always ELF, e.g. classes.jsa
                    // Note that image_base is rewound by the mapping's file
                    // offset here, and the rewound value is what parseFile()
                    // receives below. Program headers are only parsed from
                    // memory when the rewound address is still inside the
                    // current run of readable mappings.
                    if ((image_base -= map.offs()) >= last_readable_base) {
                        ElfParser::parseProgramHeaders(cc, image_base);
                    }
                    ElfParser::parseFile(cc, image_base, map.file(), true);
                }
            } else if (strcmp(map.file(), "[vdso]") == 0) {
                // No inode: the only such mapping parsed is the vDSO, read
                // directly from memory.
                ElfParser::parseMem(cc, image_base);
            }

            // The cache is sorted and registered even when nothing was
            // parsed into it (duplicate inode, or an inode-less mapping
            // that is not the vDSO).
            cc->sort();
            array->add(cc);
        }
    }

    free(str);
    fclose(f);
}
(4096UL * 8) + +std::byte stack[PERF_SAMPLE_STACK_SIZE]; + + +TEST(dwarf_unwind, simple) { + uint64_t regs[K_NB_REGS_UNWIND]; + size_t stack_size = save_context(retrieve_stack_end_address(), regs, stack); + // DO REGNAME(RBP) --> Gives the index inside the table + // DO REGNAME(SP) + // DO REGNAME(PC) + +} From 050df2dbfd8a33f98cf51641a37bf75d677b3bf1 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 26 Sep 2022 10:19:59 +0200 Subject: [PATCH 03/29] Compare between remote vs local unwinding --- include/async-profiler/stackWalker.h | 19 ++---- include/async-profiler/stack_context.h | 29 +++++++++ include/ddprof_base.hpp | 2 +- src/async-profiler/cgotraceback.cpp | 25 ++++++-- src/async-profiler/stackWalker.cpp | 66 +++++++++++++++++--- src/exe/main.cc | 2 +- src/lib/malloc_wrapper.cc | 1 + src/perf.cc | 1 + src/unwind_helpers.cc | 2 +- test/CMakeLists.txt | 2 + test/dwarf_unwind-ut.cc | 86 +++++++++++++++++++++++++- 11 files changed, 199 insertions(+), 36 deletions(-) create mode 100644 include/async-profiler/stack_context.h diff --git a/include/async-profiler/stackWalker.h b/include/async-profiler/stackWalker.h index a16e1f48f..4ae364322 100644 --- a/include/async-profiler/stackWalker.h +++ b/include/async-profiler/stackWalker.h @@ -23,22 +23,11 @@ #include #include "codeCache.h" +#include "stack_context.h" -struct StackContext { - const void* pc; - uintptr_t sp; - uintptr_t fp; +CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void* address); - void set(const void* pc, uintptr_t sp, uintptr_t fp) { - this->pc = pc; - this->sp = sp; - this->fp = fp; - } -}; - -class StackWalker { - public: - static int walkDwarf(CodeCacheArray *cache, void* ucontext, const void** callchain, int max_depth, int skip); -}; +int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, const ap::StackBuffer &buffer, + const void** callchain, int max_depth, int skip); #endif // _STACKWALKER_H diff --git a/include/async-profiler/stack_context.h 
namespace ap {
// Register triple that the stack walker advances from frame to frame.
struct StackContext {
  const void* pc; // program counter of the current frame
  uintptr_t sp;   // stack pointer of the current frame
  uintptr_t fp;   // frame pointer of the current frame

  // Assigns all three registers at once.
  void set(const void* pc, uintptr_t sp, uintptr_t fp) {
    this->pc = pc;
    this->sp = sp;
    this->fp = fp;
  }
};

// A view over bytes together with the address range [sp_start, sp_end)
// they are meant to represent.
// NOTE(review): the template argument of std::span appears to have been lost
// in this copy of the patch (callers pass a std::byte array) - confirm
// against the real header.
struct StackBuffer {
  // bytes: the viewed bytes; start/end: the address range, see members below.
  // The span is stored as-is, so the viewed storage has to outlive this
  // object.
  StackBuffer(std::span bytes, uint64_t start, uint64_t end):
    _bytes(bytes), sp_start(start), sp_end(end) {}
  std::span _bytes;  // presumably _bytes[0] is the byte located at sp_start - TODO confirm
  uint64_t sp_start; // initial SP (in context of the process)
  uint64_t sp_end; // sp + size (so root functions = start of stack)
};

}
+void async_cgo_context(void *p) { +} +#else void async_cgo_context(void *p) { if (enabled == 0) { return; @@ -144,7 +149,7 @@ void async_cgo_context(void *p) { if (ctx == NULL) { return; } - StackContext sc; + ap::StackContext sc; populateStackContext(sc, nullptr); CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); // There are two frames in the call stack we should skip. The first is this @@ -160,6 +165,8 @@ void async_cgo_context(void *p) { arg->p = (uintptr_t) ctx; return; } +#endif + struct cgo_traceback_arg { uintptr_t context; @@ -168,6 +175,10 @@ struct cgo_traceback_arg { uintptr_t max; }; +#ifndef C_GO_THINGS +void async_cgo_traceback(void *p) { +} +#else void async_cgo_traceback(void *p) { if (enabled == 0) { return; @@ -175,7 +186,7 @@ void async_cgo_traceback(void *p) { struct cgo_traceback_arg *arg = (struct cgo_traceback_arg *)p; struct cgo_context *ctx = NULL; - StackContext sc; + ap::StackContext sc; // If we had a previous context, then we're being called to unwind some // previous C portion of a mixed C/Go call stack. 
We use the call stack @@ -206,5 +217,7 @@ void async_cgo_traceback(void *p) { return; } +#endif + } // extern "C" \ No newline at end of file diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index 8db6f4c3c..ae44fb456 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp @@ -21,12 +21,41 @@ #include "dwarf.h" #include "safeAccess.h" #include "stackFrame.h" +#include "logger.hpp" +#include const intptr_t MIN_VALID_PC = 0x1000; const intptr_t MAX_WALK_SIZE = 0x100000; const intptr_t MAX_FRAME_SIZE = 0x40000; -static CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void* address) { +bool read_memory(uint64_t addr, uint64_t *res, const ap::StackBuffer &buffer) { + if (addr < 4095) { + return false; + } + if ((addr & 0x7) != 0) { + // not aligned + return false; + } + if (addr > addr + sizeof(uint64_t)) { + return false; + } + + if (addr < buffer.sp_start && addr > buffer.sp_start - 4096) { + // todo red zone thing + return false; + } + else if (addr < buffer.sp_start || addr + sizeof(uint64_t) > buffer.sp_end) { + return false; + } + uint64_t stack_idx = addr - buffer.sp_start; + if (stack_idx > addr) { + return false; + } + *res = *(uint64_t *)(buffer._bytes.data() + stack_idx); + return true; +} + +CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void* address) { const int native_lib_count = cache->count(); for (int i = 0; i < native_lib_count; i++) { if (cache->operator[](i)->contains(address)) { @@ -36,17 +65,18 @@ static CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void* addres return NULL; } -bool stepStackContext(StackContext &sc, FrameDesc *f); -bool stepStackContext(StackContext &sc, CodeCacheArray *cache) { +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, FrameDesc *f); + +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, CodeCacheArray *cache) { FrameDesc* f; CodeCache* cc = 
findLibraryByAddress(cache, sc.pc); if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) { f = &FrameDesc::default_frame; } - return stepStackContext(sc, f); + return stepStackContext(sc, buffer, f); } -bool stepStackContext(StackContext &sc, FrameDesc *f) { +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, FrameDesc *f) { uintptr_t bottom = sc.sp + MAX_WALK_SIZE; uintptr_t prev_sp = sc.sp; @@ -76,9 +106,24 @@ bool stepStackContext(StackContext &sc, FrameDesc *f) { sc.pc = (const char*)sc.pc + (f->fp_off >> 1); } else { if (f->fp_off != DW_SAME_FP && f->fp_off < MAX_FRAME_SIZE && f->fp_off > -MAX_FRAME_SIZE) { - sc.fp = (uintptr_t)SafeAccess::load((void**)(sc.sp + f->fp_off)); + void* new_fp = SafeAccess::load((void**)(sc.sp + f->fp_off)); + printf("Update FP to value (old code): %p\n", new_fp); + // Update the frame pointer (based on fp offset) + if (!read_memory(sc.sp + f->fp_off, reinterpret_cast(&sc.fp), buffer)) { + printf("Failure __%u \n", __LINE__); + return false; + } + printf("Update FP to value (read mem): %p\n", sc.fp); + } + void* new_pc = stripPointer(SafeAccess::load((void**)sc.sp - 1)); + // Update the pc using return address + printf("Update new instruction pointer to value (old code): %p\n", new_pc); + + if (!read_memory(reinterpret_cast((void**)sc.sp - 1), reinterpret_cast(&sc.pc), buffer)) { + printf("Failure __%u \n", __LINE__); + return false; } - sc.pc = stripPointer(SafeAccess::load((void**)sc.sp - 1)); + printf("Update new instruction pointer to value (new code): %p\n", sc.pc); } if (sc.pc < (const void*)MIN_VALID_PC || sc.pc > (const void*)-MIN_VALID_PC) { @@ -87,7 +132,7 @@ bool stepStackContext(StackContext &sc, FrameDesc *f) { return true; } -void populateStackContext(StackContext &sc, void *ucontext) { +void populateStackContext(ap::StackContext &sc, void *ucontext) { if (ucontext == NULL) { sc.pc = __builtin_return_address(0); sc.fp = (uintptr_t)__builtin_frame_address(1); // XXX(nick): this isn't 
safe.... @@ -100,7 +145,8 @@ void populateStackContext(StackContext &sc, void *ucontext) { } } -int stackWalk(CodeCacheArray *cache, StackContext &sc, const void** callchain, int max_depth, int skip) { +int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, const ap::StackBuffer &buffer, + const void** callchain, int max_depth, int skip) { int depth = -skip; // Walk until the bottom of the stack or until the first Java frame @@ -109,7 +155,7 @@ int stackWalk(CodeCacheArray *cache, StackContext &sc, const void** callchain, i if (d >= 0) { callchain[d] = sc.pc; } - if (!stepStackContext(sc, cache)) { + if (!stepStackContext(sc, buffer, cache)) { break; } } diff --git a/src/exe/main.cc b/src/exe/main.cc index 432a96a40..2591ea3ae 100644 --- a/src/exe/main.cc +++ b/src/exe/main.cc @@ -393,7 +393,7 @@ int main(int argc, char *argv[]) { break; case ENOEXEC: case EACCES: - LG_ERR("%s: permission denied", argv[0]); + LG_ERR("%s: pe rmission denied", argv[0]); break; default: LG_WRN("%s: failed to execute (%s)", argv[0], strerror(errno)); diff --git a/src/lib/malloc_wrapper.cc b/src/lib/malloc_wrapper.cc index 55d66f28f..4bbc407d6 100644 --- a/src/lib/malloc_wrapper.cc +++ b/src/lib/malloc_wrapper.cc @@ -173,6 +173,7 @@ void *memalign(size_t alignment, size_t size) { size); return ptr; } + void *temp_memalign(size_t alignment, size_t size) noexcept { check_init(); return s_memalign(alignment, size); diff --git a/src/perf.cc b/src/perf.cc index 3f18e54d0..aa7d9e263 100644 --- a/src/perf.cc +++ b/src/perf.cc @@ -8,6 +8,7 @@ #include "defer.hpp" #include "logger.hpp" #include "perf.hpp" +#include "user_override.hpp" #include #include diff --git a/src/unwind_helpers.cc b/src/unwind_helpers.cc index 7630977c1..c7bab7bd8 100644 --- a/src/unwind_helpers.cc +++ b/src/unwind_helpers.cc @@ -108,7 +108,7 @@ bool memory_read(ProcessAddress_t addr, ElfWord_t *result, int regno, // stack grows down, so end of stack is start // us->initial_regs.sp does not have to be aligned - 
uint64_t sp_start = us->initial_regs.regs[REGNAME(SP)]; + uint64_t sp_start = us->initial_regs.regs[REGNAME(SP)]; uint64_t sp_end = sp_start + us->stack_sz; if (addr < sp_start && addr > sp_start - 4096) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4da6f62fa..88c693404 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -301,6 +301,8 @@ add_unit_test(symbol_map-ut symbol_map-ut.cc ../src/symbol_map.cc) add_benchmark(savecontext-bench savecontext-bench.cc ../src/savecontext.cc ../src/saveregisters.cc) add_benchmark(timer-bench timer-bench.cc ../src/timer.cc ../src/perf.cc) +message(STATUS "Async profiler" ${ASYNC_PROFILER_SRC}) + add_unit_test(dwarf_unwind-ut dwarf_unwind-ut.cc ../src/savecontext.cc ../src/saveregisters.cc ${ASYNC_PROFILER_SRC} LIBRARIES ${ELFUTILS_LIBRARIES}) target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE}) diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index fa299e0e4..f2a388ea3 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -3,20 +3,102 @@ #include "unwind_state.hpp" #include "savecontext.hpp" +// #include "symbol.hpp" +#include "stackWalker.h" + +#include + +#include "async-profiler/codeCache.h" +#include "async-profiler/symbols.h" + +#include "async-profiler/stack_context.h" + +// Retrieves instruction pointer +#define _THIS_IP_ \ + ({ \ + __label__ __here; \ + __here: \ + (unsigned long)&&__here; \ + }) + // #include "ddprof_defs.hpp" + // temp copy pasta #define PERF_SAMPLE_STACK_SIZE (4096UL * 8) +#define CAST_TO_VOID_STAR(ptr) reinterpret_cast(ptr) + std::byte stack[PERF_SAMPLE_STACK_SIZE]; +DDPROF_NOINLINE size_t funcA(std::array ®s); +DDPROF_NOINLINE size_t funcB(std::array ®s); + +size_t funcB(std::array ®s) { + // Load libraries + CodeCacheArray cache_arary; + Symbols::parseLibraries(&cache_arary, false); + + printf("Here we are in B %lx \n", _THIS_IP_); + size_t size = save_context(retrieve_stack_end_address(), regs, stack); + + { // IP + 
uint64_t ip = regs[REGNAME(PC)]; + printf("%lx = ip\n", ip); + + { // small useless test + CodeCache *code_cache = findLibraryByAddress(&cache_arary, reinterpret_cast(ip)); + EXPECT_TRUE(code_cache); + } + } + + // context from saving state + ap::StackContext sc; + #ifdef __x86_64__ + sc.pc = CAST_TO_VOID_STAR(regs[REGNAME(PC)]); + sc.sp = regs[REGNAME(SP)]; + sc.fp = regs[REGNAME(RBP)]; +#elif __aarch64__ + sc.pc = CAST_TO_VOID_STAR(regs[REGNAME(PC)]); + sc.sp = regs[REGNAME(SP)]; + sc.fp = regs[REGNAME(FP)]; +#endif + + ap::StackBuffer buffer(stack, sc.sp, sc.sp + PERF_SAMPLE_STACK_SIZE); + + void *stack[128]; + int n = stackWalk(&cache_arary, sc, buffer, const_cast(stack), 128, 0); + for (int i = 0; i < n; ++i) { + printf("IP = %p \n", stack[i]); + } + + return size; +} + +size_t funcA(std::array ®s) { + printf("Here we are in A %lx \n", _THIS_IP_); + return funcB(regs); +} + + +void unwind_async_profiler() { + +} + +void unwind_libdwfl(){ + +} TEST(dwarf_unwind, simple) { - uint64_t regs[K_NB_REGS_UNWIND]; - size_t stack_size = save_context(retrieve_stack_end_address(), regs, stack); + std::array regs; + size_t size_stack = funcA(regs); + + + // DO REGNAME(RBP) --> Gives the index inside the table // DO REGNAME(SP) // DO REGNAME(PC) + // int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, const void** callchain, int max_depth, int skip) { } From 8b097fcf14b8be6f9a631ea1e3fb85c201fa150e Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 26 Sep 2022 14:15:01 +0200 Subject: [PATCH 04/29] Minor update on symbols --- test/dwarf_unwind-ut.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index f2a388ea3..521dea80a 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -64,13 +64,19 @@ size_t funcB(std::array ®s) { sc.sp = regs[REGNAME(SP)]; sc.fp = regs[REGNAME(FP)]; #endif - - ap::StackBuffer buffer(stack, sc.sp, sc.sp + PERF_SAMPLE_STACK_SIZE); + // size 
should be < PERF_SAMPLE_STACK_SIZE + ap::StackBuffer buffer(stack, sc.sp, sc.sp + size); void *stack[128]; int n = stackWalk(&cache_arary, sc, buffer, const_cast(stack), 128, 0); for (int i = 0; i < n; ++i) { - printf("IP = %p \n", stack[i]); + { // retrieve symbol + CodeCache *code_cache = findLibraryByAddress(&cache_arary, reinterpret_cast(stack[i])); + if (code_cache) { + const char *sym_name = code_cache->binarySearch(stack[i]); + printf("IP = %p - %s\n", stack[i], sym_name); + } + } } return size; @@ -93,7 +99,7 @@ void unwind_libdwfl(){ TEST(dwarf_unwind, simple) { std::array regs; size_t size_stack = funcA(regs); - + EXPECT_TRUE(size_stack); // DO REGNAME(RBP) --> Gives the index inside the table From 7c7ca0aa1a6dc11133db9c3c1ba9b1afd03f858d Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 16 Nov 2022 13:25:28 +0100 Subject: [PATCH 05/29] wip --- design_notes.txt | 37 ++++++++++++++++++++++++++++ src/async-profiler/stackWalker.cpp | 1 + src/async-profiler/symbols_linux.cpp | 1 + 3 files changed, 39 insertions(+) create mode 100644 design_notes.txt diff --git a/design_notes.txt b/design_notes.txt new file mode 100644 index 000000000..08a88d187 --- /dev/null +++ b/design_notes.txt @@ -0,0 +1,37 @@ +## things by pid +################# +DsoHdr +-- DSO + +Lookups +-- pid / bin maps +-- mappings +-- runtime + +DwflHdr +-- DwflWrapper +-- Visited pids + +## Not by pid +############# +-- File info (dso_hdr) +-- Lookup +Dwarf symbol lookup + + +## Async profiler + +### SymbolLinux +parseLibraries parses everything in proc self +Instead, write an API that can work with ddprof object model + +LoadSymbolTable loads at a given base address +We want to store all symbols at an elf address + +Oh wow, everything is added in a weird code blob +Oh wow, native func is stored in a char and we deduce pointer from the offset to the name + +1) Rewrite parseLibraries +we can use DSO information + file info +Ensure the cache is at elf address (not base) +Start is 0 or for non PIE, 
\ No newline at end of file diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index ae44fb456..5127ab4f7 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp @@ -76,6 +76,7 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, CodeC return stepStackContext(sc, buffer, f); } + bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, FrameDesc *f) { uintptr_t bottom = sc.sp + MAX_WALK_SIZE; uintptr_t prev_sp = sc.sp; diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 4c62a63c6..a57976ca5 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -444,6 +444,7 @@ void ElfParser::loadSymbolTable(ElfSection* symtab) { if (sym->st_name != 0 && sym->st_value != 0) { // Skip special AArch64 mapping symbols: $x and $d if (sym->st_size != 0 || sym->st_info != 0 || strings[sym->st_name] != '$') { + printf("Loading sym %s at 0x%lx (base=0x%lx)\n", strings + sym->st_name, _base + sym->st_value, _base); _cc->add(_base + sym->st_value, (int)sym->st_size, strings + sym->st_name); } } From a29590378e0708871405bba853591c003ef55512 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 16 Nov 2022 17:24:58 +0100 Subject: [PATCH 06/29] WIP --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 12f53fd10..46e960fb8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -316,7 +316,7 @@ add_benchmark(timer-bench timer-bench.cc ../src/timer.cc ../src/perf.cc) message(STATUS "Async profiler" ${ASYNC_PROFILER_SRC}) -add_unit_test(dwarf_unwind-ut dwarf_unwind-ut.cc ../src/savecontext.cc ../src/saveregisters.cc ${ASYNC_PROFILER_SRC} +add_unit_test(dwarf_unwind-ut dwarf_unwind-ut.cc ../src/lib/savecontext.cc ../src/lib/saveregisters.cc ${ASYNC_PROFILER_SRC} LIBRARIES ${ELFUTILS_LIBRARIES}) 
target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE}) From e81e633339e2b01e4eb9bd0d371a11d3ee807455 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 17 Nov 2022 10:01:57 +0100 Subject: [PATCH 07/29] Refactor and add a test on async profiler unwinding --- design_notes.txt | 6 +- include/async-profiler/arch.h | 84 +- include/async-profiler/codeCache.h | 218 +++-- include/async-profiler/dwarf.h | 174 ++-- include/async-profiler/mutex.h | 41 +- include/async-profiler/os.h | 6 +- include/async-profiler/safeAccess.h | 6 +- include/async-profiler/stackFrame.h | 68 +- include/async-profiler/stackWalker.h | 7 +- include/async-profiler/stack_context.h | 53 +- include/async-profiler/symbols.h | 17 +- src/async-profiler/cgotraceback.cpp | 268 +++---- src/async-profiler/codeCache.cpp | 314 ++++---- src/async-profiler/dwarf.cpp | 606 +++++++------- src/async-profiler/mutex.cpp | 30 +- src/async-profiler/safeAccess.cpp | 89 +-- src/async-profiler/stackFrame_aarch64.cpp | 164 ++-- src/async-profiler/stackFrame_arm.cpp | 99 ++- src/async-profiler/stackFrame_i386.cpp | 132 ++-- src/async-profiler/stackFrame_ppc64.cpp | 147 ++-- src/async-profiler/stackFrame_x64.cpp | 56 +- src/async-profiler/stackWalker.cpp | 182 ++--- src/async-profiler/stack_context.cpp | 17 + src/async-profiler/symbols_darwin.cpp | 224 +++--- src/async-profiler/symbols_linux.cpp | 919 +++++++++++----------- src/unwind_helpers.cc | 2 +- test/CMakeLists.txt | 7 +- test/dwarf_unwind-ut.cc | 101 +-- 28 files changed, 2003 insertions(+), 2034 deletions(-) create mode 100644 src/async-profiler/stack_context.cpp diff --git a/design_notes.txt b/design_notes.txt index 08a88d187..b592706c1 100644 --- a/design_notes.txt +++ b/design_notes.txt @@ -34,4 +34,8 @@ Oh wow, native func is stored in a char and we deduce pointer from the offset to 1) Rewrite parseLibraries we can use DSO information + file info Ensure the cache is at elf address (not base) -Start is 0 or for non PIE, \ No newline at end 
of file +Start is 0 or for non PIE, + + +2) Find library is by absolute address +- Keep that ? \ No newline at end of file diff --git a/include/async-profiler/arch.h b/include/async-profiler/arch.h index 5cedcbfd9..94a2397e7 100644 --- a/include/async-profiler/arch.h +++ b/include/async-profiler/arch.h @@ -17,29 +17,27 @@ #ifndef _ARCH_H #define _ARCH_H - typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; -static inline u64 atomicInc(volatile u64& var, u64 increment = 1) { - return __sync_fetch_and_add(&var, increment); +static inline u64 atomicInc(volatile u64 &var, u64 increment = 1) { + return __sync_fetch_and_add(&var, increment); } -static inline int atomicInc(volatile int& var, int increment = 1) { - return __sync_fetch_and_add(&var, increment); +static inline int atomicInc(volatile int &var, int increment = 1) { + return __sync_fetch_and_add(&var, increment); } -static inline u64 loadAcquire(u64& var) { - return __atomic_load_n(&var, __ATOMIC_ACQUIRE); +static inline u64 loadAcquire(u64 &var) { + return __atomic_load_n(&var, __ATOMIC_ACQUIRE); } -static inline void storeRelease(u64& var, u64 value) { - return __atomic_store_n(&var, value, __ATOMIC_RELEASE); +static inline void storeRelease(u64 &var, u64 value) { + return __atomic_store_n(&var, value, __ATOMIC_RELEASE); } - #if defined(__x86_64__) || defined(__i386__) typedef unsigned char instruction_t; @@ -51,11 +49,12 @@ const int FRAME_PC_SLOT = 1; const int ADJUST_RET = 1; const int PLT_HEADER_SIZE = 16; const int PLT_ENTRY_SIZE = 16; -const int PERF_REG_PC = 8; // PERF_REG_X86_IP +const int PERF_REG_PC = 8; // PERF_REG_X86_IP -#define spinPause() asm volatile("pause") -#define rmb() asm volatile("lfence" : : : "memory") -#define flushCache(addr) asm volatile("mfence; clflush (%0); mfence" : : "r" (addr) : "memory") +# define spinPause() asm volatile("pause") +# define rmb() asm volatile("lfence" : : : "memory") +# define flushCache(addr) \ + 
asm volatile("mfence; clflush (%0); mfence" : : "r"(addr) : "memory") #elif defined(__arm__) || defined(__thumb__) @@ -69,11 +68,13 @@ const int FRAME_PC_SLOT = 1; const int ADJUST_RET = 0; const int PLT_HEADER_SIZE = 20; const int PLT_ENTRY_SIZE = 12; -const int PERF_REG_PC = 15; // PERF_REG_ARM_PC +const int PERF_REG_PC = 15; // PERF_REG_ARM_PC -#define spinPause() asm volatile("yield") -#define rmb() asm volatile("dmb ish" : : : "memory") -#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) +# define spinPause() asm volatile("yield") +# define rmb() asm volatile("dmb ish" : : : "memory") +# define flushCache(addr) \ + __builtin___clear_cache((char *)(addr), \ + (char *)(addr) + sizeof(instruction_t)) #elif defined(__aarch64__) @@ -86,18 +87,21 @@ const int FRAME_PC_SLOT = 1; const int ADJUST_RET = 0; const int PLT_HEADER_SIZE = 32; const int PLT_ENTRY_SIZE = 16; -const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC +const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC -#define spinPause() asm volatile("isb") -#define rmb() asm volatile("dmb ish" : : : "memory") -#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) +# define spinPause() asm volatile("isb") +# define rmb() asm volatile("dmb ish" : : : "memory") +# define flushCache(addr) \ + __builtin___clear_cache((char *)(addr), \ + (char *)(addr) + sizeof(instruction_t)) #elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) typedef unsigned int instruction_t; const instruction_t BREAKPOINT = 0x7fe00008; -// We place the break point in the third instruction slot on PPCLE as the first two are skipped if -// the call comes from within the same compilation unit according to the LE ABI. +// We place the break point in the third instruction slot on PPCLE as the first +// two are skipped if the call comes from within the same compilation unit +// according to the LE ABI. 
const int BREAKPOINT_OFFSET = 8; const int SYSCALL_SIZE = sizeof(instruction_t); @@ -105,35 +109,41 @@ const int FRAME_PC_SLOT = 2; const int ADJUST_RET = 0; const int PLT_HEADER_SIZE = 24; const int PLT_ENTRY_SIZE = 24; -const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP - -#define spinPause() asm volatile("yield") // does nothing, but using or 1,1,1 would lead to other problems -#define rmb() asm volatile ("sync" : : : "memory") // lwsync would do but better safe than sorry -#define flushCache(addr) __builtin___clear_cache((char*)(addr), (char*)(addr) + sizeof(instruction_t)) +const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP + +# define spinPause() \ + asm volatile("yield") // does nothing, but using or 1,1,1 would lead to + // other problems +# define rmb() \ + asm volatile("sync" \ + : \ + : \ + : "memory") // lwsync would do but better safe than sorry +# define flushCache(addr) \ + __builtin___clear_cache((char *)(addr), \ + (char *)(addr) + sizeof(instruction_t)) #else -#error "Compiling on unsupported arch" +# error "Compiling on unsupported arch" #endif - // Return address signing support. // Apple M1 has 47 bit virtual addresses. 
#if defined(__aarch64__) && defined(__APPLE__) # define ADDRESS_BITS 47 -# define WX_MEMORY true +# define WX_MEMORY true #else -# define WX_MEMORY false +# define WX_MEMORY false #endif #ifdef ADDRESS_BITS -static inline const void* stripPointer(const void* p) { - return (const void*) ((unsigned long)p & ((1UL << ADDRESS_BITS) - 1)); +static inline const void *stripPointer(const void *p) { + return (const void *)((unsigned long)p & ((1UL << ADDRESS_BITS) - 1)); } #else -# define stripPointer(p) (p) +# define stripPointer(p) (p) #endif - #endif // _ARCH_H diff --git a/include/async-profiler/codeCache.h b/include/async-profiler/codeCache.h index 83d8b6f21..3e2359e19 100644 --- a/include/async-profiler/codeCache.h +++ b/include/async-profiler/codeCache.h @@ -17,174 +17,144 @@ #ifndef _CODECACHE_H #define _CODECACHE_H -//#include +// #include +#define NO_MIN_ADDRESS ((const void *)-1) +#define NO_MAX_ADDRESS ((const void *)0) -#define NO_MIN_ADDRESS ((const void*)-1) -#define NO_MAX_ADDRESS ((const void*)0) - -typedef bool (*NamePredicate)(const char* name); +typedef bool (*NamePredicate)(const char *name); const int INITIAL_CODE_CACHE_CAPACITY = 1000; const int MAX_NATIVE_LIBS = 2048; - class NativeFunc { - private: - short _lib_index; - char _mark; - char _reserved; - char _name[0]; - - static NativeFunc* from(const char* name) { - return (NativeFunc*)(name - sizeof(NativeFunc)); - } +private: + short _lib_index; + char _mark; + char _reserved; + char _name[0]; - public: - static char* create(const char* name, short lib_index); - static void destroy(char* name); + static NativeFunc *from(const char *name) { + return (NativeFunc *)(name - sizeof(NativeFunc)); + } - static short libIndex(const char* name) { - return from(name)->_lib_index; - } +public: + static char *create(const char *name, short lib_index); + static void destroy(char *name); - static bool isMarked(const char* name) { - return from(name)->_mark != 0; - } + static short libIndex(const char *name) { 
return from(name)->_lib_index; } - static void mark(const char* name) { - from(name)->_mark = 1; - } -}; + static bool isMarked(const char *name) { return from(name)->_mark != 0; } + static void mark(const char *name) { from(name)->_mark = 1; } +}; class CodeBlob { - public: - const void* _start; - const void* _end; - char* _name; - - static int comparator(const void* c1, const void* c2) { - CodeBlob* cb1 = (CodeBlob*)c1; - CodeBlob* cb2 = (CodeBlob*)c2; - if (cb1->_start < cb2->_start) { - return -1; - } else if (cb1->_start > cb2->_start) { - return 1; - } else if (cb1->_end == cb2->_end) { - return 0; - } else { - return cb1->_end > cb2->_end ? -1 : 1; - } - } +public: + const void *_start; + const void *_end; + char *_name; + + static int comparator(const void *c1, const void *c2) { + CodeBlob *cb1 = (CodeBlob *)c1; + CodeBlob *cb2 = (CodeBlob *)c2; + if (cb1->_start < cb2->_start) { + return -1; + } else if (cb1->_start > cb2->_start) { + return 1; + } else if (cb1->_end == cb2->_end) { + return 0; + } else { + return cb1->_end > cb2->_end ? 
-1 : 1; + } + } }; - class FrameDesc; class CodeCache { - protected: - char* _name; - short _lib_index; - const void* _min_address; - const void* _max_address; - const char* _text_base; +protected: + char *_name; + short _lib_index; + const void *_min_address; + const void *_max_address; + const char *_text_base; - void** _got_start; - void** _got_end; - bool _got_patchable; + void **_got_start; + void **_got_end; + bool _got_patchable; - FrameDesc* _dwarf_table; - int _dwarf_table_length; + FrameDesc *_dwarf_table; + int _dwarf_table_length; - int _capacity; - int _count; - CodeBlob* _blobs; + int _capacity; + int _count; + CodeBlob *_blobs; - void expand(); + void expand(); - public: - CodeCache(const char* name, - short lib_index = -1, - const void* min_address = NO_MIN_ADDRESS, - const void* max_address = NO_MAX_ADDRESS); +public: + CodeCache(const char *name, short lib_index = -1, + const void *min_address = NO_MIN_ADDRESS, + const void *max_address = NO_MAX_ADDRESS); - ~CodeCache(); + ~CodeCache(); - const char* name() const { - return _name; - } + const char *name() const { return _name; } - const void* minAddress() const { - return _min_address; - } + const void *minAddress() const { return _min_address; } - const void* maxAddress() const { - return _max_address; - } + const void *maxAddress() const { return _max_address; } - bool contains(const void* address) const { - return address >= _min_address && address < _max_address; - } + bool contains(const void *address) const { + return address >= _min_address && address < _max_address; + } - void setTextBase(const char* text_base) { - _text_base = text_base; - } + void setTextBase(const char *text_base) { _text_base = text_base; } - const char *getTextBase() { - return _text_base; - } + const char *getTextBase() { return _text_base; } - void** gotStart() const { - return _got_start; - } + void **gotStart() const { return _got_start; } - void** gotEnd() const { - return _got_end; - } + void **gotEnd() const { 
return _got_end; } - void add(const void* start, int length, const char* name, bool update_bounds = false); - void updateBounds(const void* start, const void* end); - void sort(); - void mark(NamePredicate predicate); + void add(const void *start, int length, const char *name, + bool update_bounds = false); + void updateBounds(const void *start, const void *end); + void sort(); + void mark(NamePredicate predicate); - CodeBlob* find(const void* address); - const char* binarySearch(const void* address); - const void* findSymbol(const char* name); - const void* findSymbolByPrefix(const char* prefix); - const void* findSymbolByPrefix(const char* prefix, int prefix_len); + CodeBlob *find(const void *address); + const char *binarySearch(const void *address); + const void *findSymbol(const char *name); + const void *findSymbolByPrefix(const char *prefix); + const void *findSymbolByPrefix(const char *prefix, int prefix_len); - void setGlobalOffsetTable(void** start, void** end, bool patchable); - void** findGlobalOffsetEntry(void* address); - void makeGotPatchable(); + void setGlobalOffsetTable(void **start, void **end, bool patchable); + void **findGlobalOffsetEntry(void *address); + void makeGotPatchable(); - void setDwarfTable(FrameDesc* table, int length); - FrameDesc* findFrameDesc(const void* pc); + void setDwarfTable(FrameDesc *table, int length); + FrameDesc *findFrameDesc(const void *pc); }; - class CodeCacheArray { - private: - CodeCache* _libs[MAX_NATIVE_LIBS]; - int _count; +private: + CodeCache *_libs[MAX_NATIVE_LIBS]; + int _count; - public: - CodeCacheArray() : _count(0) { - } +public: + CodeCacheArray() : _count(0) {} - CodeCache* operator[](int index) { - return _libs[index]; - } + CodeCache *operator[](int index) { return _libs[index]; } - int count() { - return __atomic_load_n(&_count, __ATOMIC_ACQUIRE); - } + int count() { return __atomic_load_n(&_count, __ATOMIC_ACQUIRE); } - void add(CodeCache* lib) { - int index = __atomic_load_n(&_count, 
__ATOMIC_ACQUIRE); - _libs[index] = lib; - __atomic_store_n(&_count, index + 1, __ATOMIC_RELEASE); - } + void add(CodeCache *lib) { + int index = __atomic_load_n(&_count, __ATOMIC_ACQUIRE); + _libs[index] = lib; + __atomic_store_n(&_count, index + 1, __ATOMIC_RELEASE); + } }; #endif // _CODECACHE_H diff --git a/include/async-profiler/dwarf.h b/include/async-profiler/dwarf.h index 85956aea0..3a7524350 100644 --- a/include/async-profiler/dwarf.h +++ b/include/async-profiler/dwarf.h @@ -17,13 +17,12 @@ #ifndef _DWARF_H #define _DWARF_H -#include #include "arch.h" - +#include #if defined(__x86_64__) -#define DWARF_SUPPORTED true +# define DWARF_SUPPORTED true const int DW_REG_FP = 6; const int DW_REG_SP = 7; @@ -31,7 +30,7 @@ const int DW_REG_PC = 16; #elif defined(__i386__) -#define DWARF_SUPPORTED true +# define DWARF_SUPPORTED true const int DW_REG_FP = 5; const int DW_REG_SP = 4; @@ -39,7 +38,7 @@ const int DW_REG_PC = 8; #else -#define DWARF_SUPPORTED false +# define DWARF_SUPPORTED false const int DW_REG_FP = 0; const int DW_REG_SP = 1; @@ -47,114 +46,103 @@ const int DW_REG_PC = 2; #endif -const int DW_REG_PLT = 128; // denotes special rule for PLT entries -const int DW_REG_INVALID = 255; // denotes unsupported configuration +const int DW_REG_PLT = 128; // denotes special rule for PLT entries +const int DW_REG_INVALID = 255; // denotes unsupported configuration const int DW_PC_OFFSET = 1; const int DW_SAME_FP = 0x80000000; -const int DW_STACK_SLOT = sizeof(void*); - +const int DW_STACK_SLOT = sizeof(void *); struct FrameDesc { - u32 loc; - int cfa; - int fp_off; + u32 loc; + int cfa; + int fp_off; - static FrameDesc default_frame; + static FrameDesc default_frame; - static int comparator(const void* p1, const void* p2) { - FrameDesc* fd1 = (FrameDesc*)p1; - FrameDesc* fd2 = (FrameDesc*)p2; - return (int)(fd1->loc - fd2->loc); - } + static int comparator(const void *p1, const void *p2) { + FrameDesc *fd1 = (FrameDesc *)p1; + FrameDesc *fd2 = (FrameDesc *)p2; + 
return (int)(fd1->loc - fd2->loc); + } }; - class DwarfParser { - private: - const char* _name; - const char* _image_base; - const char* _ptr; - - int _capacity; - int _count; - FrameDesc* _table; - FrameDesc* _prev; - - u32 _code_align; - int _data_align; - - const char* add(size_t size) { - const char* ptr = _ptr; - _ptr = ptr + size; - return ptr; - } - - u8 get8() { - return *_ptr++; - } - - u16 get16() { - return *(u16*)add(2); +private: + const char *_name; + const char *_image_base; + const char *_ptr; + + int _capacity; + int _count; + FrameDesc *_table; + FrameDesc *_prev; + + u32 _code_align; + int _data_align; + + const char *add(size_t size) { + const char *ptr = _ptr; + _ptr = ptr + size; + return ptr; + } + + u8 get8() { return *_ptr++; } + + u16 get16() { return *(u16 *)add(2); } + + u32 get32() { return *(u32 *)add(4); } + + u32 getLeb() { + u32 result = 0; + for (u32 shift = 0;; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + return result; + } } - - u32 get32() { - return *(u32*)add(4); - } - - u32 getLeb() { - u32 result = 0; - for (u32 shift = 0; ; shift += 7) { - u8 b = *_ptr++; - result |= (b & 0x7f) << shift; - if ((b & 0x80) == 0) { - return result; - } - } - } - - int getSLeb() { - int result = 0; - for (u32 shift = 0; ; shift += 7) { - u8 b = *_ptr++; - result |= (b & 0x7f) << shift; - if ((b & 0x80) == 0) { - if ((b & 0x40) != 0 && (shift += 7) < 32) { - result |= -1 << shift; - } - return result; - } + } + + int getSLeb() { + int result = 0; + for (u32 shift = 0;; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + if ((b & 0x40) != 0 && (shift += 7) < 32) { + result |= -1 << shift; } + return result; + } } + } - void skipLeb() { - while (*_ptr++ & 0x80) {} - } + void skipLeb() { + while (*_ptr++ & 0x80) {} + } - const char* getPtr() { - const char* ptr = _ptr; - return ptr + *(int*)add(4); - } + const char *getPtr() { + const char *ptr = _ptr; + return 
ptr + *(int *)add(4); + } - void parse(const char* eh_frame_hdr); - void parseCie(); - void parseFde(); - void parseInstructions(u32 loc, const char* end); - int parseExpression(); + void parse(const char *eh_frame_hdr); + void parseCie(); + void parseFde(); + void parseInstructions(u32 loc, const char *end); + int parseExpression(); - void addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off); - FrameDesc* addRecordRaw(u32 loc, int cfa, int fp_off); + void addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off); + FrameDesc *addRecordRaw(u32 loc, int cfa, int fp_off); - public: - DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr); +public: + DwarfParser(const char *name, const char *image_base, + const char *eh_frame_hdr); - FrameDesc* table() const { - return _table; - } + FrameDesc *table() const { return _table; } - int count() const { - return _count; - } + int count() const { return _count; } }; #endif // _DWARF_H diff --git a/include/async-profiler/mutex.h b/include/async-profiler/mutex.h index 48e69205c..b017bcd82 100644 --- a/include/async-profiler/mutex.h +++ b/include/async-profiler/mutex.h @@ -17,44 +17,39 @@ #ifndef _MUTEX_H #define _MUTEX_H -#include #include "arch.h" - +#include class Mutex { - protected: - pthread_mutex_t _mutex; +protected: + pthread_mutex_t _mutex; - public: - Mutex(); +public: + Mutex(); - void lock(); - void unlock(); + void lock(); + void unlock(); }; class WaitableMutex : public Mutex { - protected: - pthread_cond_t _cond; +protected: + pthread_cond_t _cond; - public: - WaitableMutex(); +public: + WaitableMutex(); - bool waitUntil(u64 wall_time); - void notify(); + bool waitUntil(u64 wall_time); + void notify(); }; class MutexLocker { - private: - Mutex* _mutex; +private: + Mutex *_mutex; - public: - MutexLocker(Mutex& mutex) : _mutex(&mutex) { - _mutex->lock(); - } +public: + MutexLocker(Mutex &mutex) : _mutex(&mutex) { _mutex->lock(); } - ~MutexLocker() { - _mutex->unlock(); - } + 
~MutexLocker() { _mutex->unlock(); } }; #endif // _MUTEX_H diff --git a/include/async-profiler/os.h b/include/async-profiler/os.h index ad35dc8fa..352cb1d0b 100644 --- a/include/async-profiler/os.h +++ b/include/async-profiler/os.h @@ -23,9 +23,9 @@ #include class OS { - public: - static const size_t page_size; - static const size_t page_mask; +public: + static const size_t page_size; + static const size_t page_mask; }; #endif // _OS_H diff --git a/include/async-profiler/safeAccess.h b/include/async-profiler/safeAccess.h index 8afe71571..652e03d71 100644 --- a/include/async-profiler/safeAccess.h +++ b/include/async-profiler/safeAccess.h @@ -17,18 +17,18 @@ #ifndef _SAFEACCESS_H #define _SAFEACCESS_H -#include #include "arch.h" +#include #ifdef __clang__ # define NOINLINE __attribute__((noinline)) #else -# define NOINLINE __attribute__((noinline,noclone)) +# define NOINLINE __attribute__((noinline, noclone)) #endif namespace SafeAccess { -NOINLINE __attribute__((aligned(16))) void* load(void** ptr); +NOINLINE __attribute__((aligned(16))) void *load(void **ptr); } diff --git a/include/async-profiler/stackFrame.h b/include/async-profiler/stackFrame.h index b658e10fe..3beade264 100644 --- a/include/async-profiler/stackFrame.h +++ b/include/async-profiler/stackFrame.h @@ -17,57 +17,53 @@ #ifndef _STACKFRAME_H #define _STACKFRAME_H +#include "arch.h" #include #include -#include "arch.h" - class StackFrame { - private: - ucontext_t* _ucontext; +private: + ucontext_t *_ucontext; - static bool withinCurrentStack(uintptr_t address) { - // Check that the address is not too far from the stack pointer of current context - void* real_sp; - return address - (uintptr_t)&real_sp <= 0xffff; - } + static bool withinCurrentStack(uintptr_t address) { + // Check that the address is not too far from the stack pointer of current + // context + void *real_sp; + return address - (uintptr_t)&real_sp <= 0xffff; + } - public: - StackFrame(void* ucontext) { - _ucontext = (ucontext_t*)ucontext; 
- } +public: + StackFrame(void *ucontext) { _ucontext = (ucontext_t *)ucontext; } - void restore(uintptr_t saved_pc, uintptr_t saved_sp, uintptr_t saved_fp) { - if (_ucontext != NULL) { - pc() = saved_pc; - sp() = saved_sp; - fp() = saved_fp; - } + void restore(uintptr_t saved_pc, uintptr_t saved_sp, uintptr_t saved_fp) { + if (_ucontext != nullptr) { + pc() = saved_pc; + sp() = saved_sp; + fp() = saved_fp; } + } - uintptr_t stackAt(int slot) { - return ((uintptr_t*)sp())[slot]; - } + uintptr_t stackAt(int slot) { return ((uintptr_t *)sp())[slot]; } - uintptr_t& pc(); - uintptr_t& sp(); - uintptr_t& fp(); + uintptr_t &pc(); + uintptr_t &sp(); + uintptr_t &fp(); - uintptr_t& retval(); - uintptr_t arg0(); - uintptr_t arg1(); - uintptr_t arg2(); - uintptr_t arg3(); + uintptr_t &retval(); + uintptr_t arg0(); + uintptr_t arg1(); + uintptr_t arg2(); + uintptr_t arg3(); - void ret(); + void ret(); - bool popStub(instruction_t* entry, const char* name); - bool popMethod(instruction_t* entry); + bool popStub(instruction_t *entry, const char *name); + bool popMethod(instruction_t *entry); - bool checkInterruptedSyscall(); + bool checkInterruptedSyscall(); - // Check if PC points to a syscall instruction - static bool isSyscall(instruction_t* pc); + // Check if PC points to a syscall instruction + static bool isSyscall(instruction_t *pc); }; #endif // _STACKFRAME_H diff --git a/include/async-profiler/stackWalker.h b/include/async-profiler/stackWalker.h index 4ae364322..c67cac21d 100644 --- a/include/async-profiler/stackWalker.h +++ b/include/async-profiler/stackWalker.h @@ -25,9 +25,10 @@ #include "codeCache.h" #include "stack_context.h" -CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void* address); +CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void *address); -int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, const ap::StackBuffer &buffer, - const void** callchain, int max_depth, int skip); +int stackWalk(CodeCacheArray *cache, 
ap::StackContext &sc, + const ap::StackBuffer &buffer, const void **callchain, + int max_depth, int skip); #endif // _STACKWALKER_H diff --git a/include/async-profiler/stack_context.h b/include/async-profiler/stack_context.h index 5ef20f67a..be47ae1fe 100644 --- a/include/async-profiler/stack_context.h +++ b/include/async-profiler/stack_context.h @@ -2,28 +2,47 @@ #pragma once -#include +#include #include -namespace ap { +#include "perf_archmap.hpp" + +namespace ap { struct StackContext { - const void* pc; - uintptr_t sp; - uintptr_t fp; - - void set(const void* pc, uintptr_t sp, uintptr_t fp) { - this->pc = pc; - this->sp = sp; - this->fp = fp; - } + const void *pc; + uint64_t sp; + uint64_t fp; + + void set(const void *pc, uintptr_t sp, uintptr_t fp) { + this->pc = pc; + this->sp = sp; + this->fp = fp; + } }; +// Async profiler's unwinding only uses a subset of the registers +StackContext from_regs(const std::span regs); + struct StackBuffer { - StackBuffer(std::span bytes, uint64_t start, uint64_t end): - _bytes(bytes), sp_start(start), sp_end(end) {} - std::span _bytes; - uint64_t sp_start; // initial SP (in context of the process) - uint64_t sp_end; // sp + size (so root functions = start of stack) + StackBuffer(std::span bytes, uint64_t start, uint64_t end) + : _bytes(bytes), sp_start(start), sp_end(end) {} + std::span _bytes; + uint64_t sp_start; // initial SP (in context of the process) + uint64_t sp_end; // sp + size (so root functions = start of stack) + /* + sp_end + For this thread, high address matches where the stack begins + as it grows down. + | + Main() + | + FuncA() + | + ... 
+ | + sp_start + This matches the SP register when the stack was captured + */ }; -} +} // namespace ap diff --git a/include/async-profiler/symbols.h b/include/async-profiler/symbols.h index 084376c2d..3271ccbe1 100644 --- a/include/async-profiler/symbols.h +++ b/include/async-profiler/symbols.h @@ -20,19 +20,16 @@ #include "codeCache.h" #include "mutex.h" - class Symbols { - private: - static Mutex _parse_lock; - static bool _have_kernel_symbols; +private: + static Mutex _parse_lock; + static bool _have_kernel_symbols; - public: - static void parseKernelSymbols(CodeCache* cc); - static void parseLibraries(CodeCacheArray* array, bool kernel_symbols); +public: + static void parseKernelSymbols(CodeCache *cc); + static void parseLibraries(CodeCacheArray *array, bool kernel_symbols); - static bool haveKernelSymbols() { - return _have_kernel_symbols; - } + static bool haveKernelSymbols() { return _have_kernel_symbols; } }; #endif // _SYMBOLS_H diff --git a/src/async-profiler/cgotraceback.cpp b/src/async-profiler/cgotraceback.cpp index 06669e7bc..be5fb86f9 100644 --- a/src/async-profiler/cgotraceback.cpp +++ b/src/async-profiler/cgotraceback.cpp @@ -6,71 +6,70 @@ #include "symbols.h" struct CodeCacheArraySingleton { - static CodeCacheArray *getInstance(); - static CodeCacheArray *instance; + static CodeCacheArray *getInstance(); + static CodeCacheArray *instance; }; CodeCacheArray *CodeCacheArraySingleton::instance = nullptr; CodeCacheArray *CodeCacheArraySingleton::getInstance() { - // XXX(nick): I don't know that I need to care about concurrency. This - // should be read-only once init() is called. - if (instance == nullptr) { - instance = new CodeCacheArray(); - } - return instance; + // XXX(nick): I don't know that I need to care about concurrency. This + // should be read-only once init() is called. 
+ if (instance == nullptr) { + instance = new CodeCacheArray(); + } + return instance; } static CodeBlob *asmcgocall_bounds = nullptr; static uintptr_t asmcgocall_base = 0; static __attribute__((constructor)) void init(void) { - auto a = CodeCacheArraySingleton::getInstance(); - Symbols::parseLibraries(a, false); - - int count = a->count(); - for (int i = 0; i < count; i++) { - CodeCache *c = a->operator[](i); - const void *p = NULL; - p = c->findSymbol("runtime.asmcgocall.abi0"); - if (p == nullptr) { - // amscgocall name has "abi0" suffix on more recent Go versions - // but not on older versions - p = c->findSymbol("runtime.asmcgocall"); - } - if (p == nullptr) { - continue; - } - auto cb = c->find(p); - if (cb != nullptr) { - asmcgocall_bounds = cb; - asmcgocall_base = (uintptr_t) c->getTextBase(); - } + auto a = CodeCacheArraySingleton::getInstance(); + Symbols::parseLibraries(a, false); + + int count = a->count(); + for (int i = 0; i < count; i++) { + CodeCache *c = a->operator[](i); + const void *p = NULL; + p = c->findSymbol("runtime.asmcgocall.abi0"); + if (p == nullptr) { + // amscgocall name has "abi0" suffix on more recent Go versions + // but not on older versions + p = c->findSymbol("runtime.asmcgocall"); + } + if (p == nullptr) { + continue; } -} + auto cb = c->find(p); + if (cb != nullptr) { + asmcgocall_bounds = cb; + asmcgocall_base = (uintptr_t)c->getTextBase(); + } + } +} void populateStackContext(ap::StackContext &sc, void *ucontext); -int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, const void** callchain, int max_depth, int skip); +int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, + const void **callchain, int max_depth, int skip); bool stepStackContext(ap::StackContext &sc, CodeCacheArray *cache); -extern "C" { +extern "C" { static int enabled = 1; // for benchmarking -void async_cgo_traceback_internal_set_enabled(int value) { - enabled = value; -} +void async_cgo_traceback_internal_set_enabled(int value) { enabled = value; 
} #define STACK_MAX 32 struct cgo_context { - const void *pc; - uintptr_t sp; - uintptr_t fp; - uintptr_t stack[STACK_MAX]; - int cached; - int inuse; + const void *pc; + uintptr_t sp; + uintptr_t fp; + uintptr_t stack[STACK_MAX]; + int cached; + int inuse; }; // There may be multiple C->Go transitions for a single C tread, so we have a @@ -89,135 +88,130 @@ static __thread struct cgo_context cgo_contexts[cgo_contexts_length]; // cgo_contexts is signal safe? static struct cgo_context *cgo_context_get(void) { - for (int i = 0; i < cgo_contexts_length; i++) { - if (cgo_contexts[i].inuse == 0) { - cgo_contexts[i].inuse = 1; - cgo_contexts[i].cached = 0; - return &cgo_contexts[i]; - } + for (int i = 0; i < cgo_contexts_length; i++) { + if (cgo_contexts[i].inuse == 0) { + cgo_contexts[i].inuse = 1; + cgo_contexts[i].cached = 0; + return &cgo_contexts[i]; } - return NULL; + } + return NULL; } -static void cgo_context_release(struct cgo_context *c) { - c->inuse = 0; -} +static void cgo_context_release(struct cgo_context *c) { c->inuse = 0; } // truncate_asmcgocall truncates a call stack after asmcgocall, if asmcgocall is // present in the stack. This function is the first function in the C call stack // for a Go -> C call, and it is not the responsibility of this library to // unwind past that function. static void truncate_asmcgocall(void **stack, int size) { - if (asmcgocall_bounds == nullptr) { + if (asmcgocall_bounds == nullptr) { + return; + } + for (int i = 0; i < size; i++) { + uintptr_t a = (uintptr_t)stack[i]; + a += asmcgocall_base; + if ((a >= (uintptr_t)asmcgocall_bounds->_start) && + (a <= (uintptr_t)asmcgocall_bounds->_end)) { + if ((i + 1) < size) { + // zero out the thing AFTER asmcgocall. 
We want to stop at + // asmcgocall since that's the "top" of the C stack in a + // Go -> C (-> Go) call + stack[i + 1] = 0; return; + } } - for (int i = 0; i < size; i++) { - uintptr_t a = (uintptr_t) stack[i]; - a += asmcgocall_base; - if ((a >= (uintptr_t) asmcgocall_bounds->_start) && (a <= (uintptr_t) asmcgocall_bounds->_end)) { - if ((i + 1) < size) { - // zero out the thing AFTER asmcgocall. We want to stop at - // asmcgocall since that's the "top" of the C stack in a - // Go -> C (-> Go) call - stack[i + 1] = 0; - return; - } - } - } + } } struct cgo_context_arg { - uintptr_t p; + uintptr_t p; }; #ifndef C_G_THING +void async_cgo_context(void *p) {} +#else void async_cgo_context(void *p) { -} -#else -void async_cgo_context(void *p) { - if (enabled == 0) { - return; - } + if (enabled == 0) { + return; + } - cgo_context_arg *arg = (cgo_context_arg *)p; - struct cgo_context *ctx = (struct cgo_context *) arg->p; - if (ctx != NULL) { - cgo_context_release(ctx); - return; - } - ctx = cgo_context_get(); - if (ctx == NULL) { - return; - } - ap::StackContext sc; - populateStackContext(sc, nullptr); - CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); - // There are two frames in the call stack we should skip. The first is this - // function, and the second is _cgo_wait_runtime_init_done, which calls this - // function to save the C call stack context before calling into Go code. - // The next frame after that is the exported C->Go function, which is where - // unwinding should begin for this context in the traceback function. 
- stepStackContext(sc, cache); - stepStackContext(sc, cache); - ctx->pc = sc.pc; - ctx->sp = sc.sp; - ctx->fp = sc.fp; - arg->p = (uintptr_t) ctx; + cgo_context_arg *arg = (cgo_context_arg *)p; + struct cgo_context *ctx = (struct cgo_context *)arg->p; + if (ctx != NULL) { + cgo_context_release(ctx); return; + } + ctx = cgo_context_get(); + if (ctx == NULL) { + return; + } + ap::StackContext sc; + populateStackContext(sc, nullptr); + CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); + // There are two frames in the call stack we should skip. The first is this + // function, and the second is _cgo_wait_runtime_init_done, which calls this + // function to save the C call stack context before calling into Go code. + // The next frame after that is the exported C->Go function, which is where + // unwinding should begin for this context in the traceback function. + stepStackContext(sc, cache); + stepStackContext(sc, cache); + ctx->pc = sc.pc; + ctx->sp = sc.sp; + ctx->fp = sc.fp; + arg->p = (uintptr_t)ctx; + return; } #endif - struct cgo_traceback_arg { - uintptr_t context; - uintptr_t sig_context; - uintptr_t* buf; - uintptr_t max; + uintptr_t context; + uintptr_t sig_context; + uintptr_t *buf; + uintptr_t max; }; #ifndef C_GO_THINGS -void async_cgo_traceback(void *p) { -} +void async_cgo_traceback(void *p) {} #else void async_cgo_traceback(void *p) { - if (enabled == 0) { - return; - } - - struct cgo_traceback_arg *arg = (struct cgo_traceback_arg *)p; - struct cgo_context *ctx = NULL; - ap::StackContext sc; - - // If we had a previous context, then we're being called to unwind some - // previous C portion of a mixed C/Go call stack. We use the call stack - // information saved in the context. 
- if (arg->context != 0) { - ctx = (struct cgo_context *) arg->context; - if (ctx->cached == 0) { - CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); - sc.pc = ctx->pc; - sc.sp = ctx->sp; - sc.fp = ctx->fp; - int n = stackWalk(cache, sc, (const void **) ctx->stack, STACK_MAX, 0); - truncate_asmcgocall((void **) ctx->stack, n); - ctx->cached = 1; - } - uintptr_t n = (arg->max < STACK_MAX) ? arg->max : STACK_MAX; - memcpy(arg->buf, ctx->stack, n * sizeof(uintptr_t)); - return; + if (enabled == 0) { + return; + } + + struct cgo_traceback_arg *arg = (struct cgo_traceback_arg *)p; + struct cgo_context *ctx = NULL; + ap::StackContext sc; + + // If we had a previous context, then we're being called to unwind some + // previous C portion of a mixed C/Go call stack. We use the call stack + // information saved in the context. + if (arg->context != 0) { + ctx = (struct cgo_context *)arg->context; + if (ctx->cached == 0) { + CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); + sc.pc = ctx->pc; + sc.sp = ctx->sp; + sc.fp = ctx->fp; + int n = stackWalk(cache, sc, (const void **)ctx->stack, STACK_MAX, 0); + truncate_asmcgocall((void **)ctx->stack, n); + ctx->cached = 1; } + uintptr_t n = (arg->max < STACK_MAX) ? 
arg->max : STACK_MAX; + memcpy(arg->buf, ctx->stack, n * sizeof(uintptr_t)); + return; + } - populateStackContext(sc, (void *) arg->sig_context); - CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); - int n = stackWalk(cache, sc, (const void **) arg->buf, arg->max, 0); - if (n < arg->max) { - arg->buf[n] = 0; - } - truncate_asmcgocall((void **) arg->buf, n); + populateStackContext(sc, (void *)arg->sig_context); + CodeCacheArray *cache = (CodeCacheArraySingleton::getInstance()); + int n = stackWalk(cache, sc, (const void **)arg->buf, arg->max, 0); + if (n < arg->max) { + arg->buf[n] = 0; + } + truncate_asmcgocall((void **)arg->buf, n); - return; + return; } #endif - } // extern "C" \ No newline at end of file diff --git a/src/async-profiler/codeCache.cpp b/src/async-profiler/codeCache.cpp index c1cec91f9..72b6d63ce 100644 --- a/src/async-profiler/codeCache.cpp +++ b/src/async-profiler/codeCache.cpp @@ -17,212 +17,220 @@ * Modified by Nick Ripley to extract components needed for call stack unwinding */ +#include "codeCache.h" +#include "dwarf.h" +#include "os.h" #include #include #include #include -#include "codeCache.h" -#include "dwarf.h" -#include "os.h" - -char* NativeFunc::create(const char* name, short lib_index) { - NativeFunc* f = (NativeFunc*)malloc(sizeof(NativeFunc) + 1 + strlen(name)); - f->_lib_index = lib_index; - f->_mark = 0; - return strcpy(f->_name, name); -} - -void NativeFunc::destroy(char* name) { - free(from(name)); +char *NativeFunc::create(const char *name, short lib_index) { + NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name)); + f->_lib_index = lib_index; + f->_mark = 0; + return strcpy(f->_name, name); } +void NativeFunc::destroy(char *name) { free(from(name)); } -CodeCache::CodeCache(const char* name, short lib_index, const void* min_address, const void* max_address) { - _name = NativeFunc::create(name, -1); - _lib_index = lib_index; - _min_address = min_address; - _max_address = max_address; - 
_text_base = NULL; +CodeCache::CodeCache(const char *name, short lib_index, const void *min_address, + const void *max_address) { + _name = NativeFunc::create(name, -1); + _lib_index = lib_index; + _min_address = min_address; + _max_address = max_address; + _text_base = NULL; - _got_start = NULL; - _got_end = NULL; - _got_patchable = false; + _got_start = NULL; + _got_end = NULL; + _got_patchable = false; - _dwarf_table = NULL; - _dwarf_table_length = 0; + _dwarf_table = NULL; + _dwarf_table_length = 0; - _capacity = INITIAL_CODE_CACHE_CAPACITY; - _count = 0; - _blobs = new CodeBlob[_capacity]; + _capacity = INITIAL_CODE_CACHE_CAPACITY; + _count = 0; + _blobs = new CodeBlob[_capacity]; } CodeCache::~CodeCache() { - for (int i = 0; i < _count; i++) { - NativeFunc::destroy(_blobs[i]._name); - } - NativeFunc::destroy(_name); - delete[] _blobs; - free(_dwarf_table); + for (int i = 0; i < _count; i++) { + NativeFunc::destroy(_blobs[i]._name); + } + NativeFunc::destroy(_name); + delete[] _blobs; + free(_dwarf_table); } void CodeCache::expand() { - CodeBlob* old_blobs = _blobs; - CodeBlob* new_blobs = new CodeBlob[_capacity * 2]; + CodeBlob *old_blobs = _blobs; + CodeBlob *new_blobs = new CodeBlob[_capacity * 2]; - memcpy(new_blobs, old_blobs, _count * sizeof(CodeBlob)); + memcpy(new_blobs, old_blobs, _count * sizeof(CodeBlob)); - _capacity *= 2; - _blobs = new_blobs; - delete[] old_blobs; + _capacity *= 2; + _blobs = new_blobs; + delete[] old_blobs; } -void CodeCache::add(const void* start, int length, const char* name, bool update_bounds) { - char* name_copy = NativeFunc::create(name, _lib_index); - // Replace non-printable characters - for (char* s = name_copy; *s != 0; s++) { - if (*s < ' ') *s = '?'; - } +void CodeCache::add(const void *start, int length, const char *name, + bool update_bounds) { + char *name_copy = NativeFunc::create(name, _lib_index); + // Replace non-printable characters + for (char *s = name_copy; *s != 0; s++) { + if (*s < ' ') + *s = '?'; + } - 
if (_count >= _capacity) { - expand(); - } + if (_count >= _capacity) { + expand(); + } - const void* end = (const char*)start + length; - _blobs[_count]._start = start; - _blobs[_count]._end = end; - _blobs[_count]._name = name_copy; - _count++; + const void *end = (const char *)start + length; + _blobs[_count]._start = start; + _blobs[_count]._end = end; + _blobs[_count]._name = name_copy; + _count++; - if (update_bounds) { - updateBounds(start, end); - } + if (update_bounds) { + updateBounds(start, end); + } } -void CodeCache::updateBounds(const void* start, const void* end) { - if (start < _min_address) _min_address = start; - if (end > _max_address) _max_address = end; +void CodeCache::updateBounds(const void *start, const void *end) { + if (start < _min_address) + _min_address = start; + if (end > _max_address) + _max_address = end; } void CodeCache::sort() { - if (_count == 0) return; + if (_count == 0) + return; - qsort(_blobs, _count, sizeof(CodeBlob), CodeBlob::comparator); + qsort(_blobs, _count, sizeof(CodeBlob), CodeBlob::comparator); - if (_min_address == NO_MIN_ADDRESS) _min_address = _blobs[0]._start; - if (_max_address == NO_MAX_ADDRESS) _max_address = _blobs[_count - 1]._end; + if (_min_address == NO_MIN_ADDRESS) + _min_address = _blobs[0]._start; + if (_max_address == NO_MAX_ADDRESS) + _max_address = _blobs[_count - 1]._end; } void CodeCache::mark(NamePredicate predicate) { - for (int i = 0; i < _count; i++) { - const char* blob_name = _blobs[i]._name; - if (blob_name != NULL && predicate(blob_name)) { - NativeFunc::mark(blob_name); - } + for (int i = 0; i < _count; i++) { + const char *blob_name = _blobs[i]._name; + if (blob_name != NULL && predicate(blob_name)) { + NativeFunc::mark(blob_name); } + } } -CodeBlob* CodeCache::find(const void* address) { - for (int i = 0; i < _count; i++) { - if (address >= _blobs[i]._start && address < _blobs[i]._end) { - return &_blobs[i]; - } - } - return NULL; -} - -const char* CodeCache::binarySearch(const 
void* address) { - int low = 0; - int high = _count - 1; - - while (low <= high) { - int mid = (unsigned int)(low + high) >> 1; - if (_blobs[mid]._end <= address) { - low = mid + 1; - } else if (_blobs[mid]._start > address) { - high = mid - 1; - } else { - return _blobs[mid]._name; - } +CodeBlob *CodeCache::find(const void *address) { + for (int i = 0; i < _count; i++) { + if (address >= _blobs[i]._start && address < _blobs[i]._end) { + return &_blobs[i]; } - - // Symbols with zero size can be valid functions: e.g. ASM entry points or kernel code. - // Also, in some cases (endless loop) the return address may point beyond the function. - if (low > 0 && (_blobs[low - 1]._start == _blobs[low - 1]._end || _blobs[low - 1]._end == address)) { - return _blobs[low - 1]._name; + } + return NULL; +} + +const char *CodeCache::binarySearch(const void *address) { + int low = 0; + int high = _count - 1; + + while (low <= high) { + int mid = (unsigned int)(low + high) >> 1; + if (_blobs[mid]._end <= address) { + low = mid + 1; + } else if (_blobs[mid]._start > address) { + high = mid - 1; + } else { + return _blobs[mid]._name; } - return _name; -} - -const void* CodeCache::findSymbol(const char* name) { - for (int i = 0; i < _count; i++) { - const char* blob_name = _blobs[i]._name; - if (blob_name != NULL && strcmp(blob_name, name) == 0) { - return _blobs[i]._start; - } + } + + // Symbols with zero size can be valid functions: e.g. ASM entry points or + // kernel code. Also, in some cases (endless loop) the return address may + // point beyond the function. 
+ if (low > 0 && + (_blobs[low - 1]._start == _blobs[low - 1]._end || + _blobs[low - 1]._end == address)) { + return _blobs[low - 1]._name; + } + return _name; +} + +const void *CodeCache::findSymbol(const char *name) { + for (int i = 0; i < _count; i++) { + const char *blob_name = _blobs[i]._name; + if (blob_name != NULL && strcmp(blob_name, name) == 0) { + return _blobs[i]._start; } - return NULL; + } + return NULL; } -const void* CodeCache::findSymbolByPrefix(const char* prefix) { - return findSymbolByPrefix(prefix, strlen(prefix)); +const void *CodeCache::findSymbolByPrefix(const char *prefix) { + return findSymbolByPrefix(prefix, strlen(prefix)); } -const void* CodeCache::findSymbolByPrefix(const char* prefix, int prefix_len) { - for (int i = 0; i < _count; i++) { - const char* blob_name = _blobs[i]._name; - if (blob_name != NULL && strncmp(blob_name, prefix, prefix_len) == 0) { - return _blobs[i]._start; - } +const void *CodeCache::findSymbolByPrefix(const char *prefix, int prefix_len) { + for (int i = 0; i < _count; i++) { + const char *blob_name = _blobs[i]._name; + if (blob_name != NULL && strncmp(blob_name, prefix, prefix_len) == 0) { + return _blobs[i]._start; } - return NULL; + } + return NULL; } -void CodeCache::setGlobalOffsetTable(void** start, void** end, bool patchable) { - _got_start = start; - _got_end = end; - _got_patchable = patchable; +void CodeCache::setGlobalOffsetTable(void **start, void **end, bool patchable) { + _got_start = start; + _got_end = end; + _got_patchable = patchable; } -void** CodeCache::findGlobalOffsetEntry(void* address) { - for (void** entry = _got_start; entry < _got_end; entry++) { - if (*entry == address) { - makeGotPatchable(); - return entry; - } +void **CodeCache::findGlobalOffsetEntry(void *address) { + for (void **entry = _got_start; entry < _got_end; entry++) { + if (*entry == address) { + makeGotPatchable(); + return entry; } - return NULL; + } + return NULL; } void CodeCache::makeGotPatchable() { - if 
(!_got_patchable) { - uintptr_t got_start = (uintptr_t)_got_start & ~OS::page_mask; - uintptr_t got_size = ((uintptr_t)_got_end - got_start + OS::page_mask) & ~OS::page_mask; - mprotect((void*)got_start, got_size, PROT_READ | PROT_WRITE); - _got_patchable = true; - } -} - -void CodeCache::setDwarfTable(FrameDesc* table, int length) { - _dwarf_table = table; - _dwarf_table_length = length; -} - -FrameDesc* CodeCache::findFrameDesc(const void* pc) { - u32 target_loc = (const char*)pc - _text_base; - int low = 0; - int high = _dwarf_table_length - 1; - - while (low <= high) { - int mid = (unsigned int)(low + high) >> 1; - if (_dwarf_table[mid].loc < target_loc) { - low = mid + 1; - } else if (_dwarf_table[mid].loc > target_loc) { - high = mid - 1; - } else { - return &_dwarf_table[mid]; - } + if (!_got_patchable) { + uintptr_t got_start = (uintptr_t)_got_start & ~OS::page_mask; + uintptr_t got_size = + ((uintptr_t)_got_end - got_start + OS::page_mask) & ~OS::page_mask; + mprotect((void *)got_start, got_size, PROT_READ | PROT_WRITE); + _got_patchable = true; + } +} + +void CodeCache::setDwarfTable(FrameDesc *table, int length) { + _dwarf_table = table; + _dwarf_table_length = length; +} + +FrameDesc *CodeCache::findFrameDesc(const void *pc) { + u32 target_loc = (const char *)pc - _text_base; + int low = 0; + int high = _dwarf_table_length - 1; + + while (low <= high) { + int mid = (unsigned int)(low + high) >> 1; + if (_dwarf_table[mid].loc < target_loc) { + low = mid + 1; + } else if (_dwarf_table[mid].loc > target_loc) { + high = mid - 1; + } else { + return &_dwarf_table[mid]; } + } - return low > 0 ? &_dwarf_table[low - 1] : NULL; + return low > 0 ? 
&_dwarf_table[low - 1] : NULL; } diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp index 7fb22d0d5..68640dfd1 100644 --- a/src/async-profiler/dwarf.cpp +++ b/src/async-profiler/dwarf.cpp @@ -17,335 +17,351 @@ * Modified by Nick Ripley to extract components needed for call stack unwinding */ -#include #include "dwarf.h" - +#include enum { - DW_CFA_nop = 0x0, - DW_CFA_set_loc = 0x1, - DW_CFA_advance_loc1 = 0x2, - DW_CFA_advance_loc2 = 0x3, - DW_CFA_advance_loc4 = 0x4, - DW_CFA_offset_extended = 0x5, - DW_CFA_restore_extended = 0x6, - DW_CFA_undefined = 0x7, - DW_CFA_same_value = 0x8, - DW_CFA_register = 0x9, - DW_CFA_remember_state = 0xa, - DW_CFA_restore_state = 0xb, - DW_CFA_def_cfa = 0xc, - DW_CFA_def_cfa_register = 0xd, - DW_CFA_def_cfa_offset = 0xe, - DW_CFA_def_cfa_expression = 0xf, - DW_CFA_expression = 0x10, - DW_CFA_offset_extended_sf = 0x11, - DW_CFA_def_cfa_sf = 0x12, - DW_CFA_def_cfa_offset_sf = 0x13, - DW_CFA_val_offset = 0x14, - DW_CFA_val_offset_sf = 0x15, - DW_CFA_val_expression = 0x16, - DW_CFA_GNU_args_size = 0x2e, - - DW_CFA_advance_loc = 0x1, - DW_CFA_offset = 0x2, - DW_CFA_restore = 0x3, + DW_CFA_nop = 0x0, + DW_CFA_set_loc = 0x1, + DW_CFA_advance_loc1 = 0x2, + DW_CFA_advance_loc2 = 0x3, + DW_CFA_advance_loc4 = 0x4, + DW_CFA_offset_extended = 0x5, + DW_CFA_restore_extended = 0x6, + DW_CFA_undefined = 0x7, + DW_CFA_same_value = 0x8, + DW_CFA_register = 0x9, + DW_CFA_remember_state = 0xa, + DW_CFA_restore_state = 0xb, + DW_CFA_def_cfa = 0xc, + DW_CFA_def_cfa_register = 0xd, + DW_CFA_def_cfa_offset = 0xe, + DW_CFA_def_cfa_expression = 0xf, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_GNU_args_size = 0x2e, + + DW_CFA_advance_loc = 0x1, + DW_CFA_offset = 0x2, + DW_CFA_restore = 0x3, }; enum { - DW_OP_breg_pc = 0x70 + DW_REG_PC, - DW_OP_const1u = 
0x08, - DW_OP_const1s = 0x09, - DW_OP_const2u = 0x0a, - DW_OP_const2s = 0x0b, - DW_OP_const4u = 0x0c, - DW_OP_const4s = 0x0d, - DW_OP_constu = 0x10, - DW_OP_consts = 0x11, - DW_OP_minus = 0x1c, - DW_OP_plus = 0x22, + DW_OP_breg_pc = 0x70 + DW_REG_PC, + DW_OP_const1u = 0x08, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_minus = 0x1c, + DW_OP_plus = 0x22, }; +FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | (2 * DW_STACK_SLOT) << 8, + -2 * DW_STACK_SLOT}; -FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | (2 * DW_STACK_SLOT) << 8, -2 * DW_STACK_SLOT}; +DwarfParser::DwarfParser(const char *name, const char *image_base, + const char *eh_frame_hdr) { + _name = name; + _image_base = image_base; + _capacity = 128; + _count = 0; + _table = (FrameDesc *)malloc(_capacity * sizeof(FrameDesc)); + _prev = NULL; -DwarfParser::DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr) { - _name = name; - _image_base = image_base; + _code_align = sizeof(instruction_t); + _data_align = -(int)sizeof(void *); - _capacity = 128; - _count = 0; - _table = (FrameDesc*)malloc(_capacity * sizeof(FrameDesc)); - _prev = NULL; - - _code_align = sizeof(instruction_t); - _data_align = -(int)sizeof(void*); - - parse(eh_frame_hdr); + parse(eh_frame_hdr); } -void DwarfParser::parse(const char* eh_frame_hdr) { - u8 version = eh_frame_hdr[0]; - u8 eh_frame_ptr_enc = eh_frame_hdr[1]; - u8 fde_count_enc = eh_frame_hdr[2]; - u8 table_enc = eh_frame_hdr[3]; - - if (version != 1 || (eh_frame_ptr_enc & 0x7) != 0x3 || (fde_count_enc & 0x7) != 0x3 || (table_enc & 0xf7) != 0x33) { - return; - } - - int fde_count = *(int*)(eh_frame_hdr + 8); - int* table = (int*)(eh_frame_hdr + 16); - for (int i = 0; i < fde_count; i++) { - _ptr = eh_frame_hdr + table[i * 2]; - parseFde(); - } +void DwarfParser::parse(const char *eh_frame_hdr) { + u8 version = 
eh_frame_hdr[0]; + u8 eh_frame_ptr_enc = eh_frame_hdr[1]; + u8 fde_count_enc = eh_frame_hdr[2]; + u8 table_enc = eh_frame_hdr[3]; + + if (version != 1 || (eh_frame_ptr_enc & 0x7) != 0x3 || + (fde_count_enc & 0x7) != 0x3 || (table_enc & 0xf7) != 0x33) { + return; + } + + int fde_count = *(int *)(eh_frame_hdr + 8); + int *table = (int *)(eh_frame_hdr + 16); + for (int i = 0; i < fde_count; i++) { + _ptr = eh_frame_hdr + table[i * 2]; + parseFde(); + } } void DwarfParser::parseCie() { - u32 cie_len = get32(); - if (cie_len == 0 || cie_len == 0xffffffff) { - return; - } - - const char* cie_start = _ptr; - _ptr += 5; - while (*_ptr++) {} - _code_align = getLeb(); - _data_align = getSLeb(); - _ptr = cie_start + cie_len; + u32 cie_len = get32(); + if (cie_len == 0 || cie_len == 0xffffffff) { + return; + } + + const char *cie_start = _ptr; + _ptr += 5; + while (*_ptr++) {} + _code_align = getLeb(); + _data_align = getSLeb(); + _ptr = cie_start + cie_len; } void DwarfParser::parseFde() { - u32 fde_len = get32(); - if (fde_len == 0 || fde_len == 0xffffffff) { - return; - } - - const char* fde_start = _ptr; - u32 cie_offset = get32(); - if (_count == 0) { - _ptr = fde_start - cie_offset; - parseCie(); - _ptr = fde_start + 4; - } - - u32 range_start = getPtr() - _image_base; - u32 range_len = get32(); - _ptr += getLeb(); - parseInstructions(range_start, fde_start + fde_len); - addRecord(range_start + range_len, DW_REG_SP, DW_STACK_SLOT, DW_SAME_FP); + u32 fde_len = get32(); + if (fde_len == 0 || fde_len == 0xffffffff) { + return; + } + + const char *fde_start = _ptr; + u32 cie_offset = get32(); + if (_count == 0) { + _ptr = fde_start - cie_offset; + parseCie(); + _ptr = fde_start + 4; + } + + u32 range_start = getPtr() - _image_base; + u32 range_len = get32(); + _ptr += getLeb(); + parseInstructions(range_start, fde_start + fde_len); + addRecord(range_start + range_len, DW_REG_SP, DW_STACK_SLOT, DW_SAME_FP); } -void DwarfParser::parseInstructions(u32 loc, const char* end) { - 
const u32 code_align = _code_align; - const int data_align = _data_align; - - u32 cfa_reg = DW_REG_SP; - int cfa_off = DW_STACK_SLOT; - int fp_off = DW_SAME_FP; - int pc_off = -DW_STACK_SLOT; - - u32 rem_cfa_reg; - int rem_cfa_off; - int rem_fp_off; - int rem_pc_off; - - while (_ptr < end) { - u8 op = get8(); - switch (op >> 6) { - case 0: - switch (op) { - case DW_CFA_nop: - case DW_CFA_set_loc: - _ptr = end; - break; - case DW_CFA_advance_loc1: - addRecord(loc, cfa_reg, cfa_off, fp_off); - loc += get8() * code_align; - break; - case DW_CFA_advance_loc2: - addRecord(loc, cfa_reg, cfa_off, fp_off); - loc += get16() * code_align; - break; - case DW_CFA_advance_loc4: - addRecord(loc, cfa_reg, cfa_off, fp_off); - loc += get32() * code_align; - break; - case DW_CFA_offset_extended: - switch (getLeb()) { - case DW_REG_FP: fp_off = getLeb() * data_align; break; - case DW_REG_PC: pc_off = getLeb() * data_align; break; - default: skipLeb(); - } - break; - case DW_CFA_restore_extended: - case DW_CFA_undefined: - case DW_CFA_same_value: - skipLeb(); - break; - case DW_CFA_register: - skipLeb(); - skipLeb(); - break; - case DW_CFA_remember_state: - rem_cfa_reg = cfa_reg; - rem_cfa_off = cfa_off; - rem_fp_off = fp_off; - rem_pc_off = pc_off; - break; - case DW_CFA_restore_state: - cfa_reg = rem_cfa_reg; - cfa_off = rem_cfa_off; - fp_off = rem_fp_off; - pc_off = rem_pc_off; - break; - case DW_CFA_def_cfa: - cfa_reg = getLeb(); - cfa_off = getLeb(); - break; - case DW_CFA_def_cfa_register: - cfa_reg = getLeb(); - break; - case DW_CFA_def_cfa_offset: - cfa_off = getLeb(); - break; - case DW_CFA_def_cfa_expression: { - u32 len = getLeb(); - cfa_reg = len == 11 ? 
DW_REG_PLT : DW_REG_INVALID; - cfa_off = DW_STACK_SLOT; - _ptr += len; - break; - } - case DW_CFA_expression: - skipLeb(); - _ptr += getLeb(); - break; - case DW_CFA_offset_extended_sf: - switch (getLeb()) { - case DW_REG_FP: fp_off = getSLeb() * data_align; break; - case DW_REG_PC: pc_off = getSLeb() * data_align; break; - default: skipLeb(); - } - break; - case DW_CFA_def_cfa_sf: - cfa_reg = getLeb(); - cfa_off = getSLeb() * data_align; - break; - case DW_CFA_def_cfa_offset_sf: - cfa_off = getSLeb() * data_align; - break; - case DW_CFA_val_offset: - case DW_CFA_val_offset_sf: - skipLeb(); - skipLeb(); - break; - case DW_CFA_val_expression: - if (getLeb() == DW_REG_PC) { - int pc_off = parseExpression(); - if (pc_off != 0) { - fp_off = DW_PC_OFFSET | (pc_off << 1); - } - } else { - _ptr += getLeb(); - } - break; - case DW_CFA_GNU_args_size: - skipLeb(); - break; - default: - return; - } - break; - case DW_CFA_advance_loc: - addRecord(loc, cfa_reg, cfa_off, fp_off); - loc += (op & 0x3f) * code_align; - break; - case DW_CFA_offset: - switch (op & 0x3f) { - case DW_REG_FP: fp_off = getLeb() * data_align; break; - case DW_REG_PC: pc_off = getLeb() * data_align; break; - default: skipLeb(); - } - break; - case DW_CFA_restore: - break; +void DwarfParser::parseInstructions(u32 loc, const char *end) { + const u32 code_align = _code_align; + const int data_align = _data_align; + + u32 cfa_reg = DW_REG_SP; + int cfa_off = DW_STACK_SLOT; + int fp_off = DW_SAME_FP; + int pc_off = -DW_STACK_SLOT; + + u32 rem_cfa_reg; + int rem_cfa_off; + int rem_fp_off; + int rem_pc_off; + + while (_ptr < end) { + u8 op = get8(); + switch (op >> 6) { + case 0: + switch (op) { + case DW_CFA_nop: + case DW_CFA_set_loc: + _ptr = end; + break; + case DW_CFA_advance_loc1: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += get8() * code_align; + break; + case DW_CFA_advance_loc2: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += get16() * code_align; + break; + case DW_CFA_advance_loc4: + 
addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += get32() * code_align; + break; + case DW_CFA_offset_extended: + switch (getLeb()) { + case DW_REG_FP: + fp_off = getLeb() * data_align; + break; + case DW_REG_PC: + pc_off = getLeb() * data_align; + break; + default: + skipLeb(); + } + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + skipLeb(); + break; + case DW_CFA_register: + skipLeb(); + skipLeb(); + break; + case DW_CFA_remember_state: + rem_cfa_reg = cfa_reg; + rem_cfa_off = cfa_off; + rem_fp_off = fp_off; + rem_pc_off = pc_off; + break; + case DW_CFA_restore_state: + cfa_reg = rem_cfa_reg; + cfa_off = rem_cfa_off; + fp_off = rem_fp_off; + pc_off = rem_pc_off; + break; + case DW_CFA_def_cfa: + cfa_reg = getLeb(); + cfa_off = getLeb(); + break; + case DW_CFA_def_cfa_register: + cfa_reg = getLeb(); + break; + case DW_CFA_def_cfa_offset: + cfa_off = getLeb(); + break; + case DW_CFA_def_cfa_expression: { + u32 len = getLeb(); + cfa_reg = len == 11 ? 
DW_REG_PLT : DW_REG_INVALID; + cfa_off = DW_STACK_SLOT; + _ptr += len; + break; + } + case DW_CFA_expression: + skipLeb(); + _ptr += getLeb(); + break; + case DW_CFA_offset_extended_sf: + switch (getLeb()) { + case DW_REG_FP: + fp_off = getSLeb() * data_align; + break; + case DW_REG_PC: + pc_off = getSLeb() * data_align; + break; + default: + skipLeb(); + } + break; + case DW_CFA_def_cfa_sf: + cfa_reg = getLeb(); + cfa_off = getSLeb() * data_align; + break; + case DW_CFA_def_cfa_offset_sf: + cfa_off = getSLeb() * data_align; + break; + case DW_CFA_val_offset: + case DW_CFA_val_offset_sf: + skipLeb(); + skipLeb(); + break; + case DW_CFA_val_expression: + if (getLeb() == DW_REG_PC) { + int pc_off = parseExpression(); + if (pc_off != 0) { + fp_off = DW_PC_OFFSET | (pc_off << 1); + } + } else { + _ptr += getLeb(); } + break; + case DW_CFA_GNU_args_size: + skipLeb(); + break; + default: + return; + } + break; + case DW_CFA_advance_loc: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += (op & 0x3f) * code_align; + break; + case DW_CFA_offset: + switch (op & 0x3f) { + case DW_REG_FP: + fp_off = getLeb() * data_align; + break; + case DW_REG_PC: + pc_off = getLeb() * data_align; + break; + default: + skipLeb(); + } + break; + case DW_CFA_restore: + break; } + } - addRecord(loc, cfa_reg, cfa_off, fp_off); + addRecord(loc, cfa_reg, cfa_off, fp_off); } -// Parse a limited subset of DWARF expressions, which is used in DW_CFA_val_expression -// to point to the previous PC relative to the current PC. +// Parse a limited subset of DWARF expressions, which is used in +// DW_CFA_val_expression to point to the previous PC relative to the current PC. // Returns the offset of the previous PC from the current PC. 
int DwarfParser::parseExpression() { - int pc_off = 0; - int tos = 0; - - u32 len = getLeb(); - const char* end = _ptr + len; - - while (_ptr < end) { - u8 op = get8(); - switch (op) { - case DW_OP_breg_pc: - pc_off = getSLeb(); - break; - case DW_OP_const1u: - tos = get8(); - break; - case DW_OP_const1s: - tos = (signed char)get8(); - break; - case DW_OP_const2u: - tos = get16(); - break; - case DW_OP_const2s: - tos = (short)get16(); - break; - case DW_OP_const4u: - case DW_OP_const4s: - tos = get32(); - break; - case DW_OP_constu: - tos = getLeb(); - break; - case DW_OP_consts: - tos = getSLeb(); - break; - case DW_OP_minus: - pc_off -= tos; - break; - case DW_OP_plus: - pc_off += tos; - break; - default: - _ptr = end; - return 0; - } + int pc_off = 0; + int tos = 0; + + u32 len = getLeb(); + const char *end = _ptr + len; + + while (_ptr < end) { + u8 op = get8(); + switch (op) { + case DW_OP_breg_pc: + pc_off = getSLeb(); + break; + case DW_OP_const1u: + tos = get8(); + break; + case DW_OP_const1s: + tos = (signed char)get8(); + break; + case DW_OP_const2u: + tos = get16(); + break; + case DW_OP_const2s: + tos = (short)get16(); + break; + case DW_OP_const4u: + case DW_OP_const4s: + tos = get32(); + break; + case DW_OP_constu: + tos = getLeb(); + break; + case DW_OP_consts: + tos = getSLeb(); + break; + case DW_OP_minus: + pc_off -= tos; + break; + case DW_OP_plus: + pc_off += tos; + break; + default: + _ptr = end; + return 0; } + } - return pc_off; + return pc_off; } void DwarfParser::addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off) { - int cfa = cfa_reg | cfa_off << 8; - if (_prev == NULL || (_prev->loc == loc && --_count >= 0) || _prev->cfa != cfa || _prev->fp_off != fp_off) { - _prev = addRecordRaw(loc, cfa, fp_off); - } + int cfa = cfa_reg | cfa_off << 8; + if (_prev == NULL || (_prev->loc == loc && --_count >= 0) || + _prev->cfa != cfa || _prev->fp_off != fp_off) { + _prev = addRecordRaw(loc, cfa, fp_off); + } } -FrameDesc* 
DwarfParser::addRecordRaw(u32 loc, int cfa, int fp_off) { - if (_count >= _capacity) { - _capacity *= 2; - _table = (FrameDesc*)realloc(_table, _capacity * sizeof(FrameDesc)); - } - - FrameDesc* f = &_table[_count++]; - f->loc = loc; - f->cfa = cfa; - f->fp_off = fp_off; - return f; +FrameDesc *DwarfParser::addRecordRaw(u32 loc, int cfa, int fp_off) { + if (_count >= _capacity) { + _capacity *= 2; + _table = (FrameDesc *)realloc(_table, _capacity * sizeof(FrameDesc)); + } + + FrameDesc *f = &_table[_count++]; + f->loc = loc; + f->cfa = cfa; + f->fp_off = fp_off; + return f; } diff --git a/src/async-profiler/mutex.cpp b/src/async-profiler/mutex.cpp index b15f23b2f..8f58cc091 100644 --- a/src/async-profiler/mutex.cpp +++ b/src/async-profiler/mutex.cpp @@ -16,31 +16,23 @@ #include "mutex.h" - Mutex::Mutex() { - pthread_mutexattr_t attr; - pthread_mutexattr_init(&attr); - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); - pthread_mutex_init(&_mutex, &attr); + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&_mutex, &attr); } -void Mutex::lock() { - pthread_mutex_lock(&_mutex); -} +void Mutex::lock() { pthread_mutex_lock(&_mutex); } -void Mutex::unlock() { - pthread_mutex_unlock(&_mutex); -} +void Mutex::unlock() { pthread_mutex_unlock(&_mutex); } -WaitableMutex::WaitableMutex() : Mutex() { - pthread_cond_init(&_cond, NULL); -} +WaitableMutex::WaitableMutex() : Mutex() { pthread_cond_init(&_cond, NULL); } bool WaitableMutex::waitUntil(u64 wall_time) { - struct timespec ts = {(time_t)(wall_time / 1000000), (long)(wall_time % 1000000) * 1000}; - return pthread_cond_timedwait(&_cond, &_mutex, &ts) != 0; + struct timespec ts = {(time_t)(wall_time / 1000000), + (long)(wall_time % 1000000) * 1000}; + return pthread_cond_timedwait(&_cond, &_mutex, &ts) != 0; } -void WaitableMutex::notify() { - pthread_cond_signal(&_cond); -} +void WaitableMutex::notify() { 
pthread_cond_signal(&_cond); } diff --git a/src/async-profiler/safeAccess.cpp b/src/async-profiler/safeAccess.cpp index d0858aa1f..e5ce221c0 100644 --- a/src/async-profiler/safeAccess.cpp +++ b/src/async-profiler/safeAccess.cpp @@ -11,68 +11,69 @@ static struct sigaction oldact; namespace SafeAccess { -NOINLINE __attribute__((aligned(16))) void* load(void** ptr) { - return *ptr; -} +NOINLINE __attribute__((aligned(16))) void *load(void **ptr) { return *ptr; } // skipFaultInstruction returns the address of the instruction immediately // following the given instruction. pc is assumed to point to the same kind of // load that SafeAccess::load would use static uintptr_t skipFaultInstruction(uintptr_t pc) { #if defined(__x86_64__) - return *(u16*)pc == 0x8b48 ? 3 : 0; // mov rax, [reg] + return *(u16 *)pc == 0x8b48 ? 3 : 0; // mov rax, [reg] #elif defined(__i386__) - return *(u8*)pc == 0x8b ? 2 : 0; // mov eax, [reg] + return *(u8 *)pc == 0x8b ? 2 : 0; // mov eax, [reg] #elif defined(__arm__) || defined(__thumb__) - return (*(instruction_t*)pc & 0x0e50f000) == 0x04100000 ? 4 : 0; // ldr r0, [reg] + return (*(instruction_t *)pc & 0x0e50f000) == 0x04100000 ? 4 + : 0; // ldr r0, [reg] #elif defined(__aarch64__) - return (*(instruction_t*)pc & 0xffc0001f) == 0xf9400000 ? 4 : 0; // ldr x0, [reg] + return (*(instruction_t *)pc & 0xffc0001f) == 0xf9400000 ? 4 + : 0; // ldr x0, [reg] #else - return sizeof(instruction_t); + return sizeof(instruction_t); #endif } -} +} // namespace SafeAccess static void segv_handler(int sig, siginfo_t *si, void *ucontext) { - ucontext_t *uc = (ucontext_t *)ucontext; - StackFrame frame(uc); - - // If we segfault in the SafeAccess::load, skip past the bad access and - // set the return value to 0. 
- // - // We have to check if we are *near* the beginning of load, since there will - // be a few instructions (for frame pointer setup) before the actual bad - // access - if ((frame.pc() - (uintptr_t) SafeAccess::load) < 16) { - uintptr_t instructionEncodedLength = SafeAccess::skipFaultInstruction(frame.pc()); - frame.pc() += instructionEncodedLength; - frame.retval() = 0x0; - return; - } + ucontext_t *uc = (ucontext_t *)ucontext; + StackFrame frame(uc); - // fall back otherwise - if (oldact.sa_sigaction != nullptr) { - oldact.sa_sigaction(sig, si, ucontext); - } else if (oldact.sa_handler != nullptr) { - oldact.sa_handler(sig); - } else { - // If there wasn't a fallback, re-set to the default handler - // (which just aborts the program) and re-raise the signal - struct sigaction sa; - memset(&sa, 0, sizeof(struct sigaction)); - sa.sa_handler = SIG_DFL; - sigaction(sig, &sa, nullptr); - raise(sig); - } -} + // If we segfault in the SafeAccess::load, skip past the bad access and + // set the return value to 0. 
+ // + // We have to check if we are *near* the beginning of load, since there will + // be a few instructions (for frame pointer setup) before the actual bad + // access + if ((frame.pc() - (uintptr_t)SafeAccess::load) < 16) { + uintptr_t instructionEncodedLength = + SafeAccess::skipFaultInstruction(frame.pc()); + frame.pc() += instructionEncodedLength; + frame.retval() = 0x0; + return; + } -__attribute__ ((constructor)) static void init(void) { + // fall back otherwise + if (oldact.sa_sigaction != nullptr) { + oldact.sa_sigaction(sig, si, ucontext); + } else if (oldact.sa_handler != nullptr) { + oldact.sa_handler(sig); + } else { + // If there wasn't a fallback, re-set to the default handler + // (which just aborts the program) and re-raise the signal struct sigaction sa; - memset(&oldact, 0, sizeof(struct sigaction)); memset(&sa, 0, sizeof(struct sigaction)); - sa.sa_sigaction = segv_handler; - sa.sa_flags = SA_SIGINFO; + sa.sa_handler = SIG_DFL; + sigaction(sig, &sa, nullptr); + raise(sig); + } +} + +__attribute__((constructor)) static void init(void) { + struct sigaction sa; + memset(&oldact, 0, sizeof(struct sigaction)); + memset(&sa, 0, sizeof(struct sigaction)); + sa.sa_sigaction = segv_handler; + sa.sa_flags = SA_SIGINFO; - sigaction(SIGSEGV, &sa, &oldact); + sigaction(SIGSEGV, &sa, &oldact); } \ No newline at end of file diff --git a/src/async-profiler/stackFrame_aarch64.cpp b/src/async-profiler/stackFrame_aarch64.cpp index 1054691be..9b7cc6c71 100644 --- a/src/async-profiler/stackFrame_aarch64.cpp +++ b/src/async-profiler/stackFrame_aarch64.cpp @@ -16,119 +16,97 @@ #ifdef __aarch64__ -#include -#include -#include -#include "stackFrame.h" +# include "stackFrame.h" +# include +# include +# include +# ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +# else +# define REG(l, m) _ucontext->uc_mcontext.l +# endif -#ifdef __APPLE__ -# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m -#else -# define REG(l, m) _ucontext->uc_mcontext.l 
-#endif +uintptr_t &StackFrame::pc() { return (uintptr_t &)REG(pc, pc); } +uintptr_t &StackFrame::sp() { return (uintptr_t &)REG(sp, sp); } -uintptr_t& StackFrame::pc() { - return (uintptr_t&)REG(pc, pc); -} - -uintptr_t& StackFrame::sp() { - return (uintptr_t&)REG(sp, sp); -} - -uintptr_t& StackFrame::fp() { - return (uintptr_t&)REG(regs[29], fp); -} +uintptr_t &StackFrame::fp() { return (uintptr_t &)REG(regs[29], fp); } -uintptr_t& StackFrame::retval() { - return (uintptr_t&)REG(regs[0], x[0]); -} +uintptr_t &StackFrame::retval() { return (uintptr_t &)REG(regs[0], x[0]); } -uintptr_t StackFrame::arg0() { - return (uintptr_t)REG(regs[0], x[0]); -} +uintptr_t StackFrame::arg0() { return (uintptr_t)REG(regs[0], x[0]); } -uintptr_t StackFrame::arg1() { - return (uintptr_t)REG(regs[1], x[1]); -} - -uintptr_t StackFrame::arg2() { - return (uintptr_t)REG(regs[2], x[2]); -} +uintptr_t StackFrame::arg1() { return (uintptr_t)REG(regs[1], x[1]); } -uintptr_t StackFrame::arg3() { - return (uintptr_t)REG(regs[3], x[3]); -} +uintptr_t StackFrame::arg2() { return (uintptr_t)REG(regs[2], x[2]); } -void StackFrame::ret() { - pc() = REG(regs[30], lr); -} +uintptr_t StackFrame::arg3() { return (uintptr_t)REG(regs[3], x[3]); } +void StackFrame::ret() { pc() = REG(regs[30], lr); } -bool StackFrame::popStub(instruction_t* entry, const char* name) { - instruction_t* ip = (instruction_t*)pc(); - if (ip == entry || *ip == 0xd65f03c0 - || strncmp(name, "itable", 6) == 0 - || strncmp(name, "vtable", 6) == 0 - || strncmp(name, "compare_long_string_", 20) == 0 - || strcmp(name, "zero_blocks") == 0 - || strcmp(name, "forward_copy_longs") == 0 - || strcmp(name, "backward_copy_longs") == 0 - || strcmp(name, "InlineCacheBuffer") == 0) - { - ret(); - return true; - } else if (entry != NULL && entry[0] == 0xa9bf7bfd) { - // The stub begins with - // stp x29, x30, [sp, #-16]! 
- // mov x29, sp - if (ip == entry + 1) { - sp() += 16; - ret(); - return true; - } else if (entry[1] == 0x910003fd && withinCurrentStack(fp())) { - sp() = fp() + 16; - fp() = stackAt(-2); - pc() = stackAt(-1); - return true; - } +bool StackFrame::popStub(instruction_t *entry, const char *name) { + instruction_t *ip = (instruction_t *)pc(); + if (ip == entry || *ip == 0xd65f03c0 || strncmp(name, "itable", 6) == 0 || + strncmp(name, "vtable", 6) == 0 || + strncmp(name, "compare_long_string_", 20) == 0 || + strcmp(name, "zero_blocks") == 0 || + strcmp(name, "forward_copy_longs") == 0 || + strcmp(name, "backward_copy_longs") == 0 || + strcmp(name, "InlineCacheBuffer") == 0) { + ret(); + return true; + } else if (entry != NULL && entry[0] == 0xa9bf7bfd) { + // The stub begins with + // stp x29, x30, [sp, #-16]! + // mov x29, sp + if (ip == entry + 1) { + sp() += 16; + ret(); + return true; + } else if (entry[1] == 0x910003fd && withinCurrentStack(fp())) { + sp() = fp() + 16; + fp() = stackAt(-2); + pc() = stackAt(-1); + return true; } - return false; + } + return false; } -bool StackFrame::popMethod(instruction_t* entry) { - instruction_t* ip = (instruction_t*)pc(); - if ((*ip & 0xffe07fff) == 0xa9007bfd) { - // stp x29, x30, [sp, #offset] - // SP has been adjusted, but FP not yet stored in a new frame - unsigned int offset = (*ip >> 12) & 0x1f8; - sp() += offset + 16; - } - ret(); - return true; +bool StackFrame::popMethod(instruction_t *entry) { + instruction_t *ip = (instruction_t *)pc(); + if ((*ip & 0xffe07fff) == 0xa9007bfd) { + // stp x29, x30, [sp, #offset] + // SP has been adjusted, but FP not yet stored in a new frame + unsigned int offset = (*ip >> 12) & 0x1f8; + sp() += offset + 16; + } + ret(); + return true; } bool StackFrame::checkInterruptedSyscall() { -#ifdef __APPLE__ - // We are not interested in syscalls that do not check error code, e.g. 
semaphore_wait_trap - if (*(instruction_t*)pc() == 0xd65f03c0) { - return true; - } - // If carry flag is set, the error code is in low byte of x0 - if (REG(pstate, cpsr) & (1 << 29)) { - return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT; - } else { - return retval() == (uintptr_t)-EINTR; - } -#else +# ifdef __APPLE__ + // We are not interested in syscalls that do not check error code, e.g. + // semaphore_wait_trap + if (*(instruction_t *)pc() == 0xd65f03c0) { + return true; + } + // If carry flag is set, the error code is in low byte of x0 + if (REG(pstate, cpsr) & (1 << 29)) { + return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT; + } else { return retval() == (uintptr_t)-EINTR; -#endif + } +# else + return retval() == (uintptr_t)-EINTR; +# endif } -bool StackFrame::isSyscall(instruction_t* pc) { - // svc #0 or svc #80 - return (*pc & 0xffffefff) == 0xd4000001; +bool StackFrame::isSyscall(instruction_t *pc) { + // svc #0 or svc #80 + return (*pc & 0xffffefff) == 0xd4000001; } #endif // __aarch64__ diff --git a/src/async-profiler/stackFrame_arm.cpp b/src/async-profiler/stackFrame_arm.cpp index 1012b9c96..0f1b46b1f 100644 --- a/src/async-profiler/stackFrame_arm.cpp +++ b/src/async-profiler/stackFrame_arm.cpp @@ -16,89 +16,86 @@ #if defined(__arm__) || defined(__thumb__) -#include -#include -#include "stackFrame.h" +# include "stackFrame.h" +# include +# include - -uintptr_t& StackFrame::pc() { - return (uintptr_t&)_ucontext->uc_mcontext.arm_pc; +uintptr_t &StackFrame::pc() { + return (uintptr_t &)_ucontext->uc_mcontext.arm_pc; } -uintptr_t& StackFrame::sp() { - return (uintptr_t&)_ucontext->uc_mcontext.arm_sp; +uintptr_t &StackFrame::sp() { + return (uintptr_t &)_ucontext->uc_mcontext.arm_sp; } -uintptr_t& StackFrame::fp() { - return (uintptr_t&)_ucontext->uc_mcontext.arm_fp; +uintptr_t &StackFrame::fp() { + return (uintptr_t &)_ucontext->uc_mcontext.arm_fp; } -uintptr_t& StackFrame::retval() { - return 
(uintptr_t&)_ucontext->uc_mcontext.arm_r0; +uintptr_t &StackFrame::retval() { + return (uintptr_t &)_ucontext->uc_mcontext.arm_r0; } uintptr_t StackFrame::arg0() { - return (uintptr_t)_ucontext->uc_mcontext.arm_r0; + return (uintptr_t)_ucontext->uc_mcontext.arm_r0; } uintptr_t StackFrame::arg1() { - return (uintptr_t)_ucontext->uc_mcontext.arm_r1; + return (uintptr_t)_ucontext->uc_mcontext.arm_r1; } uintptr_t StackFrame::arg2() { - return (uintptr_t)_ucontext->uc_mcontext.arm_r2; + return (uintptr_t)_ucontext->uc_mcontext.arm_r2; } uintptr_t StackFrame::arg3() { - return (uintptr_t)_ucontext->uc_mcontext.arm_r3; + return (uintptr_t)_ucontext->uc_mcontext.arm_r3; } void StackFrame::ret() { - _ucontext->uc_mcontext.arm_pc = _ucontext->uc_mcontext.arm_lr; + _ucontext->uc_mcontext.arm_pc = _ucontext->uc_mcontext.arm_lr; } -bool StackFrame::popStub(instruction_t* entry, const char* name) { - instruction_t* ip = (instruction_t*)pc(); - if (ip == entry || *ip == 0xe12fff1e - || strncmp(name, "itable", 6) == 0 - || strncmp(name, "vtable", 6) == 0 - || strcmp(name, "InlineCacheBuffer") == 0) - { - ret(); - return true; - } - return false; +bool StackFrame::popStub(instruction_t *entry, const char *name) { + instruction_t *ip = (instruction_t *)pc(); + if (ip == entry || *ip == 0xe12fff1e || strncmp(name, "itable", 6) == 0 || + strncmp(name, "vtable", 6) == 0 || + strcmp(name, "InlineCacheBuffer") == 0) { + ret(); + return true; + } + return false; } -bool StackFrame::popMethod(instruction_t* entry) { - instruction_t* ip = (instruction_t*)pc(); - if (ip > entry && ip <= entry + 4 && (*ip & 0xffffff00) == 0xe24dd000) { - // push {r11, lr} - // mov r11, sp (optional) - // -> sub sp, sp, #offs - fp() = stackAt(0); - pc() = stackAt(1); - sp() += 8; - return true; - } else if (*ip == 0xe8bd4800) { - // add sp, sp, #offs - // -> pop {r11, lr} - fp() = stackAt(0); - pc() = stackAt(1); - sp() += 8; - return true; - } - ret(); +bool StackFrame::popMethod(instruction_t *entry) { + 
instruction_t *ip = (instruction_t *)pc(); + if (ip > entry && ip <= entry + 4 && (*ip & 0xffffff00) == 0xe24dd000) { + // push {r11, lr} + // mov r11, sp (optional) + // -> sub sp, sp, #offs + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; + return true; + } else if (*ip == 0xe8bd4800) { + // add sp, sp, #offs + // -> pop {r11, lr} + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; return true; + } + ret(); + return true; } bool StackFrame::checkInterruptedSyscall() { - return retval() == (uintptr_t)-EINTR; + return retval() == (uintptr_t)-EINTR; } -bool StackFrame::isSyscall(instruction_t* pc) { - // swi #0 - return *pc == 0xef000000; +bool StackFrame::isSyscall(instruction_t *pc) { + // swi #0 + return *pc == 0xef000000; } #endif // defined(__arm__) || defined(__thumb__) diff --git a/src/async-profiler/stackFrame_i386.cpp b/src/async-profiler/stackFrame_i386.cpp index 8c8a3dc48..a08e5f11f 100644 --- a/src/async-profiler/stackFrame_i386.cpp +++ b/src/async-profiler/stackFrame_i386.cpp @@ -16,101 +16,91 @@ #ifdef __i386__ -#include -#include -#include "stackFrame.h" +# include "stackFrame.h" +# include +# include - -uintptr_t& StackFrame::pc() { - return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EIP]; +uintptr_t &StackFrame::pc() { + return (uintptr_t &)_ucontext->uc_mcontext.gregs[REG_EIP]; } -uintptr_t& StackFrame::sp() { - return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_ESP]; +uintptr_t &StackFrame::sp() { + return (uintptr_t &)_ucontext->uc_mcontext.gregs[REG_ESP]; } -uintptr_t& StackFrame::fp() { - return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EBP]; +uintptr_t &StackFrame::fp() { + return (uintptr_t &)_ucontext->uc_mcontext.gregs[REG_EBP]; } -uintptr_t& StackFrame::retval() { - return (uintptr_t&)_ucontext->uc_mcontext.gregs[REG_EAX]; +uintptr_t &StackFrame::retval() { + return (uintptr_t &)_ucontext->uc_mcontext.gregs[REG_EAX]; } -uintptr_t StackFrame::arg0() { - return stackAt(1); -} +uintptr_t StackFrame::arg0() { return stackAt(1); 
} -uintptr_t StackFrame::arg1() { - return stackAt(2); -} +uintptr_t StackFrame::arg1() { return stackAt(2); } -uintptr_t StackFrame::arg2() { - return stackAt(3); -} +uintptr_t StackFrame::arg2() { return stackAt(3); } -uintptr_t StackFrame::arg3() { - return stackAt(4); -} +uintptr_t StackFrame::arg3() { return stackAt(4); } void StackFrame::ret() { - pc() = stackAt(0); - sp() += 4; + pc() = stackAt(0); + sp() += 4; } -bool StackFrame::popStub(instruction_t* entry, const char* name) { - instruction_t* ip = (instruction_t*)pc(); - if (ip == entry || *ip == 0xc3 - || strncmp(name, "itable", 6) == 0 - || strncmp(name, "vtable", 6) == 0 - || strcmp(name, "InlineCacheBuffer") == 0) - { - pc() = stackAt(0); - sp() += 4; - return true; - } else if (entry != NULL && entry[0] == 0x55 && entry[1] == 0x8b && entry[2] == 0xec) { - // The stub begins with - // push ebp - // mov ebp, esp - if (ip == entry + 1) { - pc() = stackAt(1); - sp() += 8; - return true; - } else if (withinCurrentStack(fp())) { - sp() = fp() + 8; - fp() = stackAt(-2); - pc() = stackAt(-1); - return true; - } +bool StackFrame::popStub(instruction_t *entry, const char *name) { + instruction_t *ip = (instruction_t *)pc(); + if (ip == entry || *ip == 0xc3 || strncmp(name, "itable", 6) == 0 || + strncmp(name, "vtable", 6) == 0 || + strcmp(name, "InlineCacheBuffer") == 0) { + pc() = stackAt(0); + sp() += 4; + return true; + } else if (entry != NULL && entry[0] == 0x55 && entry[1] == 0x8b && + entry[2] == 0xec) { + // The stub begins with + // push ebp + // mov ebp, esp + if (ip == entry + 1) { + pc() = stackAt(1); + sp() += 8; + return true; + } else if (withinCurrentStack(fp())) { + sp() = fp() + 8; + fp() = stackAt(-2); + pc() = stackAt(-1); + return true; } - return false; + } + return false; } -bool StackFrame::popMethod(instruction_t* entry) { - instruction_t* ip = (instruction_t*)pc(); - if (ip <= entry || *ip == 0xc3 || *ip == 0x55 // ret or push ebp - || (((uintptr_t)ip & 0xfff) && ip[-1] == 0x5d)) // 
after pop ebp - { - pc() = stackAt(0); - sp() += 4; - return true; - } else if (*ip == 0x5d) { - // pop ebp - fp() = stackAt(0); - pc() = stackAt(1); - sp() += 8; - return true; - } - return false; +bool StackFrame::popMethod(instruction_t *entry) { + instruction_t *ip = (instruction_t *)pc(); + if (ip <= entry || *ip == 0xc3 || *ip == 0x55 // ret or push ebp + || (((uintptr_t)ip & 0xfff) && ip[-1] == 0x5d)) // after pop ebp + { + pc() = stackAt(0); + sp() += 4; + return true; + } else if (*ip == 0x5d) { + // pop ebp + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; + return true; + } + return false; } bool StackFrame::checkInterruptedSyscall() { - return retval() == (uintptr_t)-EINTR; + return retval() == (uintptr_t)-EINTR; } -bool StackFrame::isSyscall(instruction_t* pc) { - // int 0x80 - return pc[0] == 0xcd && pc[1] == 0x80; +bool StackFrame::isSyscall(instruction_t *pc) { + // int 0x80 + return pc[0] == 0xcd && pc[1] == 0x80; } #endif // __i386__ diff --git a/src/async-profiler/stackFrame_ppc64.cpp b/src/async-profiler/stackFrame_ppc64.cpp index 2929767ca..465e0a0a8 100644 --- a/src/async-profiler/stackFrame_ppc64.cpp +++ b/src/async-profiler/stackFrame_ppc64.cpp @@ -18,116 +18,119 @@ #if defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#include -#include -#include "stackFrame.h" +# include "stackFrame.h" +# include +# include - -uintptr_t& StackFrame::pc() { - return (uintptr_t&)_ucontext->uc_mcontext.regs->nip; +uintptr_t &StackFrame::pc() { + return (uintptr_t &)_ucontext->uc_mcontext.regs->nip; } -uintptr_t& StackFrame::sp() { - return (uintptr_t&)_ucontext->uc_mcontext.regs->gpr[1]; +uintptr_t &StackFrame::sp() { + return (uintptr_t &)_ucontext->uc_mcontext.regs->gpr[1]; } -uintptr_t& StackFrame::fp() { - return *((uintptr_t*)_ucontext->uc_mcontext.regs->gpr[1]); +uintptr_t &StackFrame::fp() { + return *((uintptr_t *)_ucontext->uc_mcontext.regs->gpr[1]); } -uintptr_t& StackFrame::retval() { - return 
(uintptr_t&)_ucontext->uc_mcontext.regs->gpr[3]; +uintptr_t &StackFrame::retval() { + return (uintptr_t &)_ucontext->uc_mcontext.regs->gpr[3]; } uintptr_t StackFrame::arg0() { - return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[3]; + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[3]; } uintptr_t StackFrame::arg1() { - return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[4]; + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[4]; } uintptr_t StackFrame::arg2() { - return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[5]; + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[5]; } uintptr_t StackFrame::arg3() { - return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[6]; + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[6]; } void StackFrame::ret() { - _ucontext->uc_mcontext.regs->nip = _ucontext->uc_mcontext.regs->link; + _ucontext->uc_mcontext.regs->nip = _ucontext->uc_mcontext.regs->link; } static inline bool inC1EpilogueCrit(uintptr_t pc) { - if (!(pc & 0xfff)) { - // Make sure we are not at the page boundary, so that reading [pc - 1] is safe - return false; - } - // C1 epilogue and critical section (posX) - // 3821**** add r1,r1,xx - // pos3 xxxxxxxx - // pos2 1000e1eb ld r31,16(r1) - // pos1 a603e87f mtlr r31 - // xxxxxxxx - // 2000804e blr - instruction_t* inst = (instruction_t*)pc; - if (inst[ 1] == 0xebe10010 && inst[2] == 0x7fe803a6 || - inst[ 0] == 0xebe10010 && inst[1] == 0x7fe803a6 || - inst[-1] == 0xebe10010 && inst[0] == 0x7fe803a6) { - return true; - } - - return false; // not in critical section + if (!(pc & 0xfff)) { + // Make sure we are not at the page boundary, so that reading [pc - 1] is + // safe + return false; + } + // C1 epilogue and critical section (posX) + // 3821**** add r1,r1,xx + // pos3 xxxxxxxx + // pos2 1000e1eb ld r31,16(r1) + // pos1 a603e87f mtlr r31 + // xxxxxxxx + // 2000804e blr + instruction_t *inst = (instruction_t *)pc; + if (inst[1] == 0xebe10010 && inst[2] == 0x7fe803a6 || + inst[0] == 0xebe10010 && inst[1] == 0x7fe803a6 
|| + inst[-1] == 0xebe10010 && inst[0] == 0x7fe803a6) { + return true; + } + + return false; // not in critical section } static inline bool inC2PrologueCrit(uintptr_t pc) { - // C2 prologue and critical section - // f821**** stdu r1, (xx)r1 - // pos1 fa950010 std r20,16(r21) - instruction_t* inst = (instruction_t*)pc; - if (inst[0] == 0xfa950010 && (inst[-1] & 0xffff0000) == 0xf8210000) { - return true; - } - - return false; // not in critical section -} + // C2 prologue and critical section + // f821**** stdu r1, (xx)r1 + // pos1 fa950010 std r20,16(r21) + instruction_t *inst = (instruction_t *)pc; + if (inst[0] == 0xfa950010 && (inst[-1] & 0xffff0000) == 0xf8210000) { + return true; + } + return false; // not in critical section +} -bool StackFrame::popStub(instruction_t* entry, const char* name) { - pc() = _ucontext->uc_mcontext.regs->link; - return true; +bool StackFrame::popStub(instruction_t *entry, const char *name) { + pc() = _ucontext->uc_mcontext.regs->link; + return true; } -bool StackFrame::popMethod(instruction_t* entry) { - // On PPC there is a valid back link to the previous frame at all times. The callee stores - // the return address in the caller's frame before it constructs its own frame. After it - // has destroyed its frame it restores the link register and returns. A problematic sequence - // is the prologue/epilogue of a compiled method before/after frame construction/destruction. - // Therefore popping the frame would not help here, as it is not yet/anymore present, rather - // more adjusting the pc to the callers pc does the trick. There are two exceptions to this, - // One in the prologue of C2 compiled methods and one in the epilogue of C1 compiled methods. 
- if (inC1EpilogueCrit(pc())) { - // lr not yet set: use the value stored in the frame - pc() = stackAt(2); - } else if (inC2PrologueCrit(pc())) { - // frame constructed but lr not yet stored in it: just do it here - *(((unsigned long *) _ucontext->uc_mcontext.regs->gpr[21]) + 2) = (unsigned long) _ucontext->uc_mcontext.regs->gpr[20]; - } else { - // most probably caller's framer is still on top but pc is already in callee: use caller's pc - pc() = _ucontext->uc_mcontext.regs->link; - } +bool StackFrame::popMethod(instruction_t *entry) { + // On PPC there is a valid back link to the previous frame at all times. The + // callee stores the return address in the caller's frame before it constructs + // its own frame. After it has destroyed its frame it restores the link + // register and returns. A problematic sequence is the prologue/epilogue of a + // compiled method before/after frame construction/destruction. Therefore + // popping the frame would not help here, as it is not yet/anymore present, + // rather more adjusting the pc to the callers pc does the trick. There are + // two exceptions to this, One in the prologue of C2 compiled methods and one + // in the epilogue of C1 compiled methods. 
+ if (inC1EpilogueCrit(pc())) { + // lr not yet set: use the value stored in the frame + pc() = stackAt(2); + } else if (inC2PrologueCrit(pc())) { + // frame constructed but lr not yet stored in it: just do it here + *(((unsigned long *)_ucontext->uc_mcontext.regs->gpr[21]) + 2) = + (unsigned long)_ucontext->uc_mcontext.regs->gpr[20]; + } else { + // most probably caller's framer is still on top but pc is already in + // callee: use caller's pc + pc() = _ucontext->uc_mcontext.regs->link; + } - return true; + return true; } bool StackFrame::checkInterruptedSyscall() { - return retval() == (uintptr_t)-EINTR; + return retval() == (uintptr_t)-EINTR; } -bool StackFrame::isSyscall(instruction_t* pc) { - // sc/svc - return (*pc & 0x1f) == 17; +bool StackFrame::isSyscall(instruction_t *pc) { + // sc/svc + return (*pc & 0x1f) == 17; } #endif // defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) diff --git a/src/async-profiler/stackFrame_x64.cpp b/src/async-profiler/stackFrame_x64.cpp index 4816a6f68..a9c08d32d 100644 --- a/src/async-profiler/stackFrame_x64.cpp +++ b/src/async-profiler/stackFrame_x64.cpp @@ -19,54 +19,36 @@ #ifdef __x86_64__ -#include -#include -#include -#include "stackFrame.h" +# include "stackFrame.h" +# include +# include +# include +# ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +# else +# define REG(l, m) _ucontext->uc_mcontext.gregs[REG_##l] +# endif -#ifdef __APPLE__ -# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m -#else -# define REG(l, m) _ucontext->uc_mcontext.gregs[REG_##l] -#endif +uintptr_t &StackFrame::pc() { return (uintptr_t &)REG(RIP, rip); } +uintptr_t &StackFrame::sp() { return (uintptr_t &)REG(RSP, rsp); } -uintptr_t& StackFrame::pc() { - return (uintptr_t&)REG(RIP, rip); -} - -uintptr_t& StackFrame::sp() { - return (uintptr_t&)REG(RSP, rsp); -} - -uintptr_t& StackFrame::fp() { - return (uintptr_t&)REG(RBP, rbp); -} +uintptr_t &StackFrame::fp() { return (uintptr_t &)REG(RBP, rbp); } 
-uintptr_t& StackFrame::retval() { - return (uintptr_t&)REG(RAX, rax); -} +uintptr_t &StackFrame::retval() { return (uintptr_t &)REG(RAX, rax); } -uintptr_t StackFrame::arg0() { - return (uintptr_t)REG(RDI, rdi); -} +uintptr_t StackFrame::arg0() { return (uintptr_t)REG(RDI, rdi); } -uintptr_t StackFrame::arg1() { - return (uintptr_t)REG(RSI, rsi); -} +uintptr_t StackFrame::arg1() { return (uintptr_t)REG(RSI, rsi); } -uintptr_t StackFrame::arg2() { - return (uintptr_t)REG(RDX, rdx); -} +uintptr_t StackFrame::arg2() { return (uintptr_t)REG(RDX, rdx); } -uintptr_t StackFrame::arg3() { - return (uintptr_t)REG(RCX, rcx); -} +uintptr_t StackFrame::arg3() { return (uintptr_t)REG(RCX, rcx); } void StackFrame::ret() { - pc() = stackAt(0); - sp() += 8; + pc() = stackAt(0); + sp() += 8; } #endif // __x86_64__ diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index 5127ab4f7..d7dc31530 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp @@ -16,12 +16,12 @@ * * Modified by Nick Ripley to extract components needed for call stack unwinding */ -#include "codeCache.h" #include "stackWalker.h" +#include "codeCache.h" #include "dwarf.h" +#include "logger.hpp" #include "safeAccess.h" #include "stackFrame.h" -#include "logger.hpp" #include const intptr_t MIN_VALID_PC = 0x1000; @@ -43,8 +43,8 @@ bool read_memory(uint64_t addr, uint64_t *res, const ap::StackBuffer &buffer) { if (addr < buffer.sp_start && addr > buffer.sp_start - 4096) { // todo red zone thing return false; - } - else if (addr < buffer.sp_start || addr + sizeof(uint64_t) > buffer.sp_end) { + } else if (addr < buffer.sp_start || + addr + sizeof(uint64_t) > buffer.sp_end) { return false; } uint64_t stack_idx = addr - buffer.sp_start; @@ -55,111 +55,111 @@ bool read_memory(uint64_t addr, uint64_t *res, const ap::StackBuffer &buffer) { return true; } -CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void* address) { - const int 
native_lib_count = cache->count(); - for (int i = 0; i < native_lib_count; i++) { - if (cache->operator[](i)->contains(address)) { - return cache->operator[](i); - } +CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void *address) { + const int native_lib_count = cache->count(); + for (int i = 0; i < native_lib_count; i++) { + if (cache->operator[](i)->contains(address)) { + return cache->operator[](i); } - return NULL; + } + return NULL; } -bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, FrameDesc *f); +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, + FrameDesc *f); -bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, CodeCacheArray *cache) { - FrameDesc* f; - CodeCache* cc = findLibraryByAddress(cache, sc.pc); - if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) { - f = &FrameDesc::default_frame; - } - return stepStackContext(sc, buffer, f); +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, + CodeCacheArray *cache) { + FrameDesc *f; + CodeCache *cc = findLibraryByAddress(cache, sc.pc); + if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) { + f = &FrameDesc::default_frame; + } + return stepStackContext(sc, buffer, f); } +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, + FrameDesc *f) { + uintptr_t bottom = sc.sp + MAX_WALK_SIZE; + uintptr_t prev_sp = sc.sp; + + u8 cfa_reg = (u8)f->cfa; + int cfa_off = f->cfa >> 8; + if (cfa_reg == DW_REG_SP) { + sc.sp = sc.sp + cfa_off; + } else if (cfa_reg == DW_REG_FP) { + sc.sp = sc.fp + cfa_off; + } else if (cfa_reg == DW_REG_PLT) { + sc.sp += ((uintptr_t)sc.pc & 15) >= 11 ? 
cfa_off * 2 : cfa_off; + } else { + return false; + } -bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, FrameDesc *f) { - uintptr_t bottom = sc.sp + MAX_WALK_SIZE; - uintptr_t prev_sp = sc.sp; - - u8 cfa_reg = (u8)f->cfa; - int cfa_off = f->cfa >> 8; - if (cfa_reg == DW_REG_SP) { - sc.sp = sc.sp + cfa_off; - } else if (cfa_reg == DW_REG_FP) { - sc.sp = sc.fp + cfa_off; - } else if (cfa_reg == DW_REG_PLT) { - sc.sp += ((uintptr_t)sc.pc & 15) >= 11 ? cfa_off * 2 : cfa_off; - } else { - return false; - } + // Check if the next frame is below on the current stack + if (sc.sp < prev_sp || sc.sp >= prev_sp + MAX_FRAME_SIZE || sc.sp >= bottom) { + return false; + } - // Check if the next frame is below on the current stack - if (sc.sp < prev_sp || sc.sp >= prev_sp + MAX_FRAME_SIZE || sc.sp >= bottom) { - return false; - } + // Stack pointer must be word aligned + if ((sc.sp & (sizeof(uintptr_t) - 1)) != 0) { + return false; + } - // Stack pointer must be word aligned - if ((sc.sp & (sizeof(uintptr_t) - 1)) != 0) { + if (f->fp_off & DW_PC_OFFSET) { + sc.pc = (const char *)sc.pc + (f->fp_off >> 1); + } else { + if (f->fp_off != DW_SAME_FP && f->fp_off < MAX_FRAME_SIZE && + f->fp_off > -MAX_FRAME_SIZE) { + // Update the frame pointer (based on fp offset) + if (!read_memory(sc.sp + f->fp_off, reinterpret_cast(&sc.fp), + buffer)) { return false; + } } - - if (f->fp_off & DW_PC_OFFSET) { - sc.pc = (const char*)sc.pc + (f->fp_off >> 1); - } else { - if (f->fp_off != DW_SAME_FP && f->fp_off < MAX_FRAME_SIZE && f->fp_off > -MAX_FRAME_SIZE) { - void* new_fp = SafeAccess::load((void**)(sc.sp + f->fp_off)); - printf("Update FP to value (old code): %p\n", new_fp); - // Update the frame pointer (based on fp offset) - if (!read_memory(sc.sp + f->fp_off, reinterpret_cast(&sc.fp), buffer)) { - printf("Failure __%u \n", __LINE__); - return false; - } - printf("Update FP to value (read mem): %p\n", sc.fp); - } - void* new_pc = 
stripPointer(SafeAccess::load((void**)sc.sp - 1)); - // Update the pc using return address - printf("Update new instruction pointer to value (old code): %p\n", new_pc); - - if (!read_memory(reinterpret_cast((void**)sc.sp - 1), reinterpret_cast(&sc.pc), buffer)) { - printf("Failure __%u \n", __LINE__); - return false; - } - printf("Update new instruction pointer to value (new code): %p\n", sc.pc); + void *new_pc = stripPointer(SafeAccess::load((void **)sc.sp - 1)); + // Update the pc using return address + if (!read_memory(reinterpret_cast((void **)sc.sp - 1), + reinterpret_cast(&sc.pc), buffer)) { + return false; } + } - if (sc.pc < (const void*)MIN_VALID_PC || sc.pc > (const void*)-MIN_VALID_PC) { - return false; - } - return true; + if (sc.pc < (const void *)MIN_VALID_PC || + sc.pc > (const void *)-MIN_VALID_PC) { + return false; + } + return true; } void populateStackContext(ap::StackContext &sc, void *ucontext) { - if (ucontext == NULL) { - sc.pc = __builtin_return_address(0); - sc.fp = (uintptr_t)__builtin_frame_address(1); // XXX(nick): this isn't safe.... - sc.sp = (uintptr_t)__builtin_frame_address(0); - } else { - StackFrame frame(ucontext); - sc.pc = (const void*)frame.pc(); - sc.fp = frame.fp(); - sc.sp = frame.sp(); - } + if (ucontext == NULL) { + sc.pc = __builtin_return_address(0); + sc.fp = + (uintptr_t)__builtin_frame_address(1); // XXX(nick): this isn't safe.... 
+ sc.sp = (uintptr_t)__builtin_frame_address(0); + } else { + StackFrame frame(ucontext); + sc.pc = (const void *)frame.pc(); + sc.fp = frame.fp(); + sc.sp = frame.sp(); + } } -int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, const ap::StackBuffer &buffer, - const void** callchain, int max_depth, int skip) { - int depth = -skip; - - // Walk until the bottom of the stack or until the first Java frame - while (depth < max_depth) { - int d = depth++; - if (d >= 0) { - callchain[d] = sc.pc; - } - if (!stepStackContext(sc, buffer, cache)) { - break; - } +int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, + const ap::StackBuffer &buffer, const void **callchain, + int max_depth, int skip) { + int depth = -skip; + + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + int d = depth++; + if (d >= 0) { + callchain[d] = sc.pc; + } + if (!stepStackContext(sc, buffer, cache)) { + break; } + } - return depth; + return depth; } diff --git a/src/async-profiler/stack_context.cpp b/src/async-profiler/stack_context.cpp new file mode 100644 index 000000000..dc06b1ebc --- /dev/null +++ b/src/async-profiler/stack_context.cpp @@ -0,0 +1,17 @@ +#include "async-profiler/stack_context.h" + +#define CAST_TO_VOID_STAR(ptr) reinterpret_cast(ptr) + +namespace ap { + +// Async profiler's unwinding only uses a subset of the registers +StackContext from_regs(const std::span regs) { + // context from saving state + ap::StackContext sc; + sc.pc = CAST_TO_VOID_STAR(regs[REGNAME(PC)]); + sc.sp = regs[REGNAME(SP)]; + sc.fp = regs[REGNAME(RBP)]; + return sc; +} + +} // namespace ap \ No newline at end of file diff --git a/src/async-profiler/symbols_darwin.cpp b/src/async-profiler/symbols_darwin.cpp index 32b2d0b76..a7211261a 100644 --- a/src/async-profiler/symbols_darwin.cpp +++ b/src/async-profiler/symbols_darwin.cpp @@ -19,140 +19,142 @@ #ifdef __APPLE__ -#include -#include -#include -#include -#include -#include -#include "symbols.h" - 
+# include "symbols.h" +# include +# include +# include +# include +# include +# include class MachOParser { - private: - CodeCache* _cc; - const mach_header* _image_base; - - static const char* add(const void* base, uint64_t offset) { - return (const char*)base + offset; +private: + CodeCache *_cc; + const mach_header *_image_base; + + static const char *add(const void *base, uint64_t offset) { + return (const char *)base + offset; + } + + void findGlobalOffsetTable(const segment_command_64 *sc) { + const section_64 *section = + (const section_64 *)add(sc, sizeof(segment_command_64)); + for (uint32_t i = 0; i < sc->nsects; i++) { + if (strcmp(section->sectname, "__la_symbol_ptr") == 0) { + const char *got_start = add(_image_base, section->addr); + _cc->setGlobalOffsetTable((void **)got_start, + (void **)(got_start + section->size), true); + break; + } + section++; } - - void findGlobalOffsetTable(const segment_command_64* sc) { - const section_64* section = (const section_64*)add(sc, sizeof(segment_command_64)); - for (uint32_t i = 0; i < sc->nsects; i++) { - if (strcmp(section->sectname, "__la_symbol_ptr") == 0) { - const char* got_start = add(_image_base, section->addr); - _cc->setGlobalOffsetTable((void**)got_start, (void**)(got_start + section->size), true); - break; - } - section++; - } + } + + void loadSymbols(const symtab_command *symtab, const char *text_base, + const char *link_base) { + const nlist_64 *sym = (const nlist_64 *)add(link_base, symtab->symoff); + const char *str_table = add(link_base, symtab->stroff); + + for (uint32_t i = 0; i < symtab->nsyms; i++) { + if ((sym->n_type & 0xee) == 0x0e && sym->n_value != 0) { + const char *addr = text_base + sym->n_value; + const char *name = str_table + sym->n_un.n_strx; + if (name[0] == '_') + name++; + _cc->add(addr, 0, name); + } + sym++; } + } - void loadSymbols(const symtab_command* symtab, const char* text_base, const char* link_base) { - const nlist_64* sym = (const nlist_64*)add(link_base, 
symtab->symoff); - const char* str_table = add(link_base, symtab->stroff); - - for (uint32_t i = 0; i < symtab->nsyms; i++) { - if ((sym->n_type & 0xee) == 0x0e && sym->n_value != 0) { - const char* addr = text_base + sym->n_value; - const char* name = str_table + sym->n_un.n_strx; - if (name[0] == '_') name++; - _cc->add(addr, 0, name); - } - sym++; - } - } +public: + MachOParser(CodeCache *cc, const mach_header *image_base) + : _cc(cc), _image_base(image_base) {} - public: - MachOParser(CodeCache* cc, const mach_header* image_base) : _cc(cc), _image_base(image_base) { + bool parse() { + if (_image_base->magic != MH_MAGIC_64) { + return false; } - bool parse() { - if (_image_base->magic != MH_MAGIC_64) { - return false; + const mach_header_64 *header = (const mach_header_64 *)_image_base; + const load_command *lc = (const load_command *)(header + 1); + + const char *UNDEFINED = (const char *)-1; + const char *text_base = UNDEFINED; + const char *link_base = UNDEFINED; + + for (uint32_t i = 0; i < header->ncmds; i++) { + if (lc->cmd == LC_SEGMENT_64) { + const segment_command_64 *sc = (const segment_command_64 *)lc; + if ((sc->initprot & 4) != 0) { + if (text_base == UNDEFINED || strcmp(sc->segname, "__TEXT") == 0) { + text_base = (const char *)_image_base - sc->vmaddr; + _cc->setTextBase(text_base); + _cc->updateBounds(_image_base, add(_image_base, sc->vmsize)); + } + } else if ((sc->initprot & 7) == 1) { + if (link_base == UNDEFINED || + strcmp(sc->segname, "__LINKEDIT") == 0) { + link_base = text_base + sc->vmaddr - sc->fileoff; + } + } else if ((sc->initprot & 2) != 0) { + if (strcmp(sc->segname, "__DATA") == 0) { + findGlobalOffsetTable(sc); + } } - - const mach_header_64* header = (const mach_header_64*)_image_base; - const load_command* lc = (const load_command*)(header + 1); - - const char* UNDEFINED = (const char*)-1; - const char* text_base = UNDEFINED; - const char* link_base = UNDEFINED; - - for (uint32_t i = 0; i < header->ncmds; i++) { - if (lc->cmd 
== LC_SEGMENT_64) { - const segment_command_64* sc = (const segment_command_64*)lc; - if ((sc->initprot & 4) != 0) { - if (text_base == UNDEFINED || strcmp(sc->segname, "__TEXT") == 0) { - text_base = (const char*)_image_base - sc->vmaddr; - _cc->setTextBase(text_base); - _cc->updateBounds(_image_base, add(_image_base, sc->vmsize)); - } - } else if ((sc->initprot & 7) == 1) { - if (link_base == UNDEFINED || strcmp(sc->segname, "__LINKEDIT") == 0) { - link_base = text_base + sc->vmaddr - sc->fileoff; - } - } else if ((sc->initprot & 2) != 0) { - if (strcmp(sc->segname, "__DATA") == 0) { - findGlobalOffsetTable(sc); - } - } - } else if (lc->cmd == LC_SYMTAB) { - if (text_base == UNDEFINED || link_base == UNDEFINED) { - return false; - } - loadSymbols((const symtab_command*)lc, text_base, link_base); - break; - } - lc = (const load_command*)add(lc, lc->cmdsize); + } else if (lc->cmd == LC_SYMTAB) { + if (text_base == UNDEFINED || link_base == UNDEFINED) { + return false; } - - return true; + loadSymbols((const symtab_command *)lc, text_base, link_base); + break; + } + lc = (const load_command *)add(lc, lc->cmdsize); } -}; + return true; + } +}; Mutex Symbols::_parse_lock; bool Symbols::_have_kernel_symbols = false; -void Symbols::parseKernelSymbols(CodeCache* cc) { -} - -void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) { - static std::set _parsed_libraries; - MutexLocker ml(_parse_lock); - uint32_t images = _dyld_image_count(); +void Symbols::parseKernelSymbols(CodeCache *cc) {} - for (uint32_t i = 0; i < images; i++) { - const mach_header* image_base = _dyld_get_image_header(i); - if (image_base == NULL || !_parsed_libraries.insert(image_base).second) { - continue; // the library was already parsed - } +void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { + static std::set _parsed_libraries; + MutexLocker ml(_parse_lock); + uint32_t images = _dyld_image_count(); - int count = array->count(); - if (count >= 
MAX_NATIVE_LIBS) { - break; - } + for (uint32_t i = 0; i < images; i++) { + const mach_header *image_base = _dyld_get_image_header(i); + if (image_base == NULL || !_parsed_libraries.insert(image_base).second) { + continue; // the library was already parsed + } - const char* path = _dyld_get_image_name(i); + int count = array->count(); + if (count >= MAX_NATIVE_LIBS) { + break; + } - // Protect the library from unloading while parsing symbols - void* handle = dlopen(path, RTLD_LAZY | RTLD_NOLOAD); - if (handle == NULL) { - continue; - } + const char *path = _dyld_get_image_name(i); - CodeCache* cc = new CodeCache(path, count); - MachOParser parser(cc, image_base); - if (!parser.parse()) { - //Log::warn("Could not parse symbols from %s", path); - } - dlclose(handle); + // Protect the library from unloading while parsing symbols + void *handle = dlopen(path, RTLD_LAZY | RTLD_NOLOAD); + if (handle == NULL) { + continue; + } - cc->sort(); - array->add(cc); + CodeCache *cc = new CodeCache(path, count); + MachOParser parser(cc, image_base); + if (!parser.parse()) { + // Log::warn("Could not parse symbols from %s", path); } + dlclose(handle); + + cc->sort(); + array->add(cc); + } } #endif // __APPLE__ diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index a57976ca5..d6bc67d64 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -19,537 +19,562 @@ #ifdef __linux__ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "symbols.h" -#include "dwarf.h" - +# include "dwarf.h" +# include "symbols.h" +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include class SymbolDesc { - private: - const char* _addr; - const char* _type; - - public: - SymbolDesc(const char* s) { - _addr = s; - _type = strchr(_addr, ' ') + 1; - } - - const char* addr() { return 
(const char*)strtoul(_addr, NULL, 16); } - char type() { return _type[0]; } - const char* name() { return _type + 2; } +private: + const char *_addr; + const char *_type; + +public: + SymbolDesc(const char *s) { + _addr = s; + _type = strchr(_addr, ' ') + 1; + } + + const char *addr() { return (const char *)strtoul(_addr, NULL, 16); } + char type() { return _type[0]; } + const char *name() { return _type + 2; } }; class MemoryMapDesc { - private: - const char* _addr; - const char* _end; - const char* _perm; - const char* _offs; - const char* _dev; - const char* _inode; - const char* _file; - - public: - MemoryMapDesc(const char* s) { - _addr = s; - _end = strchr(_addr, '-') + 1; - _perm = strchr(_end, ' ') + 1; - _offs = strchr(_perm, ' ') + 1; - _dev = strchr(_offs, ' ') + 1; - _inode = strchr(_dev, ' ') + 1; - _file = strchr(_inode, ' '); - - if (_file != NULL) { - while (*_file == ' ') _file++; - } - } - - const char* file() { return _file; } - bool isReadable() { return _perm[0] == 'r'; } - bool isExecutable() { return _perm[2] == 'x'; } - const char* addr() { return (const char*)strtoul(_addr, NULL, 16); } - const char* end() { return (const char*)strtoul(_end, NULL, 16); } - unsigned long offs() { return strtoul(_offs, NULL, 16); } - unsigned long dev() { return strtoul(_dev, NULL, 16) << 8 | strtoul(_dev + 3, NULL, 16); } - unsigned long inode() { return strtoul(_inode, NULL, 10); } +private: + const char *_addr; + const char *_end; + const char *_perm; + const char *_offs; + const char *_dev; + const char *_inode; + const char *_file; + +public: + MemoryMapDesc(const char *s) { + _addr = s; + _end = strchr(_addr, '-') + 1; + _perm = strchr(_end, ' ') + 1; + _offs = strchr(_perm, ' ') + 1; + _dev = strchr(_offs, ' ') + 1; + _inode = strchr(_dev, ' ') + 1; + _file = strchr(_inode, ' '); + + if (_file != NULL) { + while (*_file == ' ') + _file++; + } + } + + const char *file() { return _file; } + bool isReadable() { return _perm[0] == 'r'; } + bool 
isExecutable() { return _perm[2] == 'x'; } + const char *addr() { return (const char *)strtoul(_addr, NULL, 16); } + const char *end() { return (const char *)strtoul(_end, NULL, 16); } + unsigned long offs() { return strtoul(_offs, NULL, 16); } + unsigned long dev() { + return strtoul(_dev, NULL, 16) << 8 | strtoul(_dev + 3, NULL, 16); + } + unsigned long inode() { return strtoul(_inode, NULL, 10); } }; - -#ifdef __LP64__ +# ifdef __LP64__ const unsigned char ELFCLASS_SUPPORTED = ELFCLASS64; typedef Elf64_Ehdr ElfHeader; typedef Elf64_Shdr ElfSection; typedef Elf64_Phdr ElfProgramHeader; typedef Elf64_Nhdr ElfNote; -typedef Elf64_Sym ElfSymbol; -typedef Elf64_Rel ElfRelocation; -typedef Elf64_Dyn ElfDyn; -#define ELF_R_TYPE ELF64_R_TYPE -#define ELF_R_SYM ELF64_R_SYM -#else +typedef Elf64_Sym ElfSymbol; +typedef Elf64_Rel ElfRelocation; +typedef Elf64_Dyn ElfDyn; +# define ELF_R_TYPE ELF64_R_TYPE +# define ELF_R_SYM ELF64_R_SYM +# else const unsigned char ELFCLASS_SUPPORTED = ELFCLASS32; typedef Elf32_Ehdr ElfHeader; typedef Elf32_Shdr ElfSection; typedef Elf32_Phdr ElfProgramHeader; typedef Elf32_Nhdr ElfNote; -typedef Elf32_Sym ElfSymbol; -typedef Elf32_Rel ElfRelocation; -typedef Elf32_Dyn ElfDyn; -#define ELF_R_TYPE ELF32_R_TYPE -#define ELF_R_SYM ELF32_R_SYM -#endif // __LP64__ - -#if defined(__x86_64__) -# define R_GLOB_DAT R_X86_64_GLOB_DAT -#elif defined(__i386__) -# define R_GLOB_DAT R_386_GLOB_DAT -#elif defined(__arm__) || defined(__thumb__) -# define R_GLOB_DAT R_ARM_GLOB_DAT -#elif defined(__aarch64__) -# define R_GLOB_DAT R_AARCH64_GLOB_DAT -#elif defined(__PPC64__) -# define R_GLOB_DAT R_PPC64_GLOB_DAT -#else -# error "Compiling on unsupported arch" -#endif - -// GNU dynamic linker relocates pointers in the dynamic section, while musl doesn't. -// A tricky case is when we attach to a musl container from a glibc host. -#ifdef __musl__ -# define DYN_PTR(ptr) (_base + (ptr)) -#else -# define DYN_PTR(ptr) ((char*)(ptr) >= _base ? 
(char*)(ptr) : _base + (ptr)) -#endif // __musl__ - +typedef Elf32_Sym ElfSymbol; +typedef Elf32_Rel ElfRelocation; +typedef Elf32_Dyn ElfDyn; +# define ELF_R_TYPE ELF32_R_TYPE +# define ELF_R_SYM ELF32_R_SYM +# endif // __LP64__ + +# if defined(__x86_64__) +# define R_GLOB_DAT R_X86_64_GLOB_DAT +# elif defined(__i386__) +# define R_GLOB_DAT R_386_GLOB_DAT +# elif defined(__arm__) || defined(__thumb__) +# define R_GLOB_DAT R_ARM_GLOB_DAT +# elif defined(__aarch64__) +# define R_GLOB_DAT R_AARCH64_GLOB_DAT +# elif defined(__PPC64__) +# define R_GLOB_DAT R_PPC64_GLOB_DAT +# else +# error "Compiling on unsupported arch" +# endif + +// GNU dynamic linker relocates pointers in the dynamic section, while musl +// doesn't. A tricky case is when we attach to a musl container from a glibc +// host. +# ifdef __musl__ +# define DYN_PTR(ptr) (_base + (ptr)) +# else +# define DYN_PTR(ptr) \ + ((char *)(ptr) >= _base ? (char *)(ptr) : _base + (ptr)) +# endif // __musl__ class ElfParser { - private: - CodeCache* _cc; - const char* _base; - const char* _file_name; - ElfHeader* _header; - const char* _sections; - - ElfParser(CodeCache* cc, const char* base, const void* addr, const char* file_name = NULL) { - _cc = cc; - _base = base; - _file_name = file_name; - _header = (ElfHeader*)addr; - _sections = (const char*)addr + _header->e_shoff; - } - - bool validHeader() { - unsigned char* ident = _header->e_ident; - return ident[0] == 0x7f && ident[1] == 'E' && ident[2] == 'L' && ident[3] == 'F' - && ident[4] == ELFCLASS_SUPPORTED && ident[5] == ELFDATA2LSB && ident[6] == EV_CURRENT - && _header->e_shstrndx != SHN_UNDEF; - } - - ElfSection* section(int index) { - return (ElfSection*)(_sections + index * _header->e_shentsize); - } - - const char* at(ElfSection* section) { - return (const char*)_header + section->sh_offset; - } - - const char* at(ElfProgramHeader* pheader) { - return _header->e_type == ET_EXEC ? 
(const char*)pheader->p_vaddr : (const char*)_header + pheader->p_vaddr; - } - - ElfSection* findSection(uint32_t type, const char* name); - ElfProgramHeader* findProgramHeader(uint32_t type); - - void parseDynamicSection(); - void parseDwarfInfo(); - void loadSymbols(bool use_debug); - bool loadSymbolsUsingBuildId(); - bool loadSymbolsUsingDebugLink(); - void loadSymbolTable(ElfSection* symtab); - void addRelocationSymbols(ElfSection* reltab, const char* plt); - - public: - static void parseProgramHeaders(CodeCache* cc, const char* base); - static bool parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug); - static void parseMem(CodeCache* cc, const char* base); +private: + CodeCache *_cc; + const char *_base; + const char *_file_name; + ElfHeader *_header; + const char *_sections; + + ElfParser(CodeCache *cc, const char *base, const void *addr, + const char *file_name = NULL) { + _cc = cc; + _base = base; + _file_name = file_name; + _header = (ElfHeader *)addr; + _sections = (const char *)addr + _header->e_shoff; + } + + bool validHeader() { + unsigned char *ident = _header->e_ident; + return ident[0] == 0x7f && ident[1] == 'E' && ident[2] == 'L' && + ident[3] == 'F' && ident[4] == ELFCLASS_SUPPORTED && + ident[5] == ELFDATA2LSB && ident[6] == EV_CURRENT && + _header->e_shstrndx != SHN_UNDEF; + } + + ElfSection *section(int index) { + return (ElfSection *)(_sections + index * _header->e_shentsize); + } + + const char *at(ElfSection *section) { + return (const char *)_header + section->sh_offset; + } + + const char *at(ElfProgramHeader *pheader) { + return _header->e_type == ET_EXEC + ? 
(const char *)pheader->p_vaddr + : (const char *)_header + pheader->p_vaddr; + } + + ElfSection *findSection(uint32_t type, const char *name); + ElfProgramHeader *findProgramHeader(uint32_t type); + + void parseDynamicSection(); + void parseDwarfInfo(); + void loadSymbols(bool use_debug); + bool loadSymbolsUsingBuildId(); + bool loadSymbolsUsingDebugLink(); + void loadSymbolTable(ElfSection *symtab); + void addRelocationSymbols(ElfSection *reltab, const char *plt); + +public: + static void parseProgramHeaders(CodeCache *cc, const char *base); + static bool parseFile(CodeCache *cc, const char *base, const char *file_name, + bool use_debug); + static void parseMem(CodeCache *cc, const char *base); }; +ElfSection *ElfParser::findSection(uint32_t type, const char *name) { + const char *strtab = at(section(_header->e_shstrndx)); -ElfSection* ElfParser::findSection(uint32_t type, const char* name) { - const char* strtab = at(section(_header->e_shstrndx)); - - for (int i = 0; i < _header->e_shnum; i++) { - ElfSection* section = this->section(i); - if (section->sh_type == type && section->sh_name != 0) { - if (strcmp(strtab + section->sh_name, name) == 0) { - return section; - } - } + for (int i = 0; i < _header->e_shnum; i++) { + ElfSection *section = this->section(i); + if (section->sh_type == type && section->sh_name != 0) { + if (strcmp(strtab + section->sh_name, name) == 0) { + return section; + } } + } - return NULL; + return NULL; } -ElfProgramHeader* ElfParser::findProgramHeader(uint32_t type) { - const char* pheaders = (const char*)_header + _header->e_phoff; +ElfProgramHeader *ElfParser::findProgramHeader(uint32_t type) { + const char *pheaders = (const char *)_header + _header->e_phoff; - for (int i = 0; i < _header->e_phnum; i++) { - ElfProgramHeader* pheader = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); - if (pheader->p_type == type) { - return pheader; - } + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader *pheader = + 
(ElfProgramHeader *)(pheaders + i * _header->e_phentsize); + if (pheader->p_type == type) { + return pheader; } + } - return NULL; + return NULL; } -bool ElfParser::parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug) { - int fd = open(file_name, O_RDONLY); - if (fd == -1) { - return false; - } - - size_t length = (size_t)lseek64(fd, 0, SEEK_END); - void* addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - - if (addr == MAP_FAILED) { - //Log::warn("Could not parse symbols from %s: %s", file_name, strerror(errno)); - } else { - ElfParser elf(cc, base, addr, file_name); - if (elf.validHeader()) { - elf.loadSymbols(use_debug); - } - munmap(addr, length); +bool ElfParser::parseFile(CodeCache *cc, const char *base, + const char *file_name, bool use_debug) { + int fd = open(file_name, O_RDONLY); + if (fd == -1) { + return false; + } + + size_t length = (size_t)lseek64(fd, 0, SEEK_END); + void *addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (addr == MAP_FAILED) { + // Log::warn("Could not parse symbols from %s: %s", file_name, + // strerror(errno)); + } else { + ElfParser elf(cc, base, addr, file_name); + if (elf.validHeader()) { + elf.loadSymbols(use_debug); } - return true; + munmap(addr, length); + } + return true; } -void ElfParser::parseMem(CodeCache* cc, const char* base) { - ElfParser elf(cc, base, base); - if (elf.validHeader()) { - elf.loadSymbols(false); - } +void ElfParser::parseMem(CodeCache *cc, const char *base) { + ElfParser elf(cc, base, base); + if (elf.validHeader()) { + elf.loadSymbols(false); + } } -void ElfParser::parseProgramHeaders(CodeCache* cc, const char* base) { - ElfParser elf(cc, base, base); - if (elf.validHeader()) { - cc->setTextBase(base); - elf.parseDynamicSection(); - elf.parseDwarfInfo(); - } +void ElfParser::parseProgramHeaders(CodeCache *cc, const char *base) { + ElfParser elf(cc, base, base); + if (elf.validHeader()) { + cc->setTextBase(base); + 
elf.parseDynamicSection(); + elf.parseDwarfInfo(); + } } void ElfParser::parseDynamicSection() { - ElfProgramHeader* dynamic = findProgramHeader(PT_DYNAMIC); - if (dynamic != NULL) { - void** got_start = NULL; - size_t pltrelsz = 0; - char* rel = NULL; - size_t relsz = 0; - size_t relent = 0; - size_t relcount = 0; - - const char* dyn_start = at(dynamic); - const char* dyn_end = dyn_start + dynamic->p_memsz; - for (ElfDyn* dyn = (ElfDyn*)dyn_start; dyn < (ElfDyn*)dyn_end; dyn++) { - switch (dyn->d_tag) { - case DT_PLTGOT: - got_start = (void**)DYN_PTR(dyn->d_un.d_ptr) + 3; - break; - case DT_PLTRELSZ: - pltrelsz = dyn->d_un.d_val; - break; - case DT_RELA: - case DT_REL: - rel = (char*)DYN_PTR(dyn->d_un.d_ptr); - break; - case DT_RELASZ: - case DT_RELSZ: - relsz = dyn->d_un.d_val; - break; - case DT_RELAENT: - case DT_RELENT: - relent = dyn->d_un.d_val; - break; - case DT_RELACOUNT: - case DT_RELCOUNT: - relcount = dyn->d_un.d_val; - break; - } + ElfProgramHeader *dynamic = findProgramHeader(PT_DYNAMIC); + if (dynamic != NULL) { + void **got_start = NULL; + size_t pltrelsz = 0; + char *rel = NULL; + size_t relsz = 0; + size_t relent = 0; + size_t relcount = 0; + + const char *dyn_start = at(dynamic); + const char *dyn_end = dyn_start + dynamic->p_memsz; + for (ElfDyn *dyn = (ElfDyn *)dyn_start; dyn < (ElfDyn *)dyn_end; dyn++) { + switch (dyn->d_tag) { + case DT_PLTGOT: + got_start = (void **)DYN_PTR(dyn->d_un.d_ptr) + 3; + break; + case DT_PLTRELSZ: + pltrelsz = dyn->d_un.d_val; + break; + case DT_RELA: + case DT_REL: + rel = (char *)DYN_PTR(dyn->d_un.d_ptr); + break; + case DT_RELASZ: + case DT_RELSZ: + relsz = dyn->d_un.d_val; + break; + case DT_RELAENT: + case DT_RELENT: + relent = dyn->d_un.d_val; + break; + case DT_RELACOUNT: + case DT_RELCOUNT: + relcount = dyn->d_un.d_val; + break; + } + } + + if (relent != 0) { + if (pltrelsz != 0 && got_start != NULL) { + // The number of entries in .got.plt section matches the number of + // entries in .rela.plt + 
_cc->setGlobalOffsetTable(got_start, got_start + pltrelsz / relent, + false); + } else if (rel != NULL && relsz != 0) { + // RELRO technique: .got.plt has been merged into .got and made + // read-only. Find .got end from the highest relocation address. + void **min_addr = (void **)-1; + void **max_addr = (void **)0; + for (size_t offs = relcount * relent; offs < relsz; offs += relent) { + ElfRelocation *r = (ElfRelocation *)(rel + offs); + if (ELF_R_TYPE(r->r_info) == R_GLOB_DAT) { + void **addr = (void **)(_base + r->r_offset); + if (addr < min_addr) + min_addr = addr; + if (addr > max_addr) + max_addr = addr; + } + } + + if (got_start == NULL) { + got_start = (void **)min_addr; } - if (relent != 0) { - if (pltrelsz != 0 && got_start != NULL) { - // The number of entries in .got.plt section matches the number of entries in .rela.plt - _cc->setGlobalOffsetTable(got_start, got_start + pltrelsz / relent, false); - } else if (rel != NULL && relsz != 0) { - // RELRO technique: .got.plt has been merged into .got and made read-only. - // Find .got end from the highest relocation address. 
- void** min_addr = (void**)-1; - void** max_addr = (void**)0; - for (size_t offs = relcount * relent; offs < relsz; offs += relent) { - ElfRelocation* r = (ElfRelocation*)(rel + offs); - if (ELF_R_TYPE(r->r_info) == R_GLOB_DAT) { - void** addr = (void**)(_base + r->r_offset); - if (addr < min_addr) min_addr = addr; - if (addr > max_addr) max_addr = addr; - } - } - - if (got_start == NULL) { - got_start = (void**)min_addr; - } - - if (max_addr >= got_start) { - _cc->setGlobalOffsetTable(got_start, max_addr + 1, false); - } - } + if (max_addr >= got_start) { + _cc->setGlobalOffsetTable(got_start, max_addr + 1, false); } + } } + } } void ElfParser::parseDwarfInfo() { - if (!DWARF_SUPPORTED) return; - - ElfProgramHeader* eh_frame_hdr = findProgramHeader(PT_GNU_EH_FRAME); - if (eh_frame_hdr != NULL) { - DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); - _cc->setDwarfTable(dwarf.table(), dwarf.count()); - } + if (!DWARF_SUPPORTED) + return; + + ElfProgramHeader *eh_frame_hdr = findProgramHeader(PT_GNU_EH_FRAME); + if (eh_frame_hdr != NULL) { + DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); + _cc->setDwarfTable(dwarf.table(), dwarf.count()); + } } void ElfParser::loadSymbols(bool use_debug) { - // Look for debug symbols in the original .so - ElfSection* section = findSection(SHT_SYMTAB, ".symtab"); - if (section != NULL) { - loadSymbolTable(section); - goto loaded; - } - - // Try to load symbols from an external debuginfo library - if (use_debug) { - if (loadSymbolsUsingBuildId() || loadSymbolsUsingDebugLink()) { - goto loaded; - } + // Look for debug symbols in the original .so + ElfSection *section = findSection(SHT_SYMTAB, ".symtab"); + if (section != NULL) { + loadSymbolTable(section); + goto loaded; + } + + // Try to load symbols from an external debuginfo library + if (use_debug) { + if (loadSymbolsUsingBuildId() || loadSymbolsUsingDebugLink()) { + goto loaded; } + } - // If everything else fails, load only exported symbols - section = 
findSection(SHT_DYNSYM, ".dynsym"); - if (section != NULL) { - loadSymbolTable(section); - } + // If everything else fails, load only exported symbols + section = findSection(SHT_DYNSYM, ".dynsym"); + if (section != NULL) { + loadSymbolTable(section); + } loaded: - if (use_debug) { - // Synthesize names for PLT stubs - ElfSection* plt = findSection(SHT_PROGBITS, ".plt"); - ElfSection* reltab = findSection(SHT_RELA, ".rela.plt"); - if (reltab == NULL) { - reltab = findSection(SHT_REL, ".rel.plt"); - } - if (plt != NULL && reltab != NULL) { - addRelocationSymbols(reltab, _base + plt->sh_offset + PLT_HEADER_SIZE); - } + if (use_debug) { + // Synthesize names for PLT stubs + ElfSection *plt = findSection(SHT_PROGBITS, ".plt"); + ElfSection *reltab = findSection(SHT_RELA, ".rela.plt"); + if (reltab == NULL) { + reltab = findSection(SHT_REL, ".rel.plt"); + } + if (plt != NULL && reltab != NULL) { + addRelocationSymbols(reltab, _base + plt->sh_offset + PLT_HEADER_SIZE); } + } } -// Load symbols from /usr/lib/debug/.build-id/ab/cdef1234.debug, where abcdef1234 is Build ID +// Load symbols from /usr/lib/debug/.build-id/ab/cdef1234.debug, where +// abcdef1234 is Build ID bool ElfParser::loadSymbolsUsingBuildId() { - ElfSection* section = findSection(SHT_NOTE, ".note.gnu.build-id"); - if (section == NULL || section->sh_size <= 16) { - return false; - } - - ElfNote* note = (ElfNote*)at(section); - if (note->n_namesz != 4 || note->n_descsz < 2 || note->n_descsz > 64) { - return false; - } - - const char* build_id = (const char*)note + sizeof(*note) + 4; - int build_id_len = note->n_descsz; - - char path[PATH_MAX]; - char* p = path + sprintf(path, "/usr/lib/debug/.build-id/%02hhx/", build_id[0]); - for (int i = 1; i < build_id_len; i++) { - p += sprintf(p, "%02hhx", build_id[i]); - } - strcpy(p, ".debug"); - - return parseFile(_cc, _base, path, false); + ElfSection *section = findSection(SHT_NOTE, ".note.gnu.build-id"); + if (section == NULL || section->sh_size <= 16) { + return 
false; + } + + ElfNote *note = (ElfNote *)at(section); + if (note->n_namesz != 4 || note->n_descsz < 2 || note->n_descsz > 64) { + return false; + } + + const char *build_id = (const char *)note + sizeof(*note) + 4; + int build_id_len = note->n_descsz; + + char path[PATH_MAX]; + char *p = + path + sprintf(path, "/usr/lib/debug/.build-id/%02hhx/", build_id[0]); + for (int i = 1; i < build_id_len; i++) { + p += sprintf(p, "%02hhx", build_id[i]); + } + strcpy(p, ".debug"); + + return parseFile(_cc, _base, path, false); } // Look for debuginfo file specified in .gnu_debuglink section bool ElfParser::loadSymbolsUsingDebugLink() { - ElfSection* section = findSection(SHT_PROGBITS, ".gnu_debuglink"); - if (section == NULL || section->sh_size <= 4) { - return false; - } - - const char* basename = strrchr(_file_name, '/'); - if (basename == NULL) { - return false; - } - - char* dirname = strndup(_file_name, basename - _file_name); - if (dirname == NULL) { - return false; - } - - const char* debuglink = at(section); - char path[PATH_MAX]; - bool result = false; + ElfSection *section = findSection(SHT_PROGBITS, ".gnu_debuglink"); + if (section == NULL || section->sh_size <= 4) { + return false; + } + + const char *basename = strrchr(_file_name, '/'); + if (basename == NULL) { + return false; + } + + char *dirname = strndup(_file_name, basename - _file_name); + if (dirname == NULL) { + return false; + } + + const char *debuglink = at(section); + char path[PATH_MAX]; + bool result = false; + + // 1. /path/to/libjvm.so.debug + if (strcmp(debuglink, basename + 1) != 0 && + snprintf(path, PATH_MAX, "%s/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 2. /path/to/.debug/libjvm.so.debug + if (!result && + snprintf(path, PATH_MAX, "%s/.debug/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 3. 
/usr/lib/debug/path/to/libjvm.so.debug + if (!result && + snprintf(path, PATH_MAX, "/usr/lib/debug%s/%s", dirname, debuglink) < + PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + free(dirname); + return result; +} - // 1. /path/to/libjvm.so.debug - if (strcmp(debuglink, basename + 1) != 0 && - snprintf(path, PATH_MAX, "%s/%s", dirname, debuglink) < PATH_MAX) { - result = parseFile(_cc, _base, path, false); +void ElfParser::loadSymbolTable(ElfSection *symtab) { + ElfSection *strtab = section(symtab->sh_link); + const char *strings = at(strtab); + + const char *symbols = at(symtab); + const char *symbols_end = symbols + symtab->sh_size; + for (; symbols < symbols_end; symbols += symtab->sh_entsize) { + ElfSymbol *sym = (ElfSymbol *)symbols; + if (sym->st_name != 0 && sym->st_value != 0) { + // Skip special AArch64 mapping symbols: $x and $d + if (sym->st_size != 0 || sym->st_info != 0 || + strings[sym->st_name] != '$') { + // printf("Loading sym %s at 0x%lx (base=0x%lx)\n", + // strings + sym->st_name, _base + sym->st_value, _base); + _cc->add(_base + sym->st_value, (int)sym->st_size, + strings + sym->st_name); + } } + } +} - // 2. /path/to/.debug/libjvm.so.debug - if (!result && snprintf(path, PATH_MAX, "%s/.debug/%s", dirname, debuglink) < PATH_MAX) { - result = parseFile(_cc, _base, path, false); - } +void ElfParser::addRelocationSymbols(ElfSection *reltab, const char *plt) { + ElfSection *symtab = section(reltab->sh_link); + const char *symbols = at(symtab); - // 3. 
/usr/lib/debug/path/to/libjvm.so.debug - if (!result && snprintf(path, PATH_MAX, "/usr/lib/debug%s/%s", dirname, debuglink) < PATH_MAX) { - result = parseFile(_cc, _base, path, false); - } + ElfSection *strtab = section(symtab->sh_link); + const char *strings = at(strtab); - free(dirname); - return result; -} + const char *relocations = at(reltab); + const char *relocations_end = relocations + reltab->sh_size; + for (; relocations < relocations_end; relocations += reltab->sh_entsize) { + ElfRelocation *r = (ElfRelocation *)relocations; + ElfSymbol *sym = + (ElfSymbol *)(symbols + ELF_R_SYM(r->r_info) * symtab->sh_entsize); -void ElfParser::loadSymbolTable(ElfSection* symtab) { - ElfSection* strtab = section(symtab->sh_link); - const char* strings = at(strtab); - - const char* symbols = at(symtab); - const char* symbols_end = symbols + symtab->sh_size; - for (; symbols < symbols_end; symbols += symtab->sh_entsize) { - ElfSymbol* sym = (ElfSymbol*)symbols; - if (sym->st_name != 0 && sym->st_value != 0) { - // Skip special AArch64 mapping symbols: $x and $d - if (sym->st_size != 0 || sym->st_info != 0 || strings[sym->st_name] != '$') { - printf("Loading sym %s at 0x%lx (base=0x%lx)\n", strings + sym->st_name, _base + sym->st_value, _base); - _cc->add(_base + sym->st_value, (int)sym->st_size, strings + sym->st_name); - } - } + char name[256]; + if (sym->st_name == 0) { + strcpy(name, "@plt"); + } else { + const char *sym_name = strings + sym->st_name; + snprintf(name, sizeof(name), "%s%cplt", sym_name, + sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' 
: '@'); + name[sizeof(name) - 1] = 0; } -} - -void ElfParser::addRelocationSymbols(ElfSection* reltab, const char* plt) { - ElfSection* symtab = section(reltab->sh_link); - const char* symbols = at(symtab); - - ElfSection* strtab = section(symtab->sh_link); - const char* strings = at(strtab); - - const char* relocations = at(reltab); - const char* relocations_end = relocations + reltab->sh_size; - for (; relocations < relocations_end; relocations += reltab->sh_entsize) { - ElfRelocation* r = (ElfRelocation*)relocations; - ElfSymbol* sym = (ElfSymbol*)(symbols + ELF_R_SYM(r->r_info) * symtab->sh_entsize); - - char name[256]; - if (sym->st_name == 0) { - strcpy(name, "@plt"); - } else { - const char* sym_name = strings + sym->st_name; - snprintf(name, sizeof(name), "%s%cplt", sym_name, sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' : '@'); - name[sizeof(name) - 1] = 0; - } - _cc->add(plt, PLT_ENTRY_SIZE, name); - plt += PLT_ENTRY_SIZE; - } + _cc->add(plt, PLT_ENTRY_SIZE, name); + plt += PLT_ENTRY_SIZE; + } } - Mutex Symbols::_parse_lock; bool Symbols::_have_kernel_symbols = false; -void Symbols::parseKernelSymbols(CodeCache* cc) { - // XXX(nick): omitted +void Symbols::parseKernelSymbols(CodeCache *cc) { + // XXX(nick): omitted } -void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) { - // we can't use static global sets due to undefined initialization order stuff - // (see https://stackoverflow.com/questions/27145617/segfault-when-adding-an-element-to-a-stdmap) - // I'm not sure why this original code even worked? 
- std::set parsed_libraries; - std::set parsed_inodes; - MutexLocker ml(_parse_lock); - - FILE* f = fopen("/proc/self/maps", "r"); - if (f == NULL) { - return; +void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { + // we can't use static global sets due to undefined initialization order stuff + // (see + // https://stackoverflow.com/questions/27145617/segfault-when-adding-an-element-to-a-stdmap) + // I'm not sure why this original code even worked? + std::set parsed_libraries; + std::set parsed_inodes; + MutexLocker ml(_parse_lock); + + FILE *f = fopen("/proc/self/maps", "r"); + if (f == NULL) { + return; + } + + const char *last_readable_base = NULL; + const char *image_end = NULL; + char *str = NULL; + size_t str_size = 0; + ssize_t len; + + while ((len = getline(&str, &str_size, f)) > 0) { + str[len - 1] = 0; + + MemoryMapDesc map(str); + if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { + continue; } - const char* last_readable_base = NULL; - const char* image_end = NULL; - char* str = NULL; - size_t str_size = 0; - ssize_t len; + const char *image_base = map.addr(); + if (image_base != image_end) + last_readable_base = image_base; + image_end = map.end(); - while ((len = getline(&str, &str_size, f)) > 0) { - str[len - 1] = 0; + if (map.isExecutable()) { + if (!parsed_libraries.insert(image_base).second) { + continue; // the library was already parsed + } - MemoryMapDesc map(str); - if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { - continue; - } + int count = array->count(); + if (count >= MAX_NATIVE_LIBS) { + break; + } + + CodeCache *cc = new CodeCache(map.file(), count, image_base, image_end); - const char* image_base = map.addr(); - if (image_base != image_end) last_readable_base = image_base; - image_end = map.end(); - - if (map.isExecutable()) { - if (!parsed_libraries.insert(image_base).second) { - continue; // the library was already parsed - } - - int count = array->count(); - if (count >= 
MAX_NATIVE_LIBS) { - break; - } - - CodeCache* cc = new CodeCache(map.file(), count, image_base, image_end); - - unsigned long inode = map.inode(); - if (inode != 0) { - // Do not parse the same executable twice, e.g. on Alpine Linux - if (parsed_inodes.insert(map.dev() | inode << 16).second) { - // Be careful: executable file is not always ELF, e.g. classes.jsa - if ((image_base -= map.offs()) >= last_readable_base) { - ElfParser::parseProgramHeaders(cc, image_base); - } - ElfParser::parseFile(cc, image_base, map.file(), true); - } - } else if (strcmp(map.file(), "[vdso]") == 0) { - ElfParser::parseMem(cc, image_base); - } - - cc->sort(); - array->add(cc); + unsigned long inode = map.inode(); + if (inode != 0) { + // Do not parse the same executable twice, e.g. on Alpine Linux + if (parsed_inodes.insert(map.dev() | inode << 16).second) { + // Be careful: executable file is not always ELF, e.g. classes.jsa + if ((image_base -= map.offs()) >= last_readable_base) { + ElfParser::parseProgramHeaders(cc, image_base); + } + ElfParser::parseFile(cc, image_base, map.file(), true); } + } else if (strcmp(map.file(), "[vdso]") == 0) { + ElfParser::parseMem(cc, image_base); + } + + cc->sort(); + array->add(cc); } + } - free(str); - fclose(f); + free(str); + fclose(f); } #endif // __linux__ diff --git a/src/unwind_helpers.cc b/src/unwind_helpers.cc index c7bab7bd8..7630977c1 100644 --- a/src/unwind_helpers.cc +++ b/src/unwind_helpers.cc @@ -108,7 +108,7 @@ bool memory_read(ProcessAddress_t addr, ElfWord_t *result, int regno, // stack grows down, so end of stack is start // us->initial_regs.sp does not have to be aligned - uint64_t sp_start = us->initial_regs.regs[REGNAME(SP)]; + uint64_t sp_start = us->initial_regs.regs[REGNAME(SP)]; uint64_t sp_end = sp_start + us->stack_sz; if (addr < sp_start && addr > sp_start - 4096) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 46e960fb8..51ea2df4a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -316,12 
+316,11 @@ add_benchmark(timer-bench timer-bench.cc ../src/timer.cc ../src/perf.cc) message(STATUS "Async profiler" ${ASYNC_PROFILER_SRC}) -add_unit_test(dwarf_unwind-ut dwarf_unwind-ut.cc ../src/lib/savecontext.cc ../src/lib/saveregisters.cc ${ASYNC_PROFILER_SRC} - LIBRARIES ${ELFUTILS_LIBRARIES}) +add_unit_test( + dwarf_unwind-ut dwarf_unwind-ut.cc ../src/lib/savecontext.cc ../src/lib/saveregisters.cc + ../src/async-profiler/stack_context.cpp ${ASYNC_PROFILER_SRC} LIBRARIES ${ELFUTILS_LIBRARIES}) target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE}) - - if(NOT CMAKE_BUILD_TYPE STREQUAL "SanitizedDebug") add_exe( simple_malloc-static simple_malloc.cc diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 521dea80a..9c8db2f15 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -1,8 +1,7 @@ #include - -#include "unwind_state.hpp" #include "savecontext.hpp" +#include "unwind_state.hpp" // #include "symbol.hpp" #include "stackWalker.h" @@ -21,90 +20,76 @@ (unsigned long)&&__here; \ }) - // #include "ddprof_defs.hpp" - // temp copy pasta #define PERF_SAMPLE_STACK_SIZE (4096UL * 8) -#define CAST_TO_VOID_STAR(ptr) reinterpret_cast(ptr) std::byte stack[PERF_SAMPLE_STACK_SIZE]; -DDPROF_NOINLINE size_t funcA(std::array ®s); -DDPROF_NOINLINE size_t funcB(std::array ®s); +DDPROF_NOINLINE size_t funcA(std::array ®s); +DDPROF_NOINLINE size_t funcB(std::array ®s); -size_t funcB(std::array ®s) { - // Load libraries - CodeCacheArray cache_arary; - Symbols::parseLibraries(&cache_arary, false); +size_t funcB(std::array ®s) { printf("Here we are in B %lx \n", _THIS_IP_); size_t size = save_context(retrieve_stack_end_address(), regs, stack); - { // IP - uint64_t ip = regs[REGNAME(PC)]; - printf("%lx = ip\n", ip); - - { // small useless test - CodeCache *code_cache = findLibraryByAddress(&cache_arary, reinterpret_cast(ip)); - EXPECT_TRUE(code_cache); - } - } - - // context from saving state - ap::StackContext sc; - #ifdef 
__x86_64__ - sc.pc = CAST_TO_VOID_STAR(regs[REGNAME(PC)]); - sc.sp = regs[REGNAME(SP)]; - sc.fp = regs[REGNAME(RBP)]; -#elif __aarch64__ - sc.pc = CAST_TO_VOID_STAR(regs[REGNAME(PC)]); - sc.sp = regs[REGNAME(SP)]; - sc.fp = regs[REGNAME(FP)]; -#endif - // size should be < PERF_SAMPLE_STACK_SIZE - ap::StackBuffer buffer(stack, sc.sp, sc.sp + size); - - void *stack[128]; - int n = stackWalk(&cache_arary, sc, buffer, const_cast(stack), 128, 0); - for (int i = 0; i < n; ++i) { - { // retrieve symbol - CodeCache *code_cache = findLibraryByAddress(&cache_arary, reinterpret_cast(stack[i])); - if (code_cache) { - const char *sym_name = code_cache->binarySearch(stack[i]); - printf("IP = %p - %s\n", stack[i], sym_name); - } - } - } - return size; } -size_t funcA(std::array ®s) { +size_t funcA(std::array ®s) { printf("Here we are in A %lx \n", _THIS_IP_); return funcB(regs); } +void unwind_async_profiler() {} -void unwind_async_profiler() { - -} +void unwind_libdwfl() {} -void unwind_libdwfl(){ +namespace ap { } TEST(dwarf_unwind, simple) { - std::array regs; - size_t size_stack = funcA(regs); + // Load libraries + CodeCacheArray cache_arary; + Symbols::parseLibraries(&cache_arary, false); + std::array regs; + size_t size_stack = funcA(regs); EXPECT_TRUE(size_stack); + { // IP + uint64_t ip = regs[REGNAME(PC)]; + printf("%lx = ip\n", ip); - // DO REGNAME(RBP) --> Gives the index inside the table - // DO REGNAME(SP) - // DO REGNAME(PC) + { // small useless test + CodeCache *code_cache = + findLibraryByAddress(&cache_arary, reinterpret_cast(ip)); + EXPECT_TRUE(code_cache); + } + } + ap::StackContext sc = ap::from_regs(std::span(regs)); + ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); - // int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, const void** callchain, int max_depth, int skip) { + void *stack[128]; + int n = stackWalk(&cache_arary, sc, buffer, const_cast(stack), + 128, 0); + const char* syms[128]; + + for (int i = 0; i < n; ++i) { + { // retrieve 
symbol + CodeCache *code_cache = findLibraryByAddress( + &cache_arary, reinterpret_cast(stack[i])); + if (code_cache) { + syms[i] = code_cache->binarySearch(stack[i]); + printf("IP = %p - %s\n", stack[i], syms[i]); + } + } + } + // Check that we found the expected functions during unwinding + ASSERT_TRUE(std::string(syms[0]).find("save_context") != std::string::npos); + ASSERT_TRUE(std::string(syms[1]).find("funcB") != std::string::npos); + ASSERT_TRUE(std::string(syms[2]).find("funcA") != std::string::npos); } From ed168f42cb572de2ba8dca67843c9e1ad23df4ec Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 17 Nov 2022 11:27:24 +0100 Subject: [PATCH 08/29] Minor notes on steps to take to improve unwinding --- design_notes.txt | 33 ++++++++++++++++++++++-------- src/async-profiler/stackWalker.cpp | 1 - test/dwarf_unwind-ut.cc | 30 ++++++--------------------- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/design_notes.txt b/design_notes.txt index b592706c1..761f29dba 100644 --- a/design_notes.txt +++ b/design_notes.txt @@ -17,25 +17,40 @@ DwflHdr -- File info (dso_hdr) -- Lookup Dwarf symbol lookup - +-- Symbol table ## Async profiler -### SymbolLinux -parseLibraries parses everything in proc self -Instead, write an API that can work with ddprof object model +parseLibraries parses everything in proc self. 
+ +### Step 1 -- ensure we can have one code array per PID + + +### Step 2 -- ensure symbols are shared across PIDs + + +### Step 3 -- ensure unwinding tables are shared across PIDs + + + +### Junk notes +Write an API that can work with ddprof object model LoadSymbolTable loads at a given base address We want to store all symbols at an elf address -Oh wow, everything is added in a weird code blob -Oh wow, native func is stored in a char and we deduce pointer from the offset to the name - -1) Rewrite parseLibraries +1) Rewrite parseLibraries we can use DSO information + file info Ensure the cache is at elf address (not base) Start is 0 or for non PIE, 2) Find library is by absolute address -- Keep that ? \ No newline at end of file +- Keep that ? + + + +### Issues + +- We are moving from a lazy to an absolute load +- We don't have enough tests diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index d7dc31530..4472b79d9 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp @@ -116,7 +116,6 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, return false; } } - void *new_pc = stripPointer(SafeAccess::load((void **)sc.sp - 1)); // Update the pc using return address if (!read_memory(reinterpret_cast((void **)sc.sp - 1), reinterpret_cast(&sc.pc), buffer)) { diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 9c8db2f15..60ede645d 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -44,14 +44,6 @@ size_t funcA(std::array ®s) { return funcB(regs); } -void unwind_async_profiler() {} - -void unwind_libdwfl() {} - -namespace ap { - -} - TEST(dwarf_unwind, simple) { // Load libraries CodeCacheArray cache_arary; @@ -60,34 +52,24 @@ TEST(dwarf_unwind, simple) { size_t size_stack = funcA(regs); EXPECT_TRUE(size_stack); - { // IP - uint64_t ip = regs[REGNAME(PC)]; - printf("%lx = ip\n", ip); - - { // small useless test - CodeCache *code_cache = - 
findLibraryByAddress(&cache_arary, reinterpret_cast(ip)); - EXPECT_TRUE(code_cache); - } - } ap::StackContext sc = ap::from_regs(std::span(regs)); ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); - void *stack[128]; - int n = stackWalk(&cache_arary, sc, buffer, const_cast(stack), + void *callchain[128]; + int n = stackWalk(&cache_arary, sc, buffer, const_cast(callchain), 128, 0); const char* syms[128]; - for (int i = 0; i < n; ++i) { { // retrieve symbol CodeCache *code_cache = findLibraryByAddress( - &cache_arary, reinterpret_cast(stack[i])); + &cache_arary, reinterpret_cast(callchain[i])); if (code_cache) { - syms[i] = code_cache->binarySearch(stack[i]); - printf("IP = %p - %s\n", stack[i], syms[i]); + syms[i] = code_cache->binarySearch(callchain[i]); + printf("IP = %p - %s\n", callchain[i], syms[i]); } } } + // Check that we found the expected functions during unwinding ASSERT_TRUE(std::string(syms[0]).find("save_context") != std::string::npos); ASSERT_TRUE(std::string(syms[1]).find("funcB") != std::string::npos); From fcda4a5ee8f5a5117923ad824940e33fc5979e5e Mon Sep 17 00:00:00 2001 From: r1viollet Date: Fri, 18 Nov 2022 14:44:36 +0100 Subject: [PATCH 09/29] Create a remote unwinding test --- include/async-profiler/symbols.h | 1 + src/async-profiler/symbols_linux.cpp | 67 +++++++++++++++++++++++ test/CMakeLists.txt | 4 +- test/dwarf_unwind-ut.cc | 80 +++++++++++++++++++++++++++- 4 files changed, 150 insertions(+), 2 deletions(-) diff --git a/include/async-profiler/symbols.h b/include/async-profiler/symbols.h index 3271ccbe1..d0df18428 100644 --- a/include/async-profiler/symbols.h +++ b/include/async-profiler/symbols.h @@ -28,6 +28,7 @@ class Symbols { public: static void parseKernelSymbols(CodeCache *cc); static void parseLibraries(CodeCacheArray *array, bool kernel_symbols); + static void parsePidLibraries(pid_t pid, CodeCacheArray *array, bool kernel_symbols); static bool haveKernelSymbols() { return _have_kernel_symbols; } }; diff --git 
a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index d6bc67d64..2807c5f8f 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -509,6 +509,73 @@ void Symbols::parseKernelSymbols(CodeCache *cc) { // XXX(nick): omitted } + +void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, bool kernel_symbols) { + std::set parsed_libraries; + std::set parsed_inodes; + MutexLocker ml(_parse_lock); + char proc_map_filename[1024] = {}; + snprintf(proc_map_filename, std::size(proc_map_filename), "%s/proc/%d/maps", "", pid); + // todo plug the proc_map open functions (handles user switches) + FILE *f = fopen(proc_map_filename, "r"); + if (f == NULL) { + return; + } + + const char *last_readable_base = NULL; + const char *image_end = NULL; + char *str = NULL; + size_t str_size = 0; + ssize_t len; + + while ((len = getline(&str, &str_size, f)) > 0) { + str[len - 1] = 0; + + MemoryMapDesc map(str); + if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { + continue; + } + + const char *image_base = map.addr(); + if (image_base != image_end) + last_readable_base = image_base; + image_end = map.end(); + + if (map.isExecutable()) { + if (!parsed_libraries.insert(image_base).second) { + continue; // the library was already parsed + } + + int count = array->count(); + if (count >= MAX_NATIVE_LIBS) { + break; + } + + CodeCache *cc = new CodeCache(map.file(), count, image_base, image_end); + + unsigned long inode = map.inode(); + if (inode != 0) { + // Do not parse the same executable twice, e.g. on Alpine Linux + if (parsed_inodes.insert(map.dev() | inode << 16).second) { + // Be careful: executable file is not always ELF, e.g. 
classes.jsa + if ((image_base -= map.offs()) >= last_readable_base) { + ElfParser::parseProgramHeaders(cc, image_base); + } + ElfParser::parseFile(cc, image_base, map.file(), true); + } + } else if (strcmp(map.file(), "[vdso]") == 0) { + ElfParser::parseMem(cc, image_base); + } + + cc->sort(); + array->add(cc); + } + } + + free(str); + fclose(f); +} + void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { // we can't use static global sets due to undefined initialization order stuff // (see diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 51ea2df4a..49eb2ae39 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -318,7 +318,9 @@ message(STATUS "Async profiler" ${ASYNC_PROFILER_SRC}) add_unit_test( dwarf_unwind-ut dwarf_unwind-ut.cc ../src/lib/savecontext.cc ../src/lib/saveregisters.cc - ../src/async-profiler/stack_context.cpp ${ASYNC_PROFILER_SRC} LIBRARIES ${ELFUTILS_LIBRARIES}) + ../src/async-profiler/stack_context.cpp ../src/lib/allocation_tracker.cc ../src/ringbuffer_utils.cc ../src/perf_ringbuffer.cc + ../src/perf.cc ../src/pevent_lib.cc ../src/sys_utils.cc ../src/user_override.cc ../src/perf_watcher.cc + ${ASYNC_PROFILER_SRC} LIBRARIES ${ELFUTILS_LIBRARIES}) target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE}) if(NOT CMAKE_BUILD_TYPE STREQUAL "SanitizedDebug") diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 60ede645d..a39f2375d 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -32,7 +32,6 @@ DDPROF_NOINLINE size_t funcA(std::array ®s); DDPROF_NOINLINE size_t funcB(std::array ®s); size_t funcB(std::array ®s) { - printf("Here we are in B %lx \n", _THIS_IP_); size_t size = save_context(retrieve_stack_end_address(), regs, stack); @@ -75,3 +74,82 @@ TEST(dwarf_unwind, simple) { ASSERT_TRUE(std::string(syms[1]).find("funcB") != std::string::npos); ASSERT_TRUE(std::string(syms[2]).find("funcA") != std::string::npos); } + + + +#include "ringbuffer_holder.hpp" 
+#include "ringbuffer_utils.hpp" +#include "allocation_tracker.hpp" +#include "perf_ringbuffer.hpp" + +DDPROF_NOINLINE void func_save_sleep(size_t size); +DDPROF_NOINLINE void func_intermediate(size_t size); + +DDPROF_NOINLINE void func_save_sleep(size_t size) { + ddprof::AllocationTracker::track_allocation(0xdeadbeef, size); + // prevent tail call optimization + getpid(); + sleep(1); +} + +void func_intermediate(size_t size) { + func_save_sleep(size); +} + +TEST(dwarf_unwind, remote) { + const uint64_t rate = 1; + const size_t buf_size_order = 5; + ddprof::RingBufferHolder ring_buffer{buf_size_order, + RingBufferType::kMPSCRingBuffer}; + // use allocation tracking to store events + ddprof::AllocationTracker::allocation_tracking_init( + rate, ddprof::AllocationTracker::kDeterministicSampling, + ring_buffer.get_buffer_info()); + + // Fork + pid_t parent_pid = getpid(); + pid_t temp_pid = fork(); + if (!temp_pid) { + func_intermediate(10); + return; + } + + // Load libraries from the fork - Cache array is relent to a single pid + CodeCacheArray cache_arary; + Symbols::parsePidLibraries(temp_pid, &cache_arary, false); + // Establish a ring buffer ? 
+ + ddprof::MPSCRingBufferReader reader{ring_buffer.get_ring_buffer()}; + ASSERT_GT(reader.available_size(), 0); + + auto buf = reader.read_sample(); + ASSERT_FALSE(buf.empty()); + const perf_event_header *hdr = + reinterpret_cast(buf.data()); + ASSERT_EQ(hdr->type, PERF_RECORD_SAMPLE); + + // convert based on mask for this watcher (default in this case) + perf_event_sample *sample = hdr2samp(hdr, perf_event_default_sample_type()); + + std::span regs_span{sample->regs, PERF_REGS_COUNT}; + ap::StackContext sc = ap::from_regs(regs_span); + std::span stack{ reinterpret_cast(sample->data_stack), sample->size_stack}; + ap::StackBuffer buffer(stack, sc.sp, sc.sp + sample->size_stack); + + void *callchain[DD_MAX_STACK_DEPTH]; + int n = stackWalk(&cache_arary, sc, buffer, const_cast(callchain), + DD_MAX_STACK_DEPTH, 0); + + std::array syms; + for (int i = 0; i < n; ++i) { + { // retrieve symbol + CodeCache *code_cache = findLibraryByAddress( + &cache_arary, reinterpret_cast(callchain[i])); + if (code_cache) { + syms[i] = code_cache->binarySearch(callchain[i]); + printf("IP = %p - %s\n", callchain[i], syms[i]); + } + } + } + +} From e4cff8905ab88c5f9076d0ed302b8b441711b3b6 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Sat, 19 Nov 2022 09:08:33 +0100 Subject: [PATCH 10/29] Add an async prof library --- CMakeLists.txt | 12 +++- include/async-profiler/stackWalker.h | 2 + include/async-profiler/stack_context.h | 8 +-- include/lib/saveregisters.hpp | 1 + include/span.hpp | 7 --- include/unwind_output.hpp | 9 +++ src/async-profiler/stackWalker.cpp | 1 + src/async-profiler/stack_context.cpp | 2 +- src/exe/main.cc | 2 +- src/perf_mainloop.cc | 2 +- src/pprof/ddprof_pprof.cc | 85 ++++++++++++++++++++++++++ src/unwind.cc | 2 +- test/CMakeLists.txt | 4 +- test/dwarf_unwind-ut.cc | 8 +-- 14 files changed, 123 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06d7a7042..129c1683b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,16 @@ else() 
endif() # ---- Dependencies ---- +## -- Async profiler -- +set(ASYNC_PROFILER_LIB_DIR ${CMAKE_SOURCE_DIR}) +set(ASYNC_PROFILER_SRC_DIR ${ASYNC_PROFILER_LIB_DIR}/src/async-profiler) +set(ASYNC_PROFILER_LIB_INCLUDE ${ASYNC_PROFILER_LIB_DIR}/include/async-profiler) +aux_source_directory(${ASYNC_PROFILER_SRC_DIR} ASYNC_PROFILER_SOURCES) +add_library(async_prof_lib STATIC ${ASYNC_PROFILER_SOURCES}) +target_include_directories(async_prof_lib PUBLIC ${ASYNC_PROFILER_LIB_INCLUDE} ${CMAKE_SOURCE_DIR}/include) +set_property(TARGET async_prof_lib PROPERTY POSITION_INDEPENDENT_CODE ON) +add_library(DDProf::AsyncProf ALIAS async_prof_lib) +# ------------------- # libdatadog_profiling include(Findlibdatadog) @@ -145,7 +155,7 @@ aux_source_directory(src/exe EXE_SRC) # Define all sources set(DDPROF_GLOBAL_SRC ${COMMON_SRC} ${PPROF_SRC} ${EXPORTER_SRC} ${EXE_SRC}) -set(DDPROF_LIBRARY_LIST DDProf::Parser llvm-demangle ${ELFUTILS_LIBRARIES} Threads::Threads) +set(DDPROF_LIBRARY_LIST DDProf::Parser DDProf::AsyncProf llvm-demangle ${ELFUTILS_LIBRARIES} Threads::Threads) if(ON) # Add the rust library - Refactoring ongoing. 
OFF for now diff --git a/include/async-profiler/stackWalker.h b/include/async-profiler/stackWalker.h index c67cac21d..ceabe17a8 100644 --- a/include/async-profiler/stackWalker.h +++ b/include/async-profiler/stackWalker.h @@ -25,6 +25,8 @@ #include "codeCache.h" #include "stack_context.h" + + CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void *address); int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, diff --git a/include/async-profiler/stack_context.h b/include/async-profiler/stack_context.h index be47ae1fe..35b9cd057 100644 --- a/include/async-profiler/stack_context.h +++ b/include/async-profiler/stack_context.h @@ -3,7 +3,7 @@ #pragma once #include -#include +#include "span.hpp" #include "perf_archmap.hpp" @@ -21,12 +21,12 @@ struct StackContext { }; // Async profiler's unwinding only uses a subset of the registers -StackContext from_regs(const std::span regs); +StackContext from_regs(const ddprof::span regs); struct StackBuffer { - StackBuffer(std::span bytes, uint64_t start, uint64_t end) + StackBuffer(ddprof::span bytes, uint64_t start, uint64_t end) : _bytes(bytes), sp_start(start), sp_end(end) {} - std::span _bytes; + ddprof::span _bytes; uint64_t sp_start; // initial SP (in context of the process) uint64_t sp_end; // sp + size (so root functions = start of stack) /* diff --git a/include/lib/saveregisters.hpp b/include/lib/saveregisters.hpp index 0f0f747c5..24a368fe4 100644 --- a/include/lib/saveregisters.hpp +++ b/include/lib/saveregisters.hpp @@ -23,6 +23,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once +#include #include "ddprof_base.hpp" #include "perf_archmap.hpp" #include "span.hpp" diff --git a/include/span.hpp b/include/span.hpp index c7c223f91..6caa05010 100644 --- a/include/span.hpp +++ b/include/span.hpp @@ -5,7 +5,6 @@ #pragma once -#if __cpp_lib_span # include @@ -15,9 +14,3 @@ using std::as_writable_bytes; using std::span; } // namespace ddprof -#else - -# define TCB_SPAN_NAMESPACE_NAME ddprof -# include "tcb/span.hpp" - -#endif diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index 72e8e7bb6..988a0492c 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -11,6 +11,7 @@ #include "ddprof_defs.hpp" #include "string_view.hpp" +#include typedef struct FunLoc { uint64_t ip; // Relative to file, not VMA @@ -26,4 +27,12 @@ typedef struct UnwindOutput { bool is_incomplete; } UnwindOutput; +struct UnwindOutput_V2 { + std::array callchain; + uint64_t nb_locs = {}; + int pid = {}; + int tid = {}; + bool is_incomplete = false; +}; + void uw_output_clear(UnwindOutput *); diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index 4472b79d9..fc09fbb0b 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp @@ -55,6 +55,7 @@ bool read_memory(uint64_t addr, uint64_t *res, const ap::StackBuffer &buffer) { return true; } +// todo const correctness CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void *address) { const int native_lib_count = cache->count(); for (int i = 0; i < native_lib_count; i++) { diff --git a/src/async-profiler/stack_context.cpp b/src/async-profiler/stack_context.cpp index dc06b1ebc..6bde12b9a 100644 --- a/src/async-profiler/stack_context.cpp +++ b/src/async-profiler/stack_context.cpp @@ -5,7 +5,7 @@ namespace ap { // Async profiler's unwinding only uses a subset of the registers -StackContext from_regs(const std::span regs) { +StackContext from_regs(const ddprof::span regs) { // context from saving state 
ap::StackContext sc; sc.pc = CAST_TO_VOID_STAR(regs[REGNAME(PC)]); diff --git a/src/exe/main.cc b/src/exe/main.cc index 19fbe55d8..886e0875f 100644 --- a/src/exe/main.cc +++ b/src/exe/main.cc @@ -128,7 +128,7 @@ static DDRes get_library_path(TempFileHolder &libdd_profiling_path, } if (profiling_path.empty()) { - DDRES_CHECK_FWD(get_or_create_temp_file( + DDRES_CHECK_FWD(ddprof::get_or_create_temp_file( k_libdd_profiling_embedded_name, ddprof::as_bytes(ddprof::span{_binary_libdd_profiling_embedded_so_start, _binary_libdd_profiling_embedded_so_end}), diff --git a/src/perf_mainloop.cc b/src/perf_mainloop.cc index 0a6ac216a..bbe96a593 100644 --- a/src/perf_mainloop.cc +++ b/src/perf_mainloop.cc @@ -171,7 +171,7 @@ static inline DDRes worker_process_ring_buffers(PEvent *pes, int pe_len, // \fixme{nsavoire} free slot as soon as possible ? // reader.advance(hdr->size); - buffer = remaining(buffer, hdr->size); + buffer = ddprof::remaining(buffer, hdr->size); } } else { ddprof::MPSCRingBufferReader reader{ring_buffer}; diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index d81f3b3f5..a32b7eeeb 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -168,6 +168,91 @@ static void write_line(const ddprof::Symbol &symbol, ddog_Line *ffi_line) { ffi_line->line = symbol._lineno; } + +static void write_location_v2(const void* ip, + const ddog_Slice_line *lines, + ddog_Location *ffi_location) { + ffi_location->address = reinterpret_cast(ip); + ffi_location->lines = *lines; + // Folded not handled for now + ffi_location->is_folded = false; +} + +static void write_function_v2(const char* func, + ddog_Function *ffi_func) { + ffi_func->name = to_CharSlice(string_view_create_strlen(func)); +} + +static void write_line_v2(const char* func, ddog_Line *ffi_line) { + write_function_v2(func, &ffi_line->function); + ffi_line->line = 0; +} + +#include "async-profiler/codeCache.h" +#include "async-profiler/stackWalker.h" + +DDRes 
pprof_aggregate_v2(ddprof::span callchain, CodeCacheArray &cache_arary, + uint64_t value, uint64_t count, const PerfWatcher *watcher, + DDProfPProf *pprof) { + ddog_Profile *profile = pprof->_profile; + + int64_t values[DDPROF_PWT_LENGTH] = {}; + values[watcher->pprof_sample_idx] = value * count; + if (watcher_has_countable_sample_type(watcher)) { + values[watcher->pprof_count_sample_idx] = count; + } + + ddog_Location locations_buff[DD_MAX_STACK_DEPTH]; + // assumption of single line per loc for now + ddog_Line line_buff[DD_MAX_STACK_DEPTH]; + + // todo skip frames + unsigned cur_loc = 0; + for (const void *ip : callchain) { + const char *func = "unknown"; + CodeCache *code_cache = findLibraryByAddress(&cache_arary, ip); + if (code_cache) { + func = code_cache->binarySearch(ip); + } + + // possibly several lines to handle inlined function (not handled for now) + write_line_v2(func, &line_buff[cur_loc]); + ddog_Slice_line lines = {.ptr = &line_buff[cur_loc], .len = 1}; + write_location_v2(ip, &lines, &locations_buff[cur_loc]); + ++cur_loc; + } + + ddog_Label labels[PPROF_MAX_LABELS] = {}; + size_t labels_num = 0; + + // todo pid and tid things + if (watcher_has_tracepoint(watcher)) { + labels[labels_num].key = to_CharSlice("tracepoint_type"); + + // If the label is given, use that as the tracepoint type. 
Otherwise + // default to the event name + if (!watcher->tracepoint_label.empty()) { + labels[labels_num].str = to_CharSlice(watcher->tracepoint_label.c_str()); + } else { + labels[labels_num].str = to_CharSlice(watcher->tracepoint_event.c_str()); + } + ++labels_num; + } + ddog_Sample sample = { + .locations = {.ptr = locations_buff, .len = cur_loc}, + .values = {.ptr = values, .len = pprof->_nb_values}, + .labels = {.ptr = labels, .len = labels_num}, + }; + + uint64_t id_sample = ddog_Profile_add(profile, sample); + if (id_sample == 0) { + DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to add profile"); + } + + return ddres_init(); + +} + // Assumption of API is that sample is valid in a single type DDRes pprof_aggregate(const UnwindOutput *uw_output, const SymbolHdr *symbol_hdr, uint64_t value, diff --git a/src/unwind.cc b/src/unwind.cc index 2fd058bca..b66951e9f 100644 --- a/src/unwind.cc +++ b/src/unwind.cc @@ -79,7 +79,7 @@ static bool is_stack_complete(UnwindState *us) { DDRes unwindstate__unwind(UnwindState *us) { DDRes res = ddres_init(); if (us->pid != 0) { // we can not unwind pid 0 - res = unwind_dwfl(us); + } if (IsDDResNotOK(res)) { find_dso_add_error_frame(us); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 49eb2ae39..921e5ef91 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -153,7 +153,7 @@ add_unit_test( add_unit_test( ddprof_pprof-ut ../src/pprof/ddprof_pprof.cc ../src/unwind_output.cc ../src/perf_watcher.cc ddprof_pprof-ut.cc - LIBRARIES Datadog::Profiling DDProf::Parser + LIBRARIES Datadog::Profiling DDProf::Parser DDProf::AsyncProf DEFINITIONS MYNAME="ddprof_pprof-ut") add_unit_test( @@ -165,7 +165,7 @@ add_unit_test( ../src/perf_watcher.cc ../src/tags.cc ddprof_exporter-ut.cc - LIBRARIES Datadog::Profiling DDProf::Parser + LIBRARIES Datadog::Profiling DDProf::Parser DDProf::AsyncProf DEFINITIONS MYNAME="ddprof_exporter-ut") add_unit_test( diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 
a39f2375d..03abcbfac 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -51,7 +51,7 @@ TEST(dwarf_unwind, simple) { size_t size_stack = funcA(regs); EXPECT_TRUE(size_stack); - ap::StackContext sc = ap::from_regs(std::span(regs)); + ap::StackContext sc = ap::from_regs(ddprof::span(regs)); ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); void *callchain[128]; @@ -81,6 +81,7 @@ TEST(dwarf_unwind, simple) { #include "ringbuffer_utils.hpp" #include "allocation_tracker.hpp" #include "perf_ringbuffer.hpp" +#include "span.hpp" DDPROF_NOINLINE void func_save_sleep(size_t size); DDPROF_NOINLINE void func_intermediate(size_t size); @@ -107,7 +108,6 @@ TEST(dwarf_unwind, remote) { ring_buffer.get_buffer_info()); // Fork - pid_t parent_pid = getpid(); pid_t temp_pid = fork(); if (!temp_pid) { func_intermediate(10); @@ -131,9 +131,9 @@ TEST(dwarf_unwind, remote) { // convert based on mask for this watcher (default in this case) perf_event_sample *sample = hdr2samp(hdr, perf_event_default_sample_type()); - std::span regs_span{sample->regs, PERF_REGS_COUNT}; + ddprof::span regs_span{sample->regs, PERF_REGS_COUNT}; ap::StackContext sc = ap::from_regs(regs_span); - std::span stack{ reinterpret_cast(sample->data_stack), sample->size_stack}; + ddprof::span stack{ reinterpret_cast(sample->data_stack), sample->size_stack}; ap::StackBuffer buffer(stack, sc.sp, sc.sp + sample->size_stack); void *callchain[DD_MAX_STACK_DEPTH]; From e61cb4796b7f6a99869b3b2e44dc5b8ac3b5ca73 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Sat, 19 Nov 2022 11:04:26 +0100 Subject: [PATCH 11/29] Hacky version using the async profiler's unwinding --- CMakeLists.txt | 34 ---- include/async-profiler/stackWalker.h | 2 +- include/pprof/ddprof_pprof.hpp | 7 + include/unwind.hpp | 2 - include/unwind_output.hpp | 8 +- include/unwind_state.hpp | 16 +- src/async-profiler/stackWalker.cpp | 2 +- src/ddprof_worker.cc | 66 +------ src/unwind.cc | 94 +++------ src/unwind_dwfl.cc | 274 
--------------------------- src/unwind_helpers.cc | 73 ------- src/unwind_output.cc | 9 - test/CMakeLists.txt | 77 -------- test/ddprof_exporter-ut.cc | 7 +- test/ddprof_pprof-ut.cc | 7 +- test/unwind_output_mock.hpp | 2 +- 16 files changed, 55 insertions(+), 625 deletions(-) delete mode 100644 src/unwind_dwfl.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 129c1683b..140c1a460 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -388,40 +388,6 @@ install( ARCHIVE DESTINATION ddprof/lib PUBLIC_HEADER DESTINATION ddprof/include) -# ---- Declaration of native library ---- -option(BUILD_NATIVE_LIB "Build a library out of the native profiler" ON) -if(${BUILD_NATIVE_LIB}) - - # Define all sources - set(DDPROF_LIB_SRC ${COMMON_SRC} src/lib/ddprof_output.cc) - - # Libs to link - set(NATIVE_LIB_LIBRARY_LIST DDProf::Parser llvm-demangle ${ELFUTILS_LIBRARIES} Threads::Threads) - - if("${DDPROF_ALLOCATOR}" STREQUAL "JEMALLOC") - list(PREPEND NATIVE_LIB_LIBRARY_LIST jemalloc) - endif() - - # Create the lib - add_library(ddprof-native ${DDPROF_LIB_SRC}) - - set_target_properties(ddprof-native PROPERTIES VERSION ${PROJECT_VERSION}) - set_target_properties(ddprof-native PROPERTIES COMPILE_DEFINITIONS DDPROF_NATIVE_LIB) - - # libcap, can be removed from version distributed to client - list(APPEND NATIVE_LIB_LIBRARY_LIST libcap) - - target_include_directories(ddprof-native PRIVATE ${DDPROF_INCLUDE_LIST}) - - target_link_libraries(ddprof-native PRIVATE ${NATIVE_LIB_LIBRARY_LIST}) - add_library(DDProf::Native ALIAS ddprof-native) - - option(ACCURACY_TEST "Enable accuracy test" OFF) - if(${ACCURACY_TEST}) - add_subdirectory(test/self_unwind) - endif() -endif() - # ---- Unit tests ---- aux_source_directory(${CMAKE_SOURCE_DIR}/src/async-profiler ASYNC_PROFILER_SRC) diff --git a/include/async-profiler/stackWalker.h b/include/async-profiler/stackWalker.h index ceabe17a8..e30c9c8fb 100644 --- a/include/async-profiler/stackWalker.h +++ b/include/async-profiler/stackWalker.h @@ -30,7 
+30,7 @@ CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void *address); int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, - const ap::StackBuffer &buffer, const void **callchain, + const ap::StackBuffer &buffer, void const **callchain, int max_depth, int skip); #endif // _STACKWALKER_H diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index e902d04a0..268f0a698 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -12,6 +12,9 @@ #include "tags.hpp" #include "unwind_output.hpp" +#include "span.hpp" +#include "async-profiler/codeCache.h" + struct ddog_Profile; struct SymbolHdr; @@ -25,6 +28,10 @@ struct DDProfPProf { DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext *ctx); +DDRes pprof_aggregate_v2(ddprof::span callchain, CodeCacheArray &cache_arary, + uint64_t value, uint64_t count, const PerfWatcher *watcher, + DDProfPProf *pprof); + /** * Aggregate to the existing profile the provided unwinding output. 
* @param uw_output diff --git a/include/unwind.hpp b/include/unwind.hpp index 552aaf709..2eb53d668 100644 --- a/include/unwind.hpp +++ b/include/unwind.hpp @@ -12,8 +12,6 @@ typedef struct UnwindState UnwindState; namespace ddprof { -void unwind_init(void); - // Fill sample info to prepare for unwinding void unwind_init_sample(UnwindState *us, uint64_t *sample_regs, pid_t sample_pid, uint64_t sample_size_stack, diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index 988a0492c..62eff7046 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -11,7 +11,7 @@ #include "ddprof_defs.hpp" #include "string_view.hpp" -#include +#include typedef struct FunLoc { uint64_t ip; // Relative to file, not VMA @@ -28,11 +28,9 @@ typedef struct UnwindOutput { } UnwindOutput; struct UnwindOutput_V2 { - std::array callchain; - uint64_t nb_locs = {}; + const void* callchain[DD_MAX_STACK_DEPTH]; + uint64_t nb_locs; int pid = {}; int tid = {}; bool is_incomplete = false; }; - -void uw_output_clear(UnwindOutput *); diff --git a/include/unwind_state.hpp b/include/unwind_state.hpp index 58b884be1..a49a9105c 100644 --- a/include/unwind_state.hpp +++ b/include/unwind_state.hpp @@ -14,6 +14,7 @@ #include "perf_archmap.hpp" #include "symbol_hdr.hpp" #include "unwind_output.hpp" +#include "async-profiler/codeCache.h" #include @@ -37,16 +38,8 @@ struct UnwindRegisters { /// given through callbacks struct UnwindState { explicit UnwindState(int dd_profiling_fd = -1) - : _dwfl_wrapper(nullptr), dso_hdr(dd_profiling_fd), pid(-1), - stack(nullptr), stack_sz(0), current_ip(0) { - uw_output_clear(&output); - } - - ddprof::DwflHdr dwfl_hdr; - ddprof::DwflWrapper *_dwfl_wrapper; // pointer to current dwfl element - - ddprof::DsoHdr dso_hdr; - SymbolHdr symbol_hdr; + : pid(-1), + stack(nullptr), stack_sz(0), current_ip(0) {} pid_t pid; char *stack; @@ -55,7 +48,8 @@ struct UnwindState { UnwindRegisters initial_regs; ProcessAddress_t current_ip; - UnwindOutput output; + 
std::unordered_map code_cache; + UnwindOutput_V2 output; }; static inline bool unwind_registers_equal(const UnwindRegisters *lhs, diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index fc09fbb0b..8e1c99948 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp @@ -146,7 +146,7 @@ void populateStackContext(ap::StackContext &sc, void *ucontext) { } int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, - const ap::StackBuffer &buffer, const void **callchain, + const ap::StackBuffer &buffer, void const **callchain, int max_depth, int skip) { int depth = -skip; diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index f588e01b5..3aaddef15 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -42,10 +42,9 @@ static const DDPROF_STATS s_cycled_stats[] = { static const long k_clock_ticks_per_sec = sysconf(_SC_CLK_TCK); /// Human readable runtime information -static void print_diagnostics(const DsoHdr &dso_hdr) { +static void print_diagnostics(void) { LG_NFO("Printing internal diagnostics"); ddprof_stats_print(); - dso_hdr._stats.log(); } static inline int64_t now_nanos() { @@ -54,31 +53,6 @@ static inline int64_t now_nanos() { return (tv.tv_sec * 1000000 + tv.tv_usec) * 1000; } -#ifndef DDPROF_NATIVE_LIB -static DDRes report_lost_events(DDProfContext *ctx) { - for (int watcher_idx = 0; watcher_idx < ctx->num_watchers; ++watcher_idx) { - if (ctx->worker_ctx.lost_events_per_watcher[watcher_idx] > 0) { - PerfWatcher *watcher = &ctx->watchers[watcher_idx]; - UnwindState *us = ctx->worker_ctx.us; - uw_output_clear(&us->output); - add_common_frame(us, SymbolErrors::lost_event); - LG_WRN("Reporting #%lu -> [%lu] lost samples for watcher #%d", - ctx->worker_ctx.lost_events_per_watcher[watcher_idx], - ctx->worker_ctx.lost_events_per_watcher[watcher_idx] * - watcher->sample_period, - watcher_idx); - DDRES_CHECK_FWD(pprof_aggregate( - &us->output, &us->symbol_hdr, watcher->sample_period, - 
ctx->worker_ctx.lost_events_per_watcher[watcher_idx], watcher, - ctx->worker_ctx.pprof[ctx->worker_ctx.i_current_pprof])); - ctx->worker_ctx.lost_events_per_watcher[watcher_idx] = 0; - } - } - - return {}; -} -#endif - static inline long export_time_convert(double upload_period) { return upload_period * 1000000000; } @@ -116,7 +90,6 @@ DDRes worker_library_init(DDProfContext *ctx, DDRES_CHECK_FWD(pevent_mmap(pevent_hdr, false)); } // Initialize the unwind state and library - unwind_init(); ctx->worker_ctx.user_tags = new UserTags(ctx->params.tags, ctx->params.num_cpu); @@ -152,8 +125,7 @@ DDRes worker_library_free(DDProfContext *ctx) { } /// Retrieve cpu / memory info -static DDRes worker_update_stats(ProcStatus *procstat, const DsoHdr *dso_hdr, - std::chrono::nanoseconds cycle_duration) { +static DDRes worker_update_stats(ProcStatus *procstat, std::chrono::nanoseconds cycle_duration) { // Update the procstats, but first snapshot the utime so we can compute the // diff for the utime metric int64_t cpu_time_old = procstat->utime + procstat->stime; @@ -164,11 +136,6 @@ static DDRes worker_update_stats(ProcStatus *procstat, const DsoHdr *dso_hdr, (k_clock_ticks_per_sec * elapsed_nsec); ddprof_stats_set(STATS_PROFILER_RSS, get_page_size() * procstat->rss); ddprof_stats_set(STATS_PROFILER_CPU_USAGE, millicores); - ddprof_stats_set(STATS_DSO_UNHANDLED_SECTIONS, - dso_hdr->_stats.sum_event_metric(DsoStats::kUnhandledDso)); - ddprof_stats_set(STATS_DSO_NEW_DSO, - dso_hdr->_stats.sum_event_metric(DsoStats::kNewDso)); - ddprof_stats_set(STATS_DSO_SIZE, dso_hdr->get_nb_dso()); long target_cpu_nsec; ddprof_stats_get(STATS_TARGET_CPU_USAGE, &target_cpu_nsec); @@ -266,29 +233,19 @@ DDRes ddprof_pr_sample(DDProfContext *ctx, perf_event_sample *sample, // Aggregate if unwinding went well (todo : fatal error propagation) if (!IsDDResFatal(res) && EventConfMode::kCallgraph <= watcher->output_mode) { -#ifndef DDPROF_NATIVE_LIB // Depending on the type of watcher, compute a value for 
sample uint64_t sample_val = perf_value_from_sample(watcher, sample); // in lib mode we don't aggregate (protect to avoid link failures) int i_export = ctx->worker_ctx.i_current_pprof; DDProfPProf *pprof = ctx->worker_ctx.pprof[i_export]; - DDRES_CHECK_FWD(pprof_aggregate(&us->output, &us->symbol_hdr, sample_val, 1, + + DDRES_CHECK_FWD(pprof_aggregate_v2(ddprof::span(us->output.callchain, us->output.nb_locs), us->code_cache[sample->pid], sample_val, 1, watcher, pprof)); if (ctx->params.show_samples) { - ddprof_print_sample(us->output, us->symbol_hdr, sample->period, *watcher); + // todo show samples + // ddprof_print_sample(us->output, us->symbol_hdr, sample->period, *watcher); } -#else - // Call the user's stack handler - if (ctx->stack_handler) { - if (!ctx->stack_handler->apply(&us->output, ctx, - ctx->stack_handler->callback_ctx, - watcher_pos)) { - DDRES_RETURN_ERROR_LOG(DD_WHAT_STACK_HANDLE, - "Stack handler returning errors"); - } - } -#endif } DDRES_CHECK_FWD(ddprof_stats_add(STATS_AGGREGATION_AVG_TIME, @@ -353,7 +310,7 @@ DDRes ddprof_worker_cycle(DDProfContext *ctx, int64_t now, return ddres_create(DD_SEVERROR, DD_WHAT_EXPORTER); } - DDRES_CHECK_FWD(report_lost_events(ctx)); + // todo lost events // Dispatch to thread ctx->worker_ctx.exp_error = false; @@ -383,11 +340,10 @@ DDRes ddprof_worker_cycle(DDProfContext *ctx, int64_t now, // Scrape procfs for process usage statistics DDRES_CHECK_FWD(worker_update_stats(&ctx->worker_ctx.proc_status, - &ctx->worker_ctx.us->dso_hdr, cycle_duration)); // And emit diagnostic output (if it's enabled) - print_diagnostics(ctx->worker_ctx.us->dso_hdr); + print_diagnostics(); if (IsDDResNotOK(ddprof_stats_send(ctx->params.internal_stats))) { LG_WRN("Unable to utilize to statsd socket. 
Suppressing future stats."); free((void *)ctx->params.internal_stats); @@ -397,9 +353,6 @@ DDRes ddprof_worker_cycle(DDProfContext *ctx, int64_t now, // Increase the counts of exports ctx->worker_ctx.count_worker += 1; - // allow new backpopulates - ctx->worker_ctx.us->dso_hdr.reset_backpopulate_state(); - // Update the time last sent ctx->worker_ctx.send_nanos += export_time_convert(ctx->params.upload_period); @@ -424,9 +377,6 @@ void ddprof_pr_mmap(DDProfContext *ctx, const perf_event_mmap2 *map, LG_DBG("<%d>(MAP)%d: %s (%lx/%lx/%lx) %02u:%02u %lu", watcher_pos, map->pid, map->filename, map->addr, map->len, map->pgoff, map->maj, map->min, map->ino); - ddprof::Dso new_dso(map->pid, map->addr, map->addr + map->len - 1, map->pgoff, - std::string(map->filename), true, map->ino); - ctx->worker_ctx.us->dso_hdr.insert_erase_overlap(std::move(new_dso)); } void ddprof_pr_lost(DDProfContext *ctx, const perf_event_lost *lost, diff --git a/src/unwind.cc b/src/unwind.cc index b66951e9f..332bd8fd3 100644 --- a/src/unwind.cc +++ b/src/unwind.cc @@ -7,12 +7,8 @@ #include "ddprof_stats.hpp" #include "ddres.hpp" -#include "dso_hdr.hpp" -#include "dwfl_hdr.hpp" #include "logger.hpp" #include "signal_helper.hpp" -#include "symbol_hdr.hpp" -#include "unwind_dwfl.hpp" #include "unwind_helpers.hpp" #include "unwind_metrics.hpp" #include "unwind_state.hpp" @@ -21,24 +17,19 @@ #include #include +#include "symbols.h" +#include "stack_context.h" +#include "stackWalker.h" + using namespace std::string_view_literals; namespace ddprof { -void unwind_init(void) { elf_version(EV_CURRENT); } - -static void find_dso_add_error_frame(UnwindState *us) { - DsoHdr::DsoFindRes find_res = - us->dso_hdr.dso_find_closest(us->pid, us->current_ip); - add_error_frame(find_res.second ? 
&(find_res.first->second) : nullptr, us, - us->current_ip); -} void unwind_init_sample(UnwindState *us, uint64_t *sample_regs, pid_t sample_pid, uint64_t sample_size_stack, char *sample_data_stack) { - uw_output_clear(&us->output); - memcpy(&us->initial_regs.regs[0], sample_regs, - K_NB_REGS_UNWIND * sizeof(uint64_t)); + us->output.nb_locs = 0; + memcpy(&us->initial_regs.regs[0], sample_regs,K_NB_REGS_UNWIND * sizeof(uint64_t)); us->current_ip = us->initial_regs.regs[REGNAME(PC)]; us->pid = sample_pid; us->stack_sz = sample_size_stack; @@ -52,76 +43,37 @@ static bool is_ld(const std::string &path) { return path.starts_with("ld-"); } -static bool is_stack_complete(UnwindState *us) { - static constexpr std::array s_expected_root_frames{"_start"sv, "__clone"sv, - "_exit"sv}; - - if (us->output.nb_locs == 0) { - return false; - } - - const auto &root_loc = us->output.locs[us->output.nb_locs - 1]; - const auto &root_mapping = - us->symbol_hdr._mapinfo_table[root_loc._map_info_idx]; - - // If we are in ld.so (eg. during lib init before main) consider the stack as - // complete - if (is_ld(root_mapping._sopath)) { - return true; - } - - const auto &root_func = - us->symbol_hdr._symbol_table[root_loc._symbol_idx]._symname; - return std::find(s_expected_root_frames.begin(), s_expected_root_frames.end(), - root_func) != s_expected_root_frames.end(); -} - DDRes unwindstate__unwind(UnwindState *us) { DDRes res = ddres_init(); if (us->pid != 0) { // we can not unwind pid 0 - - } - if (IsDDResNotOK(res)) { - find_dso_add_error_frame(us); - } - - if (!is_stack_complete(us)) { - us->output.is_incomplete = true; - ddprof_stats_add(STATS_UNWIND_INCOMPLETE_STACK, 1, nullptr); - // Only add [incomplete] virtual frame if stack is not already truncated ! 
- if (!is_max_stack_depth_reached(*us)) { - add_common_frame(us, SymbolErrors::incomplete_stack); + CodeCacheArray &code_cache_array = us->code_cache[us->pid]; + if (!code_cache_array.count()) { + // No libraries + Symbols::parsePidLibraries(us->pid, &us->code_cache[us->pid], false); + // todo how do we avoid bouncing on this ? } - } else { - us->output.is_incomplete = false; + ddprof::span regs_span{us->initial_regs.regs, PERF_REGS_COUNT}; + ap::StackContext sc = ap::from_regs(regs_span); + ddprof::span stack{ reinterpret_cast(us->stack), us->stack_sz}; + ap::StackBuffer buffer(stack, sc.sp, sc.sp + us->stack_sz); + + // todo remove char* in favour of uint64 + us->output.nb_locs = stackWalk(&code_cache_array, sc, buffer, (us->output.callchain), DD_MAX_STACK_DEPTH, 0); } - ddprof_stats_add(STATS_UNWIND_AVG_STACK_DEPTH, us->output.nb_locs, nullptr); + // todo error management (error frame) // Add a frame that identifies executable to which these belong - add_virtual_base_frame(us); - if (us->_dwfl_wrapper->_inconsistent) { - // error detected on this pid - LG_WRN("(Inconsistent DWFL/DSOs)%d - Free associated objects", us->pid); - unwind_pid_free(us, us->pid); - } + // todo base frame + return res; } void unwind_pid_free(UnwindState *us, pid_t pid) { - us->dso_hdr.pid_free(pid); - us->dwfl_hdr.clear_pid(pid); - us->symbol_hdr.clear(pid); + us->code_cache.erase(pid); } -void unwind_cycle(UnwindState *us) { - us->symbol_hdr.display_stats(); - us->symbol_hdr.cycle(); - // clean up pids that we did not see recently - us->dwfl_hdr.display_stats(); - us->dwfl_hdr.clear_unvisited(); - - us->dso_hdr._stats.reset(); +void unwind_cycle(UnwindState *) { unwind_metrics_reset(); } diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc deleted file mode 100644 index 872fbf5b0..000000000 --- a/src/unwind_dwfl.cc +++ /dev/null @@ -1,274 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. 
This product includes software -// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present -// Datadog, Inc. - -#include "unwind_dwfl.hpp" - -#include "ddprof_stats.hpp" -#include "ddres.hpp" -#include "dwfl_internals.hpp" -#include "dwfl_thread_callbacks.hpp" -#include "logger.hpp" -#include "runtime_symbol_lookup.hpp" -#include "symbol_hdr.hpp" -#include "unwind_helpers.hpp" -#include "unwind_state.hpp" - -int frame_cb(Dwfl_Frame *, void *); - -namespace ddprof { - -DDRes unwind_init_dwfl(UnwindState *us) { - // Create or get the dwfl object associated to cache - us->_dwfl_wrapper = &(us->dwfl_hdr.get_or_insert(us->pid)); - if (!us->_dwfl_wrapper->_attached) { - // we need to add at least one module to figure out the architecture (to - // create the unwinding backend) - - DsoHdr::DsoMap &map = us->dso_hdr._map[us->pid]; - if (map.empty()) { - int nb_elts; - us->dso_hdr.pid_backpopulate(us->pid, nb_elts); - } - - bool success = false; - // Find an elf file we can load for this PID - for (auto it = map.cbegin(); it != map.cend(); ++it) { - const Dso &dso = it->second; - if (dso._executable) { - FileInfoId_t file_info_id = us->dso_hdr.get_or_insert_file_info(dso); - if (file_info_id <= k_file_info_error) { - LG_DBG("Unable to find file for DSO %s", dso.to_string().c_str()); - continue; - } - const FileInfoValue &file_info_value = - us->dso_hdr.get_file_info_value(file_info_id); - - DDProfMod *ddprof_mod = us->_dwfl_wrapper->register_mod( - us->current_ip, dso, file_info_value); - if (ddprof_mod) { - // one success is fine - success = true; - break; - } - } - } - if (!success) { - LG_DBG("Unable to attach a mod for PID%d", us->pid); - return ddres_warn(DD_WHAT_UW_ERROR); - } - - static const Dwfl_Thread_Callbacks dwfl_callbacks = { - .next_thread = next_thread, - .get_thread = nullptr, - .memory_read = memory_read_dwfl, - .set_initial_registers = set_initial_registers, - .detach = nullptr, - .thread_detach = nullptr, - }; - // Creates the dwfl 
unwinding backend - return us->_dwfl_wrapper->attach(us->pid, &dwfl_callbacks, us); - } - return ddres_init(); -} - -static void trace_unwinding_end(UnwindState *us) { - if (LL_DEBUG <= LOG_getlevel()) { - DsoHdr::DsoFindRes find_res = - us->dso_hdr.dso_find_closest(us->pid, us->current_ip); - if (find_res.second) { - LG_DBG("Stopped at %lx - dso %s - error %s", us->current_ip, - find_res.first->second.to_string().c_str(), dwfl_errmsg(-1)); - } else { - LG_DBG("Unknown DSO %lx - error %s", us->current_ip, dwfl_errmsg(-1)); - } - } -} -static DDRes add_dwfl_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, - const DDProfMod &ddprof_mod, - FileInfoId_t file_info_id); - -// check for runtime symbols provided in /tmp files -static DDRes add_runtime_symbol_frame(UnwindState *us, const Dso &dso, - ElfAddress_t pc); - -// returns an OK status if we should continue unwinding -static DDRes add_symbol(Dwfl_Frame *dwfl_frame, UnwindState *us) { - - if (is_max_stack_depth_reached(*us)) { - add_common_frame(us, SymbolErrors::truncated_stack); - LG_DBG("Max stack depth reached (depth#%lu)", us->output.nb_locs); - ddprof_stats_add(STATS_UNWIND_TRUNCATED_OUTPUT, 1, nullptr); - return ddres_warn(DD_WHAT_UW_MAX_DEPTH); - } - - Dwarf_Addr pc = 0; - if (!dwfl_frame_pc(dwfl_frame, &pc, nullptr)) { - LG_DBG("Failure to compute frame PC: %s (depth#%lu)", dwfl_errmsg(-1), - us->output.nb_locs); - add_error_frame(nullptr, us, pc, SymbolErrors::dwfl_frame); - return ddres_init(); // invalid pc : do not add frame - } - us->current_ip = pc; - - DsoHdr::DsoFindRes find_res = - us->dso_hdr.dso_find_or_backpopulate(us->pid, pc); - if (!find_res.second) { - // no matching file was found - LG_DBG("[UW] (PID%d) DSO not found at 0x%lx (depth#%lu)", us->pid, pc, - us->output.nb_locs); - add_error_frame(nullptr, us, pc, SymbolErrors::unknown_dso); - return ddres_init(); - } - const Dso &dso = find_res.first->second; - if (dso::has_runtime_symbols(dso._type)) { - return 
add_runtime_symbol_frame(us, dso, pc); - } - // if not encountered previously, update file location / key - FileInfoId_t file_info_id = us->dso_hdr.get_or_insert_file_info(dso); - if (file_info_id <= k_file_info_error) { - // unable to acces file: add available info from dso - add_dso_frame(us, dso, pc, "pc"); - // We could stop here or attempt to continue in the dwarf unwinding - // sometimes frame pointer lets us go further -> So we continue - return ddres_init(); - } - const FileInfoValue &file_info_value = - us->dso_hdr.get_file_info_value(file_info_id); - DDProfMod *ddprof_mod = us->_dwfl_wrapper->unsafe_get(file_info_id); - if (!ddprof_mod) { - // ensure unwinding backend has access to this module (and check - // consistency) - ddprof_mod = us->_dwfl_wrapper->register_mod(pc, dso, file_info_value); - // Updates in DSO layout can create inconsistencies - if (!ddprof_mod) { - return ddres_warn(DD_WHAT_UW_ERROR); - } - } - - // To check that we are in an activation frame, we unwind the current frame - // This means we need access to the module information. 
- // Now that we have loaded the module, we can check if we are an activation - // frame - bool isactivation = false; - - if (!dwfl_frame_pc(dwfl_frame, &pc, &isactivation)) { - LG_DBG("Failure to compute frame PC: %s (depth#%lu)", dwfl_errmsg(-1), - us->output.nb_locs); - add_error_frame(nullptr, us, pc, SymbolErrors::dwfl_frame); - return ddres_init(); // invalid pc : do not add frame - } - if (!isactivation) - --pc; - us->current_ip = pc; - - // Now we register - if (IsDDResNotOK(add_dwfl_frame(us, dso, pc, *ddprof_mod, file_info_id))) { - return ddres_warn(DD_WHAT_UW_ERROR); - } - return ddres_init(); -} - -bool is_infinite_loop(UnwindState *us) { - UnwindOutput &output = us->output; - uint64_t nb_locs = output.nb_locs; - unsigned nb_frames_to_check = 3; - if (nb_locs <= nb_frames_to_check) { - return false; - } - for (unsigned i = 0; i < nb_frames_to_check; ++i) { - FunLoc &n_minus_one_loc = output.locs[nb_locs - i]; - FunLoc &n_minus_two_loc = output.locs[nb_locs - i - 1]; - if (n_minus_one_loc.ip != n_minus_two_loc.ip) { - return false; - } - } - return true; -} - -// frame_cb callback at every frame for the dwarf unwinding -static int frame_cb(Dwfl_Frame *dwfl_frame, void *arg) { - UnwindState *us = (UnwindState *)arg; -#ifdef DEBUG - LG_NFO("Beging depth %lu", us->output.nb_locs); -#endif - int dwfl_error_value = dwfl_errno(); - if (dwfl_error_value) { - // Check if dwarf unwinding was a failure we can get stuck in infinite loops - if (is_infinite_loop(us)) { - LG_DBG("Break out of unwinding (possible infinite loop)"); - return DWARF_CB_ABORT; - } - } -#ifdef DEBUG - // We often fallback to frame pointer unwinding (which logs an error) - if (dwfl_error_value) { - LG_DBG("Error flagged at depth = %lu -- Error:%s ", us->output.nb_locs, - dwfl_errmsg(dwfl_error_value)); - } -#endif - - // Before we potentially exit, record the fact that we're processing a frame - ddprof_stats_add(STATS_UNWIND_FRAMES, 1, NULL); - - if (IsDDResNotOK(add_symbol(dwfl_frame, us))) 
{ - return DWARF_CB_ABORT; - } - - return DWARF_CB_OK; -} - -DDRes unwind_dwfl(UnwindState *us) { - DDRes res = unwind_init_dwfl(us); - if (!IsDDResOK(res)) { - LOG_ERROR_DETAILS(LG_DBG, res._what); - return res; - } - // - // Launch the dwarf unwinding (uses frame_cb callback) - if (dwfl_getthread_frames(us->_dwfl_wrapper->_dwfl, us->pid, frame_cb, us) != - 0) { - trace_unwinding_end(us); - } - res = us->output.nb_locs > 0 ? ddres_init() - : ddres_warn(DD_WHAT_DWFL_LIB_ERROR); - return res; -} - -static DDRes add_dwfl_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, - const DDProfMod &ddprof_mod, - FileInfoId_t file_info_id) { - - SymbolHdr &unwind_symbol_hdr = us->symbol_hdr; - - // get or create the dwfl symbol - SymbolIdx_t symbol_idx = unwind_symbol_hdr._dwfl_symbol_lookup.get_or_insert( - ddprof_mod, unwind_symbol_hdr._symbol_table, - unwind_symbol_hdr._dso_symbol_lookup, file_info_id, pc, dso); - MapInfoIdx_t map_idx = us->symbol_hdr._mapinfo_lookup.get_or_insert( - us->pid, us->symbol_hdr._mapinfo_table, dso, ddprof_mod._build_id); - return add_frame(symbol_idx, map_idx, pc, us); -} - -// check for runtime symbols provided in /tmp files -static DDRes add_runtime_symbol_frame(UnwindState *us, const Dso &dso, - ElfAddress_t pc) { - SymbolHdr &unwind_symbol_hdr = us->symbol_hdr; - SymbolTable &symbol_table = unwind_symbol_hdr._symbol_table; - RuntimeSymbolLookup &runtime_symbol_lookup = - unwind_symbol_hdr._runtime_symbol_lookup; - SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(dso._pid, pc, symbol_table); - if (symbol_idx == -1) { - add_dso_frame(us, dso, pc, "pc"); - return ddres_init(); - } - - MapInfoIdx_t map_idx = us->symbol_hdr._mapinfo_lookup.get_or_insert( - us->pid, us->symbol_hdr._mapinfo_table, dso, {}); - - return add_frame(symbol_idx, map_idx, pc, us); -} - -} // namespace ddprof diff --git a/src/unwind_helpers.cc b/src/unwind_helpers.cc index 7630977c1..9fc3a0c14 100644 --- a/src/unwind_helpers.cc +++ 
b/src/unwind_helpers.cc @@ -18,61 +18,6 @@ bool is_max_stack_depth_reached(const UnwindState &us) { return us.output.nb_locs + 2 >= DD_MAX_STACK_DEPTH; } -DDRes add_frame(SymbolIdx_t symbol_idx, MapInfoIdx_t map_idx, ElfAddress_t pc, - UnwindState *us) { - UnwindOutput *output = &us->output; - int64_t current_loc_idx = output->nb_locs; - if (output->nb_locs >= DD_MAX_STACK_DEPTH) { - DDRES_RETURN_WARN_LOG(DD_WHAT_UW_MAX_DEPTH, - "Max stack depth reached"); // avoid overflow - } - - output->locs[current_loc_idx]._symbol_idx = symbol_idx; - output->locs[current_loc_idx].ip = pc; - if (map_idx == -1) { - // just add an empty element for mapping info - output->locs[current_loc_idx]._map_info_idx = - us->symbol_hdr._common_mapinfo_lookup.get_or_insert( - CommonMapInfoLookup::MappingErrors::empty, - us->symbol_hdr._mapinfo_table); - } else { - output->locs[current_loc_idx]._map_info_idx = map_idx; - } -#ifdef DEBUG - LG_NTC("Considering frame with IP : %lx / %s ", pc, - us->symbol_hdr._symbol_table[output->locs[current_loc_idx]._symbol_idx] - ._symname.c_str()); -#endif - output->nb_locs++; - return ddres_init(); -} - -static void add_frame_without_mapping(UnwindState *us, SymbolIdx_t symbol_idx) { - add_frame(symbol_idx, -1, 0, us); -} - -void add_common_frame(UnwindState *us, SymbolErrors lookup_case) { - add_frame_without_mapping(us, - us->symbol_hdr._common_symbol_lookup.get_or_insert( - lookup_case, us->symbol_hdr._symbol_table)); -} - -void add_dso_frame(UnwindState *us, const Dso &dso, - ElfAddress_t normalized_addr, std::string_view addr_type) { - add_frame_without_mapping( - us, - us->symbol_hdr._dso_symbol_lookup.get_or_insert( - normalized_addr, dso, us->symbol_hdr._symbol_table, addr_type)); -} - -void add_virtual_base_frame(UnwindState *us) { - add_frame_without_mapping( - us, - us->symbol_hdr._base_frame_symbol_lookup.get_or_insert( - us->pid, us->symbol_hdr._symbol_table, - us->symbol_hdr._dso_symbol_lookup, us->dso_hdr)); -} - // read a word from the 
given stack bool memory_read(ProcessAddress_t addr, ElfWord_t *result, int regno, void *arg) { @@ -216,22 +161,4 @@ bool memory_read(ProcessAddress_t addr, ElfWord_t *result, int regno, *result = *(ElfWord_t *)(us->stack + stack_idx); return true; } - -void add_error_frame(const Dso *dso, UnwindState *us, - [[maybe_unused]] ProcessAddress_t pc, - SymbolErrors error_case) { - ddprof_stats_add(STATS_UNWIND_ERRORS, 1, NULL); - if (dso) { -// #define ADD_ADDR_IN_SYMB // creates more elements (but adds info on -// addresses) -#ifdef ADD_ADDR_IN_SYMB - add_dso_frame(us, *dso, pc, "pc"); -#else - add_dso_frame(us, *dso, 0x0, "pc"); -#endif - } else { - add_common_frame(us, error_case); - } - LG_DBG("Error frame (depth#%lu)", us->output.nb_locs); -} } // namespace ddprof diff --git a/src/unwind_output.cc b/src/unwind_output.cc index 34a927dbb..1188de7ce 100644 --- a/src/unwind_output.cc +++ b/src/unwind_output.cc @@ -8,12 +8,3 @@ #include "unwind_output.hpp" -static void FunLoc_clear(FunLoc *locs) { - memset(locs, 0, sizeof(*locs) * DD_MAX_STACK_DEPTH); -} - -void uw_output_clear(UnwindOutput *output) { - FunLoc_clear(output->locs); - output->nb_locs = 0; - output->is_incomplete = true; -} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 921e5ef91..892c1a982 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -207,83 +207,6 @@ add_unit_test( add_compile_definitions("DWFL_TEST_DATA=\"${CMAKE_CURRENT_SOURCE_DIR}/data\"") set_property(TARGET dwfl_module-ut PROPERTY POSITION_INDEPENDENT_CODE TRUE) -add_unit_test( - savecontext-ut - savecontext-ut.cc - ../src/base_frame_symbol_lookup.cc - ../src/build_id.cc - ../src/common_mapinfo_lookup.cc - ../src/common_symbol_lookup.cc - ../src/ddprof_file_info.cc - ../src/ddprof_stats.cc - ../src/dso.cc - ../src/dso_hdr.cc - ../src/dso_symbol_lookup.cc - ../src/dwfl_hdr.cc - ../src/ddprof_module_lib.cc - ../src/dwfl_symbol.cc - ../src/dwfl_symbol_lookup.cc - ../src/dwfl_thread_callbacks.cc - 
../src/failed_assumption.cc - ../src/lib/savecontext.cc - ../src/lib/saveregisters.cc - ../src/mapinfo_lookup.cc - ../src/procutils.cc - ../src/runtime_symbol_lookup.cc - ../src/symbol_map.cc - ../src/signal_helper.cc - ../src/statsd.cc - ../src/unwind.cc - ../src/unwind_dwfl.cc - ../src/unwind_helpers.cc - ../src/unwind_metrics.cc - ../src/unwind_output.cc - ../src/user_override.cc - LIBRARIES ${ELFUTILS_LIBRARIES} llvm-demangle - DEFINITIONS MYNAME="savecontext-ut") - -add_unit_test( - allocation_tracker-ut - allocation_tracker-ut.cc - ../src/lib/allocation_tracker.cc - ../src/base_frame_symbol_lookup.cc - ../src/build_id.cc - ../src/common_mapinfo_lookup.cc - ../src/common_symbol_lookup.cc - ../src/ddprof_file_info.cc - ../src/ddprof_stats.cc - ../src/dso.cc - ../src/dso_hdr.cc - ../src/dso_symbol_lookup.cc - ../src/dwfl_hdr.cc - ../src/ddprof_module_lib.cc - ../src/dwfl_symbol.cc - ../src/dwfl_symbol_lookup.cc - ../src/dwfl_thread_callbacks.cc - ../src/failed_assumption.cc - ../src/pevent_lib.cc - ../src/perf.cc - ../src/perf_ringbuffer.cc - ../src/perf_watcher.cc - ../src/ringbuffer_utils.cc - ../src/lib/savecontext.cc - ../src/lib/saveregisters.cc - ../src/mapinfo_lookup.cc - ../src/procutils.cc - ../src/runtime_symbol_lookup.cc - ../src/symbol_map.cc - ../src/signal_helper.cc - ../src/statsd.cc - ../src/sys_utils.cc - ../src/user_override.cc - ../src/unwind.cc - ../src/unwind_dwfl.cc - ../src/unwind_helpers.cc - ../src/unwind_metrics.cc - ../src/unwind_output.cc - LIBRARIES ${ELFUTILS_LIBRARIES} llvm-demangle - DEFINITIONS ${DDPROF_DEFINITION_LIST}) - add_unit_test(sys_utils-ut sys_utils-ut.cc ../src/sys_utils.cc) add_unit_test( diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index bb48552ef..9e4d61e3d 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -9,7 +9,6 @@ #include "pevent_lib_mocks.hpp" #include "pprof/ddprof_pprof.hpp" #include "tags.hpp" -#include "unwind_output_mock.hpp" #include #include @@ -144,7 
+143,7 @@ TEST(DDProfExporter, simple) { SymbolTable &table = symbol_hdr._symbol_table; MapInfoTable &mapinfo_table = symbol_hdr._mapinfo_table; - fill_unwind_symbols(table, mapinfo_table, mock_output); +// fill_unwind_symbols(table, mapinfo_table, mock_output); DDProfContext ctx = {}; ctx.watchers[0] = *ewatcher_from_str("sCPU"); @@ -152,8 +151,8 @@ TEST(DDProfExporter, simple) { res = pprof_create_profile(&pprofs, &ctx); EXPECT_TRUE(IsDDResOK(res)); - res = pprof_aggregate(&mock_output, &symbol_hdr, 1000, 1, &ctx.watchers[0], - &pprofs); +// res = pprof_aggregate(&mock_output, &symbol_hdr, 1000, 1, &ctx.watchers[0], +// &pprofs); EXPECT_TRUE(IsDDResOK(res)); } { diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index 21d06bf3b..5fdcb4634 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -10,7 +10,6 @@ #include "loghandle.hpp" #include "pevent_lib_mocks.hpp" #include "symbol_hdr.hpp" -#include "unwind_output_mock.hpp" #include #include @@ -67,15 +66,15 @@ TEST(DDProfPProf, aggregate) { SymbolTable &table = symbol_hdr._symbol_table; MapInfoTable &mapinfo_table = symbol_hdr._mapinfo_table; - fill_unwind_symbols(table, mapinfo_table, mock_output); + // fill_unwind_symbols(table, mapinfo_table, mock_output); DDProfPProf pprof; DDProfContext ctx = {}; ctx.watchers[0] = *ewatcher_from_str("sCPU"); ctx.num_watchers = 1; DDRes res = pprof_create_profile(&pprof, &ctx); EXPECT_TRUE(IsDDResOK(res)); - res = pprof_aggregate(&mock_output, &symbol_hdr, 1000, 1, &ctx.watchers[0], - &pprof); +// res = pprof_aggregate(&mock_output, &symbol_hdr, 1000, 1, &ctx.watchers[0], +// &pprof); EXPECT_TRUE(IsDDResOK(res)); diff --git a/test/unwind_output_mock.hpp b/test/unwind_output_mock.hpp index af0c072c1..1f83e8689 100644 --- a/test/unwind_output_mock.hpp +++ b/test/unwind_output_mock.hpp @@ -43,7 +43,7 @@ static inline void fill_mapinfo_table_1(MapInfoTable &mapinfo_table) { } static inline void fill_unwind_output_1(UnwindOutput &uw_output) { - 
uw_output_clear(&uw_output); + // uw_output_clear(&uw_output); uw_output.nb_locs = K_MOCK_LOC_SIZE; FunLoc *locs = uw_output.locs; From 78e09574ce9c390fed4f67c98effad1f40d85162 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 21 Nov 2022 22:07:57 +0100 Subject: [PATCH 12/29] Adding a benchmark for the async profiler --- CMakeLists.txt | 10 ++-- include/async-profiler/stackWalker.h | 2 - include/async-profiler/stack_context.h | 2 +- include/async-profiler/symbols.h | 3 +- include/lib/saveregisters.hpp | 2 +- include/pprof/ddprof_pprof.hpp | 7 ++- include/span.hpp | 4 +- include/unwind_output.hpp | 2 +- include/unwind_state.hpp | 7 +-- src/async-profiler/symbols_linux.cpp | 8 +-- src/ddprof_worker.cc | 15 +++-- src/pprof/ddprof_pprof.cc | 17 +++--- src/unwind.cc | 25 +++++---- src/unwind_output.cc | 1 - test/CMakeLists.txt | 34 ++++++++++-- test/async_prof-bench.cc | 76 ++++++++++++++++++++++++++ test/ddprof_exporter-ut.cc | 7 ++- test/ddprof_pprof-ut.cc | 5 +- test/dwarf_unwind-ut.cc | 58 +++++++++++--------- 19 files changed, 196 insertions(+), 89 deletions(-) create mode 100644 test/async_prof-bench.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 140c1a460..139fb398d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,13 +80,14 @@ else() endif() # ---- Dependencies ---- -## -- Async profiler -- +# -- Async profiler -- set(ASYNC_PROFILER_LIB_DIR ${CMAKE_SOURCE_DIR}) set(ASYNC_PROFILER_SRC_DIR ${ASYNC_PROFILER_LIB_DIR}/src/async-profiler) -set(ASYNC_PROFILER_LIB_INCLUDE ${ASYNC_PROFILER_LIB_DIR}/include/async-profiler) +set(ASYNC_PROFILER_LIB_INCLUDE ${ASYNC_PROFILER_LIB_DIR}/include/async-profiler) aux_source_directory(${ASYNC_PROFILER_SRC_DIR} ASYNC_PROFILER_SOURCES) add_library(async_prof_lib STATIC ${ASYNC_PROFILER_SOURCES}) -target_include_directories(async_prof_lib PUBLIC ${ASYNC_PROFILER_LIB_INCLUDE} ${CMAKE_SOURCE_DIR}/include) +target_include_directories(async_prof_lib PUBLIC ${ASYNC_PROFILER_LIB_INCLUDE} + ${CMAKE_SOURCE_DIR}/include) 
set_property(TARGET async_prof_lib PROPERTY POSITION_INDEPENDENT_CODE ON) add_library(DDProf::AsyncProf ALIAS async_prof_lib) # ------------------- @@ -155,7 +156,8 @@ aux_source_directory(src/exe EXE_SRC) # Define all sources set(DDPROF_GLOBAL_SRC ${COMMON_SRC} ${PPROF_SRC} ${EXPORTER_SRC} ${EXE_SRC}) -set(DDPROF_LIBRARY_LIST DDProf::Parser DDProf::AsyncProf llvm-demangle ${ELFUTILS_LIBRARIES} Threads::Threads) +set(DDPROF_LIBRARY_LIST DDProf::Parser DDProf::AsyncProf llvm-demangle ${ELFUTILS_LIBRARIES} + Threads::Threads) if(ON) # Add the rust library - Refactoring ongoing. OFF for now diff --git a/include/async-profiler/stackWalker.h b/include/async-profiler/stackWalker.h index e30c9c8fb..60998482a 100644 --- a/include/async-profiler/stackWalker.h +++ b/include/async-profiler/stackWalker.h @@ -25,8 +25,6 @@ #include "codeCache.h" #include "stack_context.h" - - CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void *address); int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, diff --git a/include/async-profiler/stack_context.h b/include/async-profiler/stack_context.h index 35b9cd057..92eecd86f 100644 --- a/include/async-profiler/stack_context.h +++ b/include/async-profiler/stack_context.h @@ -2,8 +2,8 @@ #pragma once -#include #include "span.hpp" +#include #include "perf_archmap.hpp" diff --git a/include/async-profiler/symbols.h b/include/async-profiler/symbols.h index d0df18428..b06cc2b53 100644 --- a/include/async-profiler/symbols.h +++ b/include/async-profiler/symbols.h @@ -28,7 +28,8 @@ class Symbols { public: static void parseKernelSymbols(CodeCache *cc); static void parseLibraries(CodeCacheArray *array, bool kernel_symbols); - static void parsePidLibraries(pid_t pid, CodeCacheArray *array, bool kernel_symbols); + static void parsePidLibraries(pid_t pid, CodeCacheArray *array, + bool kernel_symbols); static bool haveKernelSymbols() { return _have_kernel_symbols; } }; diff --git a/include/lib/saveregisters.hpp 
b/include/lib/saveregisters.hpp index 24a368fe4..e5f7d0e2c 100644 --- a/include/lib/saveregisters.hpp +++ b/include/lib/saveregisters.hpp @@ -23,10 +23,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once -#include #include "ddprof_base.hpp" #include "perf_archmap.hpp" #include "span.hpp" +#include #if defined(__x86_64__) diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index 268f0a698..028cbebf9 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -12,8 +12,8 @@ #include "tags.hpp" #include "unwind_output.hpp" -#include "span.hpp" #include "async-profiler/codeCache.h" +#include "span.hpp" struct ddog_Profile; struct SymbolHdr; @@ -28,8 +28,9 @@ struct DDProfPProf { DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext *ctx); -DDRes pprof_aggregate_v2(ddprof::span callchain, CodeCacheArray &cache_arary, - uint64_t value, uint64_t count, const PerfWatcher *watcher, +DDRes pprof_aggregate_v2(ddprof::span callchain, + CodeCacheArray &cache_arary, uint64_t value, + uint64_t count, const PerfWatcher *watcher, DDProfPProf *pprof); /** diff --git a/include/span.hpp b/include/span.hpp index 6caa05010..0338a65f1 100644 --- a/include/span.hpp +++ b/include/span.hpp @@ -5,12 +5,10 @@ #pragma once - -# include +#include namespace ddprof { using std::as_bytes; using std::as_writable_bytes; using std::span; } // namespace ddprof - diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index 62eff7046..39cd2a2dd 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -28,7 +28,7 @@ typedef struct UnwindOutput { } UnwindOutput; struct UnwindOutput_V2 { - const void* callchain[DD_MAX_STACK_DEPTH]; + const void *callchain[DD_MAX_STACK_DEPTH]; uint64_t nb_locs; int pid = {}; int tid = {}; diff --git a/include/unwind_state.hpp b/include/unwind_state.hpp index a49a9105c..502026003 100644 --- a/include/unwind_state.hpp +++ b/include/unwind_state.hpp @@ -5,6 
+5,7 @@ #pragma once +#include "async-profiler/codeCache.h" #include "ddprof_defs.hpp" #include "ddres_def.hpp" #include "dso_hdr.hpp" @@ -14,7 +15,6 @@ #include "perf_archmap.hpp" #include "symbol_hdr.hpp" #include "unwind_output.hpp" -#include "async-profiler/codeCache.h" #include @@ -38,8 +38,7 @@ struct UnwindRegisters { /// given through callbacks struct UnwindState { explicit UnwindState(int dd_profiling_fd = -1) - : pid(-1), - stack(nullptr), stack_sz(0), current_ip(0) {} + : pid(-1), stack(nullptr), stack_sz(0), current_ip(0) {} pid_t pid; char *stack; @@ -48,7 +47,7 @@ struct UnwindState { UnwindRegisters initial_regs; ProcessAddress_t current_ip; - std::unordered_map code_cache; + std::unordered_map code_cache; UnwindOutput_V2 output; }; diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 2807c5f8f..0c536153e 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -509,13 +509,14 @@ void Symbols::parseKernelSymbols(CodeCache *cc) { // XXX(nick): omitted } - -void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, bool kernel_symbols) { +void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, + bool kernel_symbols) { std::set parsed_libraries; std::set parsed_inodes; MutexLocker ml(_parse_lock); char proc_map_filename[1024] = {}; - snprintf(proc_map_filename, std::size(proc_map_filename), "%s/proc/%d/maps", "", pid); + snprintf(proc_map_filename, std::size(proc_map_filename), "%s/proc/%d/maps", + "", pid); // todo plug the proc_map open functions (handles user switches) FILE *f = fopen(proc_map_filename, "r"); if (f == NULL) { @@ -552,7 +553,6 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, bool kernel_sy } CodeCache *cc = new CodeCache(map.file(), count, image_base, image_end); - unsigned long inode = map.inode(); if (inode != 0) { // Do not parse the same executable twice, e.g. 
on Alpine Linux diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index 3aaddef15..77a52c798 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -125,7 +125,8 @@ DDRes worker_library_free(DDProfContext *ctx) { } /// Retrieve cpu / memory info -static DDRes worker_update_stats(ProcStatus *procstat, std::chrono::nanoseconds cycle_duration) { +static DDRes worker_update_stats(ProcStatus *procstat, + std::chrono::nanoseconds cycle_duration) { // Update the procstats, but first snapshot the utime so we can compute the // diff for the utime metric int64_t cpu_time_old = procstat->utime + procstat->stime; @@ -240,11 +241,13 @@ DDRes ddprof_pr_sample(DDProfContext *ctx, perf_event_sample *sample, int i_export = ctx->worker_ctx.i_current_pprof; DDProfPProf *pprof = ctx->worker_ctx.pprof[i_export]; - DDRES_CHECK_FWD(pprof_aggregate_v2(ddprof::span(us->output.callchain, us->output.nb_locs), us->code_cache[sample->pid], sample_val, 1, - watcher, pprof)); + DDRES_CHECK_FWD(pprof_aggregate_v2( + ddprof::span(us->output.callchain, us->output.nb_locs), + us->code_cache[sample->pid], sample_val, 1, watcher, pprof)); if (ctx->params.show_samples) { // todo show samples - // ddprof_print_sample(us->output, us->symbol_hdr, sample->period, *watcher); + // ddprof_print_sample(us->output, us->symbol_hdr, sample->period, + // *watcher); } } @@ -339,8 +342,8 @@ DDRes ddprof_worker_cycle(DDProfContext *ctx, int64_t now, ctx->worker_ctx.cycle_start_time = cycle_now; // Scrape procfs for process usage statistics - DDRES_CHECK_FWD(worker_update_stats(&ctx->worker_ctx.proc_status, - cycle_duration)); + DDRES_CHECK_FWD( + worker_update_stats(&ctx->worker_ctx.proc_status, cycle_duration)); // And emit diagnostic output (if it's enabled) print_diagnostics(); diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index a32b7eeeb..08dab4192 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -168,9 +168,7 @@ static void write_line(const 
ddprof::Symbol &symbol, ddog_Line *ffi_line) { ffi_line->line = symbol._lineno; } - -static void write_location_v2(const void* ip, - const ddog_Slice_line *lines, +static void write_location_v2(const void *ip, const ddog_Slice_line *lines, ddog_Location *ffi_location) { ffi_location->address = reinterpret_cast(ip); ffi_location->lines = *lines; @@ -178,12 +176,11 @@ static void write_location_v2(const void* ip, ffi_location->is_folded = false; } -static void write_function_v2(const char* func, - ddog_Function *ffi_func) { +static void write_function_v2(const char *func, ddog_Function *ffi_func) { ffi_func->name = to_CharSlice(string_view_create_strlen(func)); } -static void write_line_v2(const char* func, ddog_Line *ffi_line) { +static void write_line_v2(const char *func, ddog_Line *ffi_line) { write_function_v2(func, &ffi_line->function); ffi_line->line = 0; } @@ -191,9 +188,10 @@ static void write_line_v2(const char* func, ddog_Line *ffi_line) { #include "async-profiler/codeCache.h" #include "async-profiler/stackWalker.h" -DDRes pprof_aggregate_v2(ddprof::span callchain, CodeCacheArray &cache_arary, - uint64_t value, uint64_t count, const PerfWatcher *watcher, - DDProfPProf *pprof) { +DDRes pprof_aggregate_v2(ddprof::span callchain, + CodeCacheArray &cache_arary, uint64_t value, + uint64_t count, const PerfWatcher *watcher, + DDProfPProf *pprof) { ddog_Profile *profile = pprof->_profile; int64_t values[DDPROF_PWT_LENGTH] = {}; @@ -250,7 +248,6 @@ DDRes pprof_aggregate_v2(ddprof::span callchain, CodeCacheArray &c } return ddres_init(); - } // Assumption of API is that sample is valid in a single type diff --git a/src/unwind.cc b/src/unwind.cc index 332bd8fd3..8642fae13 100644 --- a/src/unwind.cc +++ b/src/unwind.cc @@ -17,9 +17,9 @@ #include #include -#include "symbols.h" -#include "stack_context.h" #include "stackWalker.h" +#include "stack_context.h" +#include "symbols.h" using namespace std::string_view_literals; @@ -29,7 +29,8 @@ void 
unwind_init_sample(UnwindState *us, uint64_t *sample_regs, pid_t sample_pid, uint64_t sample_size_stack, char *sample_data_stack) { us->output.nb_locs = 0; - memcpy(&us->initial_regs.regs[0], sample_regs,K_NB_REGS_UNWIND * sizeof(uint64_t)); + memcpy(&us->initial_regs.regs[0], sample_regs, + K_NB_REGS_UNWIND * sizeof(uint64_t)); us->current_ip = us->initial_regs.regs[REGNAME(PC)]; us->pid = sample_pid; us->stack_sz = sample_size_stack; @@ -53,13 +54,17 @@ DDRes unwindstate__unwind(UnwindState *us) { // todo how do we avoid bouncing on this ? } - ddprof::span regs_span{us->initial_regs.regs, PERF_REGS_COUNT}; + ddprof::span regs_span{us->initial_regs.regs, + PERF_REGS_COUNT}; ap::StackContext sc = ap::from_regs(regs_span); - ddprof::span stack{ reinterpret_cast(us->stack), us->stack_sz}; + ddprof::span stack{reinterpret_cast(us->stack), + us->stack_sz}; ap::StackBuffer buffer(stack, sc.sp, sc.sp + us->stack_sz); // todo remove char* in favour of uint64 - us->output.nb_locs = stackWalk(&code_cache_array, sc, buffer, (us->output.callchain), DD_MAX_STACK_DEPTH, 0); + us->output.nb_locs = + stackWalk(&code_cache_array, sc, buffer, (us->output.callchain), + DD_MAX_STACK_DEPTH, 0); } // todo error management (error frame) @@ -69,12 +74,8 @@ DDRes unwindstate__unwind(UnwindState *us) { return res; } -void unwind_pid_free(UnwindState *us, pid_t pid) { - us->code_cache.erase(pid); -} +void unwind_pid_free(UnwindState *us, pid_t pid) { us->code_cache.erase(pid); } -void unwind_cycle(UnwindState *) { - unwind_metrics_reset(); -} +void unwind_cycle(UnwindState *) { unwind_metrics_reset(); } } // namespace ddprof diff --git a/src/unwind_output.cc b/src/unwind_output.cc index 1188de7ce..188932f7a 100644 --- a/src/unwind_output.cc +++ b/src/unwind_output.cc @@ -7,4 +7,3 @@ #include #include "unwind_output.hpp" - diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 892c1a982..02a7b3362 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -240,12 +240,38 @@ 
add_benchmark(timer-bench timer-bench.cc ../src/timer.cc ../src/perf.cc) message(STATUS "Async profiler" ${ASYNC_PROFILER_SRC}) add_unit_test( - dwarf_unwind-ut dwarf_unwind-ut.cc ../src/lib/savecontext.cc ../src/lib/saveregisters.cc - ../src/async-profiler/stack_context.cpp ../src/lib/allocation_tracker.cc ../src/ringbuffer_utils.cc ../src/perf_ringbuffer.cc - ../src/perf.cc ../src/pevent_lib.cc ../src/sys_utils.cc ../src/user_override.cc ../src/perf_watcher.cc - ${ASYNC_PROFILER_SRC} LIBRARIES ${ELFUTILS_LIBRARIES}) + dwarf_unwind-ut + dwarf_unwind-ut.cc + ../src/lib/savecontext.cc + ../src/lib/saveregisters.cc + ../src/async-profiler/stack_context.cpp + ../src/lib/allocation_tracker.cc + ../src/ringbuffer_utils.cc + ../src/perf_ringbuffer.cc + ../src/perf.cc + ../src/pevent_lib.cc + ../src/sys_utils.cc + ../src/user_override.cc + ../src/perf_watcher.cc + LIBRARIES ${ELFUTILS_LIBRARIES} DDProf::AsyncProf) target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE}) +add_benchmark( + async_prof-bench + async_prof-bench.cc + ../src/lib/savecontext.cc + ../src/lib/saveregisters.cc + ../src/async-profiler/stack_context.cpp + ../src/lib/allocation_tracker.cc + ../src/ringbuffer_utils.cc + ../src/perf_ringbuffer.cc + ../src/perf.cc + ../src/pevent_lib.cc + ../src/sys_utils.cc + ../src/user_override.cc + ../src/perf_watcher.cc + LIBRARIES ${ELFUTILS_LIBRARIES} DDProf::AsyncProf) + if(NOT CMAKE_BUILD_TYPE STREQUAL "SanitizedDebug") add_exe( simple_malloc-static simple_malloc.cc diff --git a/test/async_prof-bench.cc b/test/async_prof-bench.cc new file mode 100644 index 000000000..6252f1d5e --- /dev/null +++ b/test/async_prof-bench.cc @@ -0,0 +1,76 @@ + +#include + +#include "savecontext.hpp" +#include "stackWalker.h" +#include "unwind_state.hpp" + +#include "async-profiler/codeCache.h" +#include "async-profiler/stack_context.h" +#include "async-profiler/symbols.h" + +#include "allocation_tracker.hpp" +#include "perf_ringbuffer.hpp" +#include 
"ringbuffer_holder.hpp" +#include "ringbuffer_utils.hpp" +#include "span.hpp" + +DDPROF_NOINLINE size_t func_save(std::span stack, std::span regs); +DDPROF_NOINLINE size_t func_intermediate_1(int i, std::span stack, std::span regs); + +DDPROF_NOINLINE size_t func_save(std::span stack, std::span regs) { + return save_context(retrieve_stack_end_address(), regs, stack); +} + +size_t func_intermediate_1(int i, std::span stack, std::span regs) { + while(i > 0){ + return func_intermediate_1(--i, stack, regs); + } + return func_save(stack, regs); +} + +static void BM_SaveContext(benchmark::State &state) { + CodeCacheArray cache_arary; + Symbols::parseLibraries(&cache_arary, false); + + std::byte stack[PERF_SAMPLE_STACK_SIZE]; + std::array regs; + + constexpr int depth_walk = 10; + + int cpt = 0; + for (auto _ : state) { + // looks like buffer is modified by async profiler + // I need to save context at all loops + // This slightly modifies the bench + size_t size_stack = func_intermediate_1(depth_walk, stack, regs); + ap::StackContext sc = ap::from_regs(regs); + ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); + + void *callchain[DD_MAX_STACK_DEPTH]; + int n = + stackWalk(&cache_arary, sc, buffer, + const_cast(callchain), DD_MAX_STACK_DEPTH, 0); + if (unlikely(n < depth_walk)) { + exit(1); + } + + const char *syms[128]; + for (int i = 0; i < n; ++i) { + { // retrieve symbol + CodeCache *code_cache = findLibraryByAddress( + &cache_arary, reinterpret_cast(callchain[i])); + if (likely(code_cache)) { + syms[i] = code_cache->binarySearch(callchain[i]); +// printf("IP[%d] = %p - %s\n", i, callchain[i], syms[i]); + } + else { + printf("error"); + } + cpt += strlen(syms[i]); + } + } + } +} + +BENCHMARK(BM_SaveContext); diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 9e4d61e3d..866966042 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -143,7 +143,7 @@ TEST(DDProfExporter, simple) { SymbolTable &table = 
symbol_hdr._symbol_table; MapInfoTable &mapinfo_table = symbol_hdr._mapinfo_table; -// fill_unwind_symbols(table, mapinfo_table, mock_output); + // fill_unwind_symbols(table, mapinfo_table, mock_output); DDProfContext ctx = {}; ctx.watchers[0] = *ewatcher_from_str("sCPU"); @@ -151,8 +151,9 @@ TEST(DDProfExporter, simple) { res = pprof_create_profile(&pprofs, &ctx); EXPECT_TRUE(IsDDResOK(res)); -// res = pprof_aggregate(&mock_output, &symbol_hdr, 1000, 1, &ctx.watchers[0], -// &pprofs); + // res = pprof_aggregate(&mock_output, &symbol_hdr, 1000, 1, + // &ctx.watchers[0], + // &pprofs); EXPECT_TRUE(IsDDResOK(res)); } { diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index 5fdcb4634..13845baa0 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -73,8 +73,9 @@ TEST(DDProfPProf, aggregate) { ctx.num_watchers = 1; DDRes res = pprof_create_profile(&pprof, &ctx); EXPECT_TRUE(IsDDResOK(res)); -// res = pprof_aggregate(&mock_output, &symbol_hdr, 1000, 1, &ctx.watchers[0], -// &pprof); + // res = pprof_aggregate(&mock_output, &symbol_hdr, 1000, 1, + // &ctx.watchers[0], + // &pprof); EXPECT_TRUE(IsDDResOK(res)); diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 03abcbfac..553c0c1ca 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -1,16 +1,14 @@ #include #include "savecontext.hpp" -#include "unwind_state.hpp" -// #include "symbol.hpp" #include "stackWalker.h" +#include "unwind_state.hpp" #include #include "async-profiler/codeCache.h" -#include "async-profiler/symbols.h" - #include "async-profiler/stack_context.h" +#include "async-profiler/symbols.h" // Retrieves instruction pointer #define _THIS_IP_ \ @@ -25,7 +23,6 @@ // temp copy pasta #define PERF_SAMPLE_STACK_SIZE (4096UL * 8) - std::byte stack[PERF_SAMPLE_STACK_SIZE]; DDPROF_NOINLINE size_t funcA(std::array ®s); @@ -55,9 +52,9 @@ TEST(dwarf_unwind, simple) { ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); void *callchain[128]; - int n = 
stackWalk(&cache_arary, sc, buffer, const_cast(callchain), - 128, 0); - const char* syms[128]; + int n = stackWalk(&cache_arary, sc, buffer, + const_cast(callchain), 128, 0); + const char *syms[128]; for (int i = 0; i < n; ++i) { { // retrieve symbol CodeCache *code_cache = findLibraryByAddress( @@ -75,27 +72,30 @@ TEST(dwarf_unwind, simple) { ASSERT_TRUE(std::string(syms[2]).find("funcA") != std::string::npos); } - - -#include "ringbuffer_holder.hpp" -#include "ringbuffer_utils.hpp" #include "allocation_tracker.hpp" #include "perf_ringbuffer.hpp" +#include "ringbuffer_holder.hpp" +#include "ringbuffer_utils.hpp" #include "span.hpp" DDPROF_NOINLINE void func_save_sleep(size_t size); -DDPROF_NOINLINE void func_intermediate(size_t size); +DDPROF_NOINLINE void func_intermediate_0(size_t size); +DDPROF_NOINLINE void func_intermediate_1(size_t size); DDPROF_NOINLINE void func_save_sleep(size_t size) { - ddprof::AllocationTracker::track_allocation(0xdeadbeef, size); - // prevent tail call optimization - getpid(); - sleep(1); + int i = 0; + while (++i < 100000) { + ddprof::AllocationTracker::track_allocation(0xdeadbeef, size); + // prevent tail call optimization + getpid(); + usleep(100); + // printf("Save context nb -- %d \n", i); + } } -void func_intermediate(size_t size) { - func_save_sleep(size); -} +void func_intermediate_0(size_t size) { func_intermediate_1(size); } + +void func_intermediate_1(size_t size) { func_save_sleep(size); } TEST(dwarf_unwind, remote) { const uint64_t rate = 1; @@ -110,7 +110,7 @@ TEST(dwarf_unwind, remote) { // Fork pid_t temp_pid = fork(); if (!temp_pid) { - func_intermediate(10); + func_intermediate_0(10); return; } @@ -131,16 +131,19 @@ TEST(dwarf_unwind, remote) { // convert based on mask for this watcher (default in this case) perf_event_sample *sample = hdr2samp(hdr, perf_event_default_sample_type()); - ddprof::span regs_span{sample->regs, PERF_REGS_COUNT}; + ddprof::span regs_span{sample->regs, + PERF_REGS_COUNT}; ap::StackContext sc 
= ap::from_regs(regs_span); - ddprof::span stack{ reinterpret_cast(sample->data_stack), sample->size_stack}; + ddprof::span stack{ + reinterpret_cast(sample->data_stack), sample->size_stack}; ap::StackBuffer buffer(stack, sc.sp, sc.sp + sample->size_stack); void *callchain[DD_MAX_STACK_DEPTH]; - int n = stackWalk(&cache_arary, sc, buffer, const_cast(callchain), - DD_MAX_STACK_DEPTH, 0); + int n = + stackWalk(&cache_arary, sc, buffer, const_cast(callchain), + DD_MAX_STACK_DEPTH, 0); - std::array syms; + std::array syms; for (int i = 0; i < n; ++i) { { // retrieve symbol CodeCache *code_cache = findLibraryByAddress( @@ -150,6 +153,7 @@ TEST(dwarf_unwind, remote) { printf("IP = %p - %s\n", callchain[i], syms[i]); } } + // cleanup the producer fork + kill(temp_pid, SIGTERM); } - } From c0047e00f8e04b444a2a61cdb301bafcbcbb8650 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 22 Nov 2022 13:21:48 +0100 Subject: [PATCH 13/29] Prevent tail call optimisation --- test/async_prof-bench.cc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/test/async_prof-bench.cc b/test/async_prof-bench.cc index 6252f1d5e..4293edabe 100644 --- a/test/async_prof-bench.cc +++ b/test/async_prof-bench.cc @@ -22,14 +22,19 @@ DDPROF_NOINLINE size_t func_save(std::span stack, std::span stack, std::span regs) { +DDPROF_NOINLINE size_t func_intermediate_1(int i, std::span stack, std::span regs) { while(i > 0){ - return func_intermediate_1(--i, stack, regs); + size_t size = func_intermediate_1(--i, stack, regs); + DDPROF_BLOCK_TAIL_CALL_OPTIMIZATION(); + return size; } - return func_save(stack, regs); + size_t size = func_save(stack, regs); + DDPROF_BLOCK_TAIL_CALL_OPTIMIZATION(); + return size; + } -static void BM_SaveContext(benchmark::State &state) { +static void BM_UnwindSameStack(benchmark::State &state) { CodeCacheArray cache_arary; Symbols::parseLibraries(&cache_arary, false); @@ -52,6 +57,7 @@ static void BM_SaveContext(benchmark::State &state) { 
stackWalk(&cache_arary, sc, buffer, const_cast(callchain), DD_MAX_STACK_DEPTH, 0); if (unlikely(n < depth_walk)) { + printf("n = %d \n", n); exit(1); } @@ -66,6 +72,7 @@ static void BM_SaveContext(benchmark::State &state) { } else { printf("error"); + exit(1); } cpt += strlen(syms[i]); } @@ -73,4 +80,4 @@ static void BM_SaveContext(benchmark::State &state) { } } -BENCHMARK(BM_SaveContext); +BENCHMARK(BM_UnwindSameStack); From 6af2003ebc9c968b0d6957c38e5135e73512b69f Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 23 Nov 2022 14:56:40 +0100 Subject: [PATCH 14/29] Minor change in comment --- test/async_prof-bench.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/async_prof-bench.cc b/test/async_prof-bench.cc index 4293edabe..c269555f9 100644 --- a/test/async_prof-bench.cc +++ b/test/async_prof-bench.cc @@ -47,7 +47,7 @@ static void BM_UnwindSameStack(benchmark::State &state) { for (auto _ : state) { // looks like buffer is modified by async profiler // I need to save context at all loops - // This slightly modifies the bench + // This modifies what we are measuring size_t size_stack = func_intermediate_1(depth_walk, stack, regs); ap::StackContext sc = ap::from_regs(regs); ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); From 15f3093f146825c6f81cabc8c6cb59ef530af985 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Fri, 25 Nov 2022 11:52:16 +0100 Subject: [PATCH 15/29] Remove the save context from the benchmark operation --- test/async_prof-bench.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/async_prof-bench.cc b/test/async_prof-bench.cc index c269555f9..2bb637f51 100644 --- a/test/async_prof-bench.cc +++ b/test/async_prof-bench.cc @@ -42,15 +42,16 @@ static void BM_UnwindSameStack(benchmark::State &state) { std::array regs; constexpr int depth_walk = 10; + // looks like buffer is modified by async profiler + // I need to save context at all loops + // This modifies what we are measuring + 
size_t size_stack = func_intermediate_1(depth_walk, stack, regs); + ap::StackContext sc = ap::from_regs(regs); + ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); int cpt = 0; for (auto _ : state) { - // looks like buffer is modified by async profiler - // I need to save context at all loops - // This modifies what we are measuring - size_t size_stack = func_intermediate_1(depth_walk, stack, regs); ap::StackContext sc = ap::from_regs(regs); - ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); void *callchain[DD_MAX_STACK_DEPTH]; int n = @@ -78,6 +79,7 @@ static void BM_UnwindSameStack(benchmark::State &state) { } } } + printf("total cpt = %d \n", cpt); } BENCHMARK(BM_UnwindSameStack); From 81b9d2dea8b3d5019bc0d6d0354c35e391dfb036 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 29 Nov 2022 11:57:08 +0100 Subject: [PATCH 16/29] Minor env fixes - Fix for zsh - Fix for gcc 12 --- CMakeLists.txt | 20 +++++++++++--------- src/lib/elfutils.cc | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 139fb398d..11be0f7a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,16 @@ else() endif() # ---- Dependencies ---- + +# libdatadog_profiling +include(Findlibdatadog) + +# Event Parser +add_subdirectory(src/event_parser) + +# elfutils +include(Findelfutils) + # -- Async profiler -- set(ASYNC_PROFILER_LIB_DIR ${CMAKE_SOURCE_DIR}) set(ASYNC_PROFILER_SRC_DIR ${ASYNC_PROFILER_LIB_DIR}/src/async-profiler) @@ -88,19 +98,11 @@ aux_source_directory(${ASYNC_PROFILER_SRC_DIR} ASYNC_PROFILER_SOURCES) add_library(async_prof_lib STATIC ${ASYNC_PROFILER_SOURCES}) target_include_directories(async_prof_lib PUBLIC ${ASYNC_PROFILER_LIB_INCLUDE} ${CMAKE_SOURCE_DIR}/include) +target_link_libraries(async_prof_lib PRIVATE elf) set_property(TARGET async_prof_lib PROPERTY POSITION_INDEPENDENT_CODE ON) add_library(DDProf::AsyncProf ALIAS async_prof_lib) # ------------------- -# libdatadog_profiling 
-include(Findlibdatadog) - -# Event Parser -add_subdirectory(src/event_parser) - -# elfutils -include(Findelfutils) - # ---- Static analysis ---- include(ClangTidy) include(Format) diff --git a/src/lib/elfutils.cc b/src/lib/elfutils.cc index 64d35e6a1..43930b9ad 100644 --- a/src/lib/elfutils.cc +++ b/src/lib/elfutils.cc @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef __ELF_NATIVE_CLASS # define __ELF_NATIVE_CLASS 64 From f562ce94d6732b4d3fb321b1cfed31e813cb1778 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 29 Nov 2022 14:08:57 +0100 Subject: [PATCH 17/29] Minor fix for zsh --- setup_env.sh | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/setup_env.sh b/setup_env.sh index b320754d2..7e5e25d99 100755 --- a/setup_env.sh +++ b/setup_env.sh @@ -1,6 +1,8 @@ # Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. # This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. 
+echoerr() { echo "$@" 1>&2; } + # Run source ./setup_env.sh export PATH=$PATH:${PWD}/tools:${PWD}/bench/runners @@ -16,6 +18,7 @@ for cc_ver in gcc-{12..9}; do break fi done + for cxx_ver in g++-{12..9}; do if command -v "$cxx_ver" > /dev/null; then DDPROF_CXX_DEFAULT="$cxx_ver" @@ -23,11 +26,16 @@ for cxx_ver in g++-{12..9}; do fi done +echoerr "Using DDPROF_CXX_DEFAULT=${DDPROF_CXX_DEFAULT}" +echoerr "Using DDPROF_CC_DEFAULT=${DDPROF_CC_DEFAULT}" +echoerr "Compiler can be overriden with CXX and CC variables when sourcing ${0}" + SCRIPTDIR="$(cd -- $( dirname -- "${BASH_SOURCE[0]}" ) && pwd)" # no "$0" when sourcing DDPROF_INSTALL_PREFIX="../deliverables" DDPROF_BUILD_BENCH="ON" NATIVE_LIB="ON" -COMPILER_SETTING="-DCMAKE_CXX_COMPILER=${CXX:-"${DDPROF_CXX_DEFAULT}"} -DCMAKE_C_COMPILER=${CC:-"${DDPROF_CC_DEFAULT}"}" +COMPILER_SETTING="-DCMAKE_CXX_COMPILER=\"${CXX:-${DDPROF_CXX_DEFAULT}}\" -DCMAKE_C_COMPILER=\"${CC:-${DDPROF_CC_DEFAULT}}\"" + # Avoid having the vendors compiled in the same directory EXTENSION_CC=${CC:-"gcc"} # strip version number from compiler @@ -49,11 +57,13 @@ GetDefaultAllocatorOptions() { } GetDirectoryExtention() { - echo "_${EXTENSION_CC,,}_${EXTENSION_OS,,}_${1}" + echo "_${EXTENSION_CC}_${EXTENSION_OS}_${1}" } COMMON_OPT="${COMPILER_SETTING} ${DEFAULT_ALLOCATOR_OPT} -DACCURACY_TEST=ON -DCMAKE_INSTALL_PREFIX=${DDPROF_INSTALL_PREFIX} -DBUILD_BENCHMARKS=${DDPROF_BUILD_BENCH} -DBUILD_NATIVE_LIB=${NATIVE_LIB}" +# echoerr "Cmake settings--\n ${COMMON_OPT}" + CmakeWithOptions() { # Build mode # Extra Parameters to cmake @@ -61,7 +71,10 @@ CmakeWithOptions() { shift VENDOR_EXTENSION=$(GetDirectoryExtention ${BUILD_TYPE}) # shellcheck disable=SC2086 - cmake ${COMMON_OPT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DVENDOR_EXTENSION=${VENDOR_EXTENSION} $@ + cmake_cmd="cmake ${COMMON_OPT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DVENDOR_EXTENSION=${VENDOR_EXTENSION} $@" + echoerr "-------------- cmake command -------------- " + echoerr ${cmake_cmd} + eval 
${cmake_cmd} } RelCMake() { From e8989aba678ac65401395ee633ed3d36942c433e Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 30 Nov 2022 11:22:47 +0100 Subject: [PATCH 18/29] Version allowing to run async profiler with ddprof --- design_notes.txt | 39 ++++++++- include/async-profiler/codeCache.h | 6 +- src/async-profiler/stackWalker.cpp | 9 ++- src/async-profiler/symbols_linux.cpp | 117 +++++++++++++++++++++++++-- test/async_prof-bench.cc | 42 ++++++---- test/dwarf_unwind-ut.cc | 5 +- 6 files changed, 189 insertions(+), 29 deletions(-) diff --git a/design_notes.txt b/design_notes.txt index 761f29dba..b0d682d1d 100644 --- a/design_notes.txt +++ b/design_notes.txt @@ -25,14 +25,11 @@ parseLibraries parses everything in proc self. ### Step 1 -- ensure we can have one code array per PID - ### Step 2 -- ensure symbols are shared across PIDs - ### Step 3 -- ensure unwinding tables are shared across PIDs - ### Junk notes Write an API that can work with ddprof object model @@ -49,8 +46,42 @@ Start is 0 or for non PIE, - Keep that ? - ### Issues - We are moving from a lazy to an absolute load - We don't have enough tests + +- We are not consider elf versions (though do we care ?) 
+- read past sp --> check with + + + +### Async profiler load in symbols_linux + +--> Create code cache +library name +Index --> count of lib +image base --> start +image end --> end + +--> Parse program headers +text_base --> set as base +---> parseDynamicSection +global offset table --> Example (Shit it is absolute) +GOT start == 0x5555556226b0 +GOT start == 0x7ffff7ffd018 + +relocation -> relent size of relocation entry +!We already have ways to parse GOT, we don't care + +---> parseDwarfInfo +parseDwarfInfo() +looks like the values are relative (wouhou) +then we set the dwarf table (so nothing to change) + +-> VDSO +--> parse memory + + +// TODO tree of life + diff --git a/include/async-profiler/codeCache.h b/include/async-profiler/codeCache.h index 3e2359e19..ad2bc8098 100644 --- a/include/async-profiler/codeCache.h +++ b/include/async-profiler/codeCache.h @@ -84,8 +84,6 @@ class CodeCache { void **_got_end; bool _got_patchable; - FrameDesc *_dwarf_table; - int _dwarf_table_length; int _capacity; int _count; @@ -94,6 +92,10 @@ class CodeCache { void expand(); public: + // todo fix hacky override for remote + FrameDesc *_dwarf_table; + int _dwarf_table_length; + CodeCache(const char *name, short lib_index = -1, const void *min_address = NO_MIN_ADDRESS, const void *max_address = NO_MAX_ADDRESS); diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index 8e1c99948..75e777770 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp @@ -105,7 +105,14 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, if ((sc.sp & (sizeof(uintptr_t) - 1)) != 0) { return false; } - + // high addr + // + // sp(2) + // + // sp(3) (-32 fp_off) + // + // red zone + // low addr if (f->fp_off & DW_PC_OFFSET) { sc.pc = (const char *)sc.pc + (f->fp_off >> 1); } else { diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 0c536153e..0ab72e488 100644 --- 
a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -34,6 +34,11 @@ # include # include +# include +# include + +# define LG_WRN(...) printf(__VA_ARGS__) + class SymbolDesc { private: const char *_addr; @@ -185,12 +190,16 @@ class ElfParser { bool loadSymbolsUsingDebugLink(); void loadSymbolTable(ElfSection *symtab); void addRelocationSymbols(ElfSection *reltab, const char *plt); - public: + static const char* get_self_vdso(void); static void parseProgramHeaders(CodeCache *cc, const char *base); + static bool parseProgramHeadersRemote(Elf *elf, CodeCache *cc, + const char *base, + const char *mmap_addr); static bool parseFile(CodeCache *cc, const char *base, const char *file_name, bool use_debug); static void parseMem(CodeCache *cc, const char *base); + static void parseMemRemote(CodeCache *cc, const char *base, const char *addr); }; ElfSection *ElfParser::findSection(uint32_t type, const char *name) { @@ -234,8 +243,7 @@ bool ElfParser::parseFile(CodeCache *cc, const char *base, close(fd); if (addr == MAP_FAILED) { - // Log::warn("Could not parse symbols from %s: %s", file_name, - // strerror(errno)); + LG_WRN("Could not parse symbols from %s: %s", file_name, strerror(errno)); } else { ElfParser elf(cc, base, addr, file_name); if (elf.validHeader()) { @@ -246,6 +254,13 @@ bool ElfParser::parseFile(CodeCache *cc, const char *base, return true; } +void ElfParser::parseMemRemote(CodeCache *cc, const char *base, const char *addr) { + ElfParser elf(cc, base, addr); + if (elf.validHeader()) { + elf.loadSymbols(false); + } +} + void ElfParser::parseMem(CodeCache *cc, const char *base) { ElfParser elf(cc, base, base); if (elf.validHeader()) { @@ -253,6 +268,24 @@ void ElfParser::parseMem(CodeCache *cc, const char *base) { } } +// remote opens the elf file +bool ElfParser::parseProgramHeadersRemote(Elf *elf, CodeCache *cc, + const char *base, + const char *mmap_addr) { + // todo check if I can use base + ElfParser elf_remote(cc, 
reinterpret_cast(mmap_addr), mmap_addr); + if (elf_remote.validHeader()) { + cc->setTextBase(mmap_addr); + elf_remote.parseDynamicSection(); + elf_remote.parseDwarfInfo(); + return true ; + } + else { + printf("invalid header \n"); + } + return false; +} + void ElfParser::parseProgramHeaders(CodeCache *cc, const char *base) { ElfParser elf(cc, base, base); if (elf.validHeader()) { @@ -300,11 +333,12 @@ void ElfParser::parseDynamicSection() { break; } } - + printf("relent = %d \n", relent); if (relent != 0) { if (pltrelsz != 0 && got_start != NULL) { // The number of entries in .got.plt section matches the number of // entries in .rela.plt + printf("GOT start == %p \n", got_start); _cc->setGlobalOffsetTable(got_start, got_start + pltrelsz / relent, false); } else if (rel != NULL && relsz != 0) { @@ -333,6 +367,9 @@ void ElfParser::parseDynamicSection() { } } } + else { + printf("No dynamic section \n"); + } } void ElfParser::parseDwarfInfo() { @@ -343,6 +380,7 @@ void ElfParser::parseDwarfInfo() { if (eh_frame_hdr != NULL) { DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); _cc->setDwarfTable(dwarf.table(), dwarf.count()); + printf("Created a number of dwarf entries = %d \n", dwarf.count()); } } @@ -509,6 +547,36 @@ void Symbols::parseKernelSymbols(CodeCache *cc) { // XXX(nick): omitted } + +const char* ElfParser::get_self_vdso(void) { + FILE *f = fopen("/proc/self/maps", "r"); + const char *addr_vdso = nullptr; + + if (f == NULL) { + return nullptr; + } + char *str = NULL; + size_t str_size = 0; + ssize_t len; + + while ((len = getline(&str, &str_size, f)) > 0) { + str[len - 1] = 0; + + MemoryMapDesc map(str); + if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { + continue; + } + const char *image_base = map.addr(); + if (map.isExecutable()) { + if (strcmp(map.file(), "[vdso]") == 0) { + addr_vdso = image_base; // found it + break; + } + } + } + return addr_vdso; +} + void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, bool 
kernel_symbols) { std::set parsed_libraries; @@ -528,6 +596,8 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, char *str = NULL; size_t str_size = 0; ssize_t len; + // tell elf what version we are using + elf_version(EV_CURRENT); while ((len = getline(&str, &str_size, f)) > 0) { str[len - 1] = 0; @@ -554,17 +624,52 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, CodeCache *cc = new CodeCache(map.file(), count, image_base, image_end); unsigned long inode = map.inode(); + CodeCache *cc_remote = nullptr; + printf("Considering %s \n", map.file()); if (inode != 0) { + // remote unwinding + int fd = open(map.file(), O_RDONLY); + if (-1 == fd) { + printf("error opening file %s \n", map.file()); + continue; + } + size_t length = (size_t)lseek64(fd, 0, SEEK_END); + void *addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); + Elf *elf = elf_begin(fd, ELF_C_READ, NULL); + if (elf == NULL || addr == MAP_FAILED) { + LG_WRN("Invalid elf %s (efl:%p, addr_mmap:%p)\n", map.file(), elf, + addr); + goto continue_loop; + } + // temp structure to load everything + cc_remote = new CodeCache(map.file(), count, addr, addr+length); + // Do not parse the same executable twice, e.g. on Alpine Linux if (parsed_inodes.insert(map.dev() | inode << 16).second) { // Be careful: executable file is not always ELF, e.g. 
classes.jsa if ((image_base -= map.offs()) >= last_readable_base) { - ElfParser::parseProgramHeaders(cc, image_base); +// ElfParser::parseProgramHeaders(cc, image_base); + if (ElfParser::parseProgramHeadersRemote(elf, cc_remote, image_base, reinterpret_cast(addr))) { + cc->setTextBase(image_base); + cc->setDwarfTable(cc_remote->_dwarf_table, cc_remote->_dwarf_table_length); + // avoid deleting it (move it) + cc_remote->setDwarfTable(nullptr, 0); + } } + ElfParser::parseFile(cc, image_base, map.file(), true); } + + continue_loop: + close(fd); + elf_end(elf); // no-op if null + munmap(addr, length); + // we transfered everything away so we can delete this + delete cc_remote; } else if (strcmp(map.file(), "[vdso]") == 0) { - ElfParser::parseMem(cc, image_base); + // find our self address for vdso + const char *addr_vdso = ElfParser::get_self_vdso(); + ElfParser::parseMemRemote(cc, image_base, addr_vdso); } cc->sort(); diff --git a/test/async_prof-bench.cc b/test/async_prof-bench.cc index 2bb637f51..f68b26197 100644 --- a/test/async_prof-bench.cc +++ b/test/async_prof-bench.cc @@ -15,23 +15,33 @@ #include "ringbuffer_utils.hpp" #include "span.hpp" -DDPROF_NOINLINE size_t func_save(std::span stack, std::span regs); -DDPROF_NOINLINE size_t func_intermediate_1(int i, std::span stack, std::span regs); +DDPROF_NOINLINE size_t func_save(std::span stack, + std::span regs); +DDPROF_NOINLINE size_t +func_intermediate_1(int i, std::span stack, + std::span regs); -DDPROF_NOINLINE size_t func_save(std::span stack, std::span regs) { - return save_context(retrieve_stack_end_address(), regs, stack); +DDPROF_NOINLINE size_t func_save(ddprof::span stack, + ddprof::span regs) { + static thread_local size_t tl_size = 0; + if (!tl_size) { + tl_size = save_context(retrieve_stack_end_address(), regs, stack); + } + DDPROF_BLOCK_TAIL_CALL_OPTIMIZATION(); + return tl_size; } -DDPROF_NOINLINE size_t func_intermediate_1(int i, std::span stack, std::span regs) { - while(i > 0){ +DDPROF_NOINLINE 
size_t +func_intermediate_1(int i, std::span stack, + std::span regs) { + while (i > 0) { size_t size = func_intermediate_1(--i, stack, regs); DDPROF_BLOCK_TAIL_CALL_OPTIMIZATION(); return size; } - size_t size = func_save(stack, regs); - DDPROF_BLOCK_TAIL_CALL_OPTIMIZATION(); - return size; - + size_t size = func_save(stack, regs); + DDPROF_BLOCK_TAIL_CALL_OPTIMIZATION(); + return size; } static void BM_UnwindSameStack(benchmark::State &state) { @@ -46,12 +56,15 @@ static void BM_UnwindSameStack(benchmark::State &state) { // I need to save context at all loops // This modifies what we are measuring size_t size_stack = func_intermediate_1(depth_walk, stack, regs); - ap::StackContext sc = ap::from_regs(regs); - ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); + ap::StackContext sc2 = ap::from_regs(regs); + ap::StackBuffer buffer(stack, sc2.sp, sc2.sp + size_stack); int cpt = 0; for (auto _ : state) { + // regs are over writen by async profiler + size_t size_stack = func_intermediate_1(depth_walk, stack, regs); ap::StackContext sc = ap::from_regs(regs); + ap::StackBuffer buffer(stack, sc2.sp, sc2.sp + size_stack); void *callchain[DD_MAX_STACK_DEPTH]; int n = @@ -69,9 +82,8 @@ static void BM_UnwindSameStack(benchmark::State &state) { &cache_arary, reinterpret_cast(callchain[i])); if (likely(code_cache)) { syms[i] = code_cache->binarySearch(callchain[i]); -// printf("IP[%d] = %p - %s\n", i, callchain[i], syms[i]); - } - else { + // printf("IP[%d] = %p - %s\n", i, callchain[i], syms[i]); + } else { printf("error"); exit(1); } diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 553c0c1ca..17c054175 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -110,12 +110,15 @@ TEST(dwarf_unwind, remote) { // Fork pid_t temp_pid = fork(); if (!temp_pid) { - func_intermediate_0(10); +// func_intermediate_0(10); + char*const argList[] = {"sleep", "10", nullptr}; + execvp("sleep", argList); return; } // Load libraries from the fork - Cache 
array is relent to a single pid CodeCacheArray cache_arary; + sleep(1); Symbols::parsePidLibraries(temp_pid, &cache_arary, false); // Establish a ring buffer ? From 67658106facfe39e9c65f857a01c76a060bf95ed Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 1 Dec 2022 10:28:02 +0100 Subject: [PATCH 19/29] Add a small loader tool to compare async profiler's load vs remote load. --- src/async-profiler/symbols_linux.cpp | 1 + test/CMakeLists.txt | 2 + test/loader/CMakeLists.txt | 7 ++++ test/loader/main.cc | 59 ++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+) create mode 100644 test/loader/CMakeLists.txt create mode 100644 test/loader/main.cc diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 0ab72e488..dd666003b 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -718,6 +718,7 @@ void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { if (!parsed_libraries.insert(image_base).second) { continue; // the library was already parsed } + printf("Considering %s \n", map.file()); int count = array->count(); if (count >= MAX_NATIVE_LIBS) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 02a7b3362..9747989fa 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -318,3 +318,5 @@ add_test( NAME ddprof_help COMMAND ddprof -h WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory(loader) diff --git a/test/loader/CMakeLists.txt b/test/loader/CMakeLists.txt new file mode 100644 index 000000000..13b7403f2 --- /dev/null +++ b/test/loader/CMakeLists.txt @@ -0,0 +1,7 @@ +message(STATUS "Compiling an async profiler loader") +add_exe( + async_prof_load main.cc + LIBRARIES dl DDProf::AsyncProf) + +target_include_directories(async_prof_load PRIVATE ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/third_party) diff --git a/test/loader/main.cc b/test/loader/main.cc new file mode 100644 index 000000000..20a51b854 --- /dev/null +++ 
b/test/loader/main.cc @@ -0,0 +1,59 @@ +#include +#include "CLI/CLI11.hpp" + +#include "async-profiler/codeCache.h" +#include "async-profiler/symbols.h" + +#include +#include + +/***************************** SIGSEGV Handler *******************************/ +static void sigsegv_handler(int sig, siginfo_t *si, void *uc) { + // TODO this really shouldn't call printf-family functions... + (void)uc; +#ifdef __GLIBC__ + static void *buf[4096] = {0}; + size_t sz = backtrace(buf, 4096); +#endif + fprintf(stderr, "loader[%d]: has encountered an error and will exit\n", + getpid()); + if (sig == SIGSEGV) + printf("Fault address: %p\n", si->si_addr); +#ifdef __GLIBC__ + backtrace_symbols_fd(buf, sz, STDERR_FILENO); +#endif + exit(-1); +} + +static void install_segfault_handler(){ + struct sigaction sigaction_handlers = {}; + sigaction_handlers.sa_sigaction = sigsegv_handler; + sigaction_handlers.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &(sigaction_handlers), NULL); +} + +int main(int argc, char *argv[]) { + install_segfault_handler(); + + CLI::App app{"Loads a library then tries to use async profiler"}; + + // Options opts; + std::vector exec_args; + std::string lib_str; + + app.add_option("--lib", lib_str, "Library to open")->required()->check(CLI::ExistingFile); + CLI11_PARSE(app, argc, argv); + printf("Welcome to a library loader using the async profiler\n"); + void *handle = dlopen(lib_str.c_str(), RTLD_NOW); + if (!handle) { + printf("Error opening the library \n"); + exit(1); + } + { + CodeCacheArray cache_arary; + Symbols::parseLibraries(&cache_arary, false); + } + printf("Closing gracefully\n"); + dlclose(handle); + return 0; +} From cfd7bd5a989986e9342143c70bab3b667db90e3a Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 4 Jan 2023 15:52:09 +0100 Subject: [PATCH 20/29] Work in progress Adaptations to ensure we can use async profiler remotely --- CMakeLists.txt | 2 +- include/async-profiler/codeCache.h | 1 - include/async-profiler/elf_helpers.h | 13 +++ 
src/async-profiler/dwarf.cpp | 19 ++- src/async-profiler/elf_helpers.cpp | 168 +++++++++++++++++++++++++++ src/async-profiler/symbols_linux.cpp | 83 ++++++++----- src/ddprof_module_lib.cc | 5 + src/lib/elfutils.cc | 2 +- test/dwarf_unwind-ut.cc | 10 +- test/loader/CMakeLists.txt | 5 +- test/loader/main.cc | 16 ++- test/loader/note.txt | 96 +++++++++++++++ 12 files changed, 378 insertions(+), 42 deletions(-) create mode 100644 include/async-profiler/elf_helpers.h create mode 100644 src/async-profiler/elf_helpers.cpp create mode 100644 test/loader/note.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 11be0f7a2..22abcb251 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,7 +98,7 @@ aux_source_directory(${ASYNC_PROFILER_SRC_DIR} ASYNC_PROFILER_SOURCES) add_library(async_prof_lib STATIC ${ASYNC_PROFILER_SOURCES}) target_include_directories(async_prof_lib PUBLIC ${ASYNC_PROFILER_LIB_INCLUDE} ${CMAKE_SOURCE_DIR}/include) -target_link_libraries(async_prof_lib PRIVATE elf) +target_link_libraries(async_prof_lib PRIVATE dw elf) set_property(TARGET async_prof_lib PROPERTY POSITION_INDEPENDENT_CODE ON) add_library(DDProf::AsyncProf ALIAS async_prof_lib) # ------------------- diff --git a/include/async-profiler/codeCache.h b/include/async-profiler/codeCache.h index ad2bc8098..534996d9e 100644 --- a/include/async-profiler/codeCache.h +++ b/include/async-profiler/codeCache.h @@ -84,7 +84,6 @@ class CodeCache { void **_got_end; bool _got_patchable; - int _capacity; int _count; CodeBlob *_blobs; diff --git a/include/async-profiler/elf_helpers.h b/include/async-profiler/elf_helpers.h new file mode 100644 index 000000000..dd97054b1 --- /dev/null +++ b/include/async-profiler/elf_helpers.h @@ -0,0 +1,13 @@ +#pragma once + +#include "ddprof_defs.hpp" + +struct Elf; + +bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, + Offset_t &bias_offset, Offset_t &text_base); + +const char* get_section_data(Elf *elf, const char *section_name); + + +bool 
process_fdes(Elf *elf); diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp index 68640dfd1..7f3e53164 100644 --- a/src/async-profiler/dwarf.cpp +++ b/src/async-profiler/dwarf.cpp @@ -20,6 +20,10 @@ #include "dwarf.h" #include +#include + +#define DEBUG + enum { DW_CFA_nop = 0x0, DW_CFA_set_loc = 0x1, @@ -90,15 +94,23 @@ void DwarfParser::parse(const char *eh_frame_hdr) { u8 fde_count_enc = eh_frame_hdr[2]; u8 table_enc = eh_frame_hdr[3]; + printf("eh_frame_ptr_enc = %lx \n", eh_frame_ptr_enc); + printf("table_enc = %lx \n", table_enc); if (version != 1 || (eh_frame_ptr_enc & 0x7) != 0x3 || (fde_count_enc & 0x7) != 0x3 || (table_enc & 0xf7) != 0x33) { return; } int fde_count = *(int *)(eh_frame_hdr + 8); +#ifdef DEBUG + printf("fde count = %d \n", fde_count); +#endif int *table = (int *)(eh_frame_hdr + 16); for (int i = 0; i < fde_count; i++) { _ptr = eh_frame_hdr + table[i * 2]; + if (i == 0) { + printf("ptr = %lx, table offset = %lx \n", _ptr, table[i * 2]); + } parseFde(); } } @@ -118,7 +130,9 @@ void DwarfParser::parseCie() { } void DwarfParser::parseFde() { + u32 fde_len = get32(); +// printf("fde len = %u \n", fde_len); if (fde_len == 0 || fde_len == 0xffffffff) { return; } @@ -126,11 +140,14 @@ void DwarfParser::parseFde() { const char *fde_start = _ptr; u32 cie_offset = get32(); if (_count == 0) { +#ifdef DEBUG + printf("Change pointer to %lx - %lx \n", fde_start, cie_offset); +#endif _ptr = fde_start - cie_offset; parseCie(); _ptr = fde_start + 4; } - + // ptr - base = offset to the FDE u32 range_start = getPtr() - _image_base; u32 range_len = get32(); _ptr += getLeb(); diff --git a/src/async-profiler/elf_helpers.cpp b/src/async-profiler/elf_helpers.cpp new file mode 100644 index 000000000..9e470f174 --- /dev/null +++ b/src/async-profiler/elf_helpers.cpp @@ -0,0 +1,168 @@ +#include "elf_helpers.h" + +#include "build_id.hpp" +#include "logger.hpp" + +#include +#include + +#include +#include + +#define LG_WRN(args...) 
printf(args) + +const char* get_section_data(Elf *elf, const char *section_name) { + // Get the string table index for the section header strings + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx) != 0) { + fprintf(stderr, "Failed to get string table index for section header strings: %s\n", elf_errmsg(-1)); + return nullptr; + } + + // Iterate over the sections and find the .eh_frame section + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn)) != NULL) { + // Get the section header for the current section + GElf_Shdr shdr; + if (gelf_getshdr(scn, &shdr) != &shdr) { + fprintf(stderr, "Failed to get section header: %s\n", elf_errmsg(-1)); + return nullptr; + } + + // Get the name of the current section + char *name = elf_strptr(elf, shstrndx, shdr.sh_name); + if (name == NULL) { + fprintf(stderr, "Failed to get section name: %s\n", elf_errmsg(-1)); + return nullptr; + } + + // Check if the section is the .eh_frame section + if (strcmp(name, section_name) == 0) { + printf("%s section found at offset 0x%lx, size %ld\n", section_name, shdr.sh_offset, shdr.sh_size); + // Get the data for the .eh_frame section + Elf_Data *data = elf_getdata(scn, NULL); + if (data == NULL) { + return nullptr; + } else { + return reinterpret_cast(data->d_buf); + } + } + } + fprintf(stderr, "Failed to find section: %s\n", section_name); + return nullptr; +} + +bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, + Offset_t &bias_offset, Offset_t &text_base) { + vaddr = 0; + bias_offset = 0; + GElf_Ehdr ehdr_mem; + GElf_Ehdr *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (ehdr == nullptr) { + LG_WRN("Invalid elf %s", filepath); + return false; + } + text_base = ehdr->e_entry; + + bool found_exec = false; + switch (ehdr->e_type) { + case ET_EXEC: + case ET_CORE: + case ET_DYN: { + size_t phnum; + if (unlikely(elf_getphdrnum(elf, &phnum) != 0)) { + LG_WRN("Invalid elf %s", filepath); + return false; + } + for (size_t i = 0; i < phnum; ++i) { + GElf_Phdr 
phdr_mem; + // Retrieve the program header + GElf_Phdr *ph = gelf_getphdr(elf, i, &phdr_mem); + if (unlikely(ph == NULL)) { + LG_WRN("Invalid elf %s", filepath); + return false; + } + constexpr int rx = PF_X | PF_R; + if (ph->p_type == PT_LOAD) { + if ((ph->p_flags & rx) == rx) { + if (!found_exec) { + vaddr = ph->p_vaddr; + bias_offset = ph->p_vaddr - ph->p_offset; + printf("%lx - %lx (vaddr - p_offset) \n", ph->p_vaddr, + ph->p_offset); + found_exec = true; + } else { + // There can be multiple executable load segments. + // The first one should be considered (this is valid) + // Leaving the failure for now as it allows me to find test cases + printf("Multiple exec LOAD segments: %s", filepath); + } + } + } + } + break; + } + default: + LG_WRN("Unsupported elf type (%d) %s", ehdr->e_type, filepath); + return false; + } + + if (!found_exec) { + LG_WRN("Not executable LOAD segment found in %s", filepath); + } + return found_exec; +} + + +bool process_fdes(Elf *elf) { + Elf_Scn *scn = NULL; + Elf_Data *data = NULL; + GElf_Shdr shdr; + + // Get the string table index for the section header strings + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx) != 0) { + fprintf(stderr, "Failed to get string table index for section header strings: %s\n", elf_errmsg(-1)); + return false; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + gelf_getshdr(scn, &shdr); + if (shdr.sh_type == SHT_PROGBITS && (strcmp(".debug_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0 || + strcmp(".eh_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0)) { + // This is the .debug_frame or .eh_frame section + data = elf_getdata(scn, NULL); + break; + } + } + if (!data) { + fprintf(stderr, "Unable to find dwarf information\n"); + return false; + } + + // Iterate through the CFI records in the .debug_frame or .eh_frame section + Dwarf_Off offset = 0; + while (true) { + // Get the next CFI record + Dwarf_Off next_offset; + Dwarf_CFI_Entry entry; + + int result = 
dwarf_next_cfi(reinterpret_cast(elf_getident(elf, NULL)), + data, + strcmp(".eh_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0, + offset, &next_offset, + &entry); + if (result != 0) { + // End of CFI records + break; + } + + printf("cfi id = %lx\n", entry); + // Process the CFI record + // ... + + // Move to the next CFI record + offset = next_offset; + } + return true; +} diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index dd666003b..05e377107 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -34,6 +34,7 @@ # include # include +# include "elf_helpers.h" # include # include @@ -142,7 +143,7 @@ typedef Elf32_Dyn ElfDyn; # endif // __musl__ class ElfParser { -private: +public: CodeCache *_cc; const char *_base; const char *_file_name; @@ -185,13 +186,15 @@ class ElfParser { void parseDynamicSection(); void parseDwarfInfo(); + void parseDwarfInfoRemote(const char *eh_frame_data); void loadSymbols(bool use_debug); bool loadSymbolsUsingBuildId(); bool loadSymbolsUsingDebugLink(); void loadSymbolTable(ElfSection *symtab); void addRelocationSymbols(ElfSection *reltab, const char *plt); + public: - static const char* get_self_vdso(void); + static const char *get_self_vdso(void); static void parseProgramHeaders(CodeCache *cc, const char *base); static bool parseProgramHeadersRemote(Elf *elf, CodeCache *cc, const char *base, @@ -254,7 +257,8 @@ bool ElfParser::parseFile(CodeCache *cc, const char *base, return true; } -void ElfParser::parseMemRemote(CodeCache *cc, const char *base, const char *addr) { +void ElfParser::parseMemRemote(CodeCache *cc, const char *base, + const char *addr) { ElfParser elf(cc, base, addr); if (elf.validHeader()) { elf.loadSymbols(false); @@ -273,14 +277,13 @@ bool ElfParser::parseProgramHeadersRemote(Elf *elf, CodeCache *cc, const char *base, const char *mmap_addr) { // todo check if I can use base - ElfParser elf_remote(cc, reinterpret_cast(mmap_addr), 
mmap_addr); + ElfParser elf_remote(cc, base, mmap_addr); if (elf_remote.validHeader()) { cc->setTextBase(mmap_addr); elf_remote.parseDynamicSection(); elf_remote.parseDwarfInfo(); - return true ; - } - else { + return true; + } else { printf("invalid header \n"); } return false; @@ -289,6 +292,7 @@ bool ElfParser::parseProgramHeadersRemote(Elf *elf, CodeCache *cc, void ElfParser::parseProgramHeaders(CodeCache *cc, const char *base) { ElfParser elf(cc, base, base); if (elf.validHeader()) { + printf("Setting text base = %p \n", base); cc->setTextBase(base); elf.parseDynamicSection(); elf.parseDwarfInfo(); @@ -366,18 +370,26 @@ void ElfParser::parseDynamicSection() { } } } - } - else { + } else { printf("No dynamic section \n"); } } +void ElfParser::parseDwarfInfoRemote(const char *eh_frame_data) { + printf("Create dwarf with base:%p - eh_frame_hdr:%p\n", _base, eh_frame_data); + DwarfParser dwarf(_cc->name(), _base, eh_frame_data); + _cc->setDwarfTable(dwarf.table(), dwarf.count()); + printf("Created a number of dwarf entries = %d \n", dwarf.count()); +} + void ElfParser::parseDwarfInfo() { if (!DWARF_SUPPORTED) return; ElfProgramHeader *eh_frame_hdr = findProgramHeader(PT_GNU_EH_FRAME); + if (eh_frame_hdr != NULL) { + printf("Create dwarf with %lx - at:%lx \n", _base, at(eh_frame_hdr)); DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); _cc->setDwarfTable(dwarf.table(), dwarf.count()); printf("Created a number of dwarf entries = %d \n", dwarf.count()); @@ -493,7 +505,7 @@ bool ElfParser::loadSymbolsUsingDebugLink() { void ElfParser::loadSymbolTable(ElfSection *symtab) { ElfSection *strtab = section(symtab->sh_link); const char *strings = at(strtab); - + int cpt = 0; const char *symbols = at(symtab); const char *symbols_end = symbols + symtab->sh_size; for (; symbols < symbols_end; symbols += symtab->sh_entsize) { @@ -502,13 +514,15 @@ void ElfParser::loadSymbolTable(ElfSection *symtab) { // Skip special AArch64 mapping symbols: $x and $d if (sym->st_size != 0 
|| sym->st_info != 0 || strings[sym->st_name] != '$') { - // printf("Loading sym %s at 0x%lx (base=0x%lx)\n", - // strings + sym->st_name, _base + sym->st_value, _base); +// printf("Loading sym %s at 0x%lx (base=0x%lx)\n", strings + sym->st_name, +// _base + sym->st_value, _base); _cc->add(_base + sym->st_value, (int)sym->st_size, strings + sym->st_name); + ++cpt; } } } + printf("Considered %d symbols \n", cpt); } void ElfParser::addRelocationSymbols(ElfSection *reltab, const char *plt) { @@ -547,8 +561,7 @@ void Symbols::parseKernelSymbols(CodeCache *cc) { // XXX(nick): omitted } - -const char* ElfParser::get_self_vdso(void) { +const char *ElfParser::get_self_vdso(void) { FILE *f = fopen("/proc/self/maps", "r"); const char *addr_vdso = nullptr; @@ -591,6 +604,7 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, return; } + // last readable is previous mmap const char *last_readable_base = NULL; const char *image_end = NULL; char *str = NULL; @@ -624,7 +638,6 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, CodeCache *cc = new CodeCache(map.file(), count, image_base, image_end); unsigned long inode = map.inode(); - CodeCache *cc_remote = nullptr; printf("Considering %s \n", map.file()); if (inode != 0) { // remote unwinding @@ -635,28 +648,42 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, } size_t length = (size_t)lseek64(fd, 0, SEEK_END); void *addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); + printf("mmap at %lx - size = %lx\n", addr, length); Elf *elf = elf_begin(fd, ELF_C_READ, NULL); if (elf == NULL || addr == MAP_FAILED) { LG_WRN("Invalid elf %s (efl:%p, addr_mmap:%p)\n", map.file(), elf, addr); goto continue_loop; } - // temp structure to load everything - cc_remote = new CodeCache(map.file(), count, addr, addr+length); + Offset_t biais_offset; + ElfAddress_t vaddr; + ElfAddress_t text_base; + // Compute how to convert a process address + if (get_elf_offsets(elf, map.file(), vaddr, 
biais_offset, text_base)) { + printf("vaddr from get_elf_offset: %lx \n", vaddr); + printf("biais offset get_elf_offset: %lx \n", biais_offset); + printf("text base from get_elf_offset: %lx \n", text_base); + } // Do not parse the same executable twice, e.g. on Alpine Linux if (parsed_inodes.insert(map.dev() | inode << 16).second) { // Be careful: executable file is not always ELF, e.g. classes.jsa - if ((image_base -= map.offs()) >= last_readable_base) { -// ElfParser::parseProgramHeaders(cc, image_base); - if (ElfParser::parseProgramHeadersRemote(elf, cc_remote, image_base, reinterpret_cast(addr))) { - cc->setTextBase(image_base); - cc->setDwarfTable(cc_remote->_dwarf_table, cc_remote->_dwarf_table_length); - // avoid deleting it (move it) - cc_remote->setDwarfTable(nullptr, 0); + if ((image_base -= vaddr) >= last_readable_base) { + ElfParser elf_remote(cc, image_base, addr); + if (elf_remote.validHeader()) { + // text base is supposed to be the entry point + cc->setTextBase(image_base + text_base); + const char* eh_frame_data_hdr = get_section_data(elf, ".eh_frame_hdr"); + const char* eh_frame_data = get_section_data(elf, ".eh_frame"); + printf("eh_frame_hdr = %p, eh_frame = %p \n", eh_frame_data_hdr, eh_frame_data); + if (eh_frame_data_hdr) { + process_fdes(elf); + elf_remote.parseDwarfInfoRemote(eh_frame_data_hdr); + } + +// elf_remote.parseDwarfInfo(); } } - ElfParser::parseFile(cc, image_base, map.file(), true); } @@ -664,8 +691,6 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, close(fd); elf_end(elf); // no-op if null munmap(addr, length); - // we transfered everything away so we can delete this - delete cc_remote; } else if (strcmp(map.file(), "[vdso]") == 0) { // find our self address for vdso const char *addr_vdso = ElfParser::get_self_vdso(); @@ -732,6 +757,9 @@ void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { // Do not parse the same executable twice, e.g. 
on Alpine Linux if (parsed_inodes.insert(map.dev() | inode << 16).second) { // Be careful: executable file is not always ELF, e.g. classes.jsa + printf("image_base = %p, map.offs() = %p, last_readable_base = %p \n", + image_base, map.offs(), last_readable_base); + // todo - read the biais from the vaddr field (open file?) if ((image_base -= map.offs()) >= last_readable_base) { ElfParser::parseProgramHeaders(cc, image_base); } @@ -743,6 +771,7 @@ void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { cc->sort(); array->add(cc); + break; } } diff --git a/src/ddprof_module_lib.cc b/src/ddprof_module_lib.cc index 3f921f5aa..375ba2480 100644 --- a/src/ddprof_module_lib.cc +++ b/src/ddprof_module_lib.cc @@ -37,6 +37,7 @@ static bool get_elf_offsets(int fd, const std::string &filepath, } bool found_exec = false; + switch (ehdr->e_type) { case ET_EXEC: case ET_CORE: @@ -48,6 +49,7 @@ static bool get_elf_offsets(int fd, const std::string &filepath, } for (size_t i = 0; i < phnum; ++i) { GElf_Phdr phdr_mem; + // Retrieve the program header GElf_Phdr *ph = gelf_getphdr(elf, i, &phdr_mem); if (unlikely(ph == NULL)) { LG_WRN("Invalid elf %s", filepath.c_str()); @@ -60,6 +62,9 @@ static bool get_elf_offsets(int fd, const std::string &filepath, bias_offset = ph->p_vaddr - ph->p_offset; found_exec = true; } else { + // There can be multiple load segments. 
+ // The first one should be considered (this is valid) + // Leaving the failure for now as it allows me to find test cases report_failed_assumption(ddprof::string_format( "Multiple exec LOAD segments: %s", filepath.c_str())); } diff --git a/src/lib/elfutils.cc b/src/lib/elfutils.cc index 43930b9ad..a56bae6d3 100644 --- a/src/lib/elfutils.cc +++ b/src/lib/elfutils.cc @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include #ifndef __ELF_NATIVE_CLASS # define __ELF_NATIVE_CLASS 64 diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 17c054175..058dfaa4c 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -29,14 +29,14 @@ DDPROF_NOINLINE size_t funcA(std::array ®s); DDPROF_NOINLINE size_t funcB(std::array ®s); size_t funcB(std::array ®s) { - printf("Here we are in B %lx \n", _THIS_IP_); + printf("dwarf_unwind-ut:%s %lx \n",__FUNCTION__, _THIS_IP_); size_t size = save_context(retrieve_stack_end_address(), regs, stack); return size; } size_t funcA(std::array ®s) { - printf("Here we are in A %lx \n", _THIS_IP_); + printf("dwarf_unwind-ut:%s %lx \n",__FUNCTION__, _THIS_IP_); return funcB(regs); } @@ -110,9 +110,9 @@ TEST(dwarf_unwind, remote) { // Fork pid_t temp_pid = fork(); if (!temp_pid) { -// func_intermediate_0(10); - char*const argList[] = {"sleep", "10", nullptr}; - execvp("sleep", argList); + func_intermediate_0(10); +// char *const argList[] = {"sleep", "10", nullptr}; +// execvp("sleep", argList); return; } diff --git a/test/loader/CMakeLists.txt b/test/loader/CMakeLists.txt index 13b7403f2..f7fbb3fd7 100644 --- a/test/loader/CMakeLists.txt +++ b/test/loader/CMakeLists.txt @@ -1,7 +1,6 @@ message(STATUS "Compiling an async profiler loader") -add_exe( - async_prof_load main.cc - LIBRARIES dl DDProf::AsyncProf) +add_exe(async_prof_load main.cc LIBRARIES dl DDProf::AsyncProf) +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie") target_include_directories(async_prof_load PRIVATE 
${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party) diff --git a/test/loader/main.cc b/test/loader/main.cc index 20a51b854..11cacc3a7 100644 --- a/test/loader/main.cc +++ b/test/loader/main.cc @@ -1,5 +1,5 @@ -#include #include "CLI/CLI11.hpp" +#include #include "async-profiler/codeCache.h" #include "async-profiler/symbols.h" @@ -25,7 +25,7 @@ static void sigsegv_handler(int sig, siginfo_t *si, void *uc) { exit(-1); } -static void install_segfault_handler(){ +static void install_segfault_handler() { struct sigaction sigaction_handlers = {}; sigaction_handlers.sa_sigaction = sigsegv_handler; sigaction_handlers.sa_flags = SA_SIGINFO; @@ -41,9 +41,12 @@ int main(int argc, char *argv[]) { std::vector exec_args; std::string lib_str; - app.add_option("--lib", lib_str, "Library to open")->required()->check(CLI::ExistingFile); + app.add_option("--lib", lib_str, "Library to open") + ->required() + ->check(CLI::ExistingFile); CLI11_PARSE(app, argc, argv); printf("Welcome to a library loader using the async profiler\n"); + void *handle = dlopen(lib_str.c_str(), RTLD_NOW); if (!handle) { printf("Error opening the library \n"); @@ -53,6 +56,13 @@ int main(int argc, char *argv[]) { CodeCacheArray cache_arary; Symbols::parseLibraries(&cache_arary, false); } + printf("--------------------\n"); + { + CodeCacheArray cache_arary; + Symbols::parsePidLibraries(getpid(), &cache_arary, false); + } + + sleep(10); printf("Closing gracefully\n"); dlclose(handle); return 0; diff --git a/test/loader/note.txt b/test/loader/note.txt new file mode 100644 index 000000000..c0aebaef8 --- /dev/null +++ b/test/loader/note.txt @@ -0,0 +1,96 @@ +58f2362ea000-58f2362f2000 r--p 00000000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load +58f2362f2000-58f23634b000 r-xp 00008000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load 
+58f23634b000-58f23636f000 r--p 00061000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load +58f23636f000-58f236371000 r--p 00084000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load +58f236371000-58f236372000 rw-p 00086000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load +58f236372000-58f23637a000 rw-p 00000000 00:00 0 +58f236914000-58f236bae000 rw-p 00000000 00:00 0 [heap] +79c36262f000-79c36265e000 rw-p 00000000 00:00 0 +79c363da0000-79c363dfe000 rw-p 00000000 00:00 0 +79c3643f2000-79c364453000 rw-p 00000000 00:00 0 +79c364453000-79c3644b4000 rw-p 00000000 00:00 0 +79c3644e5000-79c364577000 rw-p 00000000 00:00 0 +79c364577000-79c36458b000 r--p 00000000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so +79c36458b000-79c364652000 r-xp 00014000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so +79c364652000-79c3662b3000 r--p 000db000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so +79c3662b3000-79c3662b4000 ---p 01d3c000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so +79c3662b4000-79c3662bb000 r--p 01d3c000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so +79c3662bb000-79c3662bc000 rw-p 01d43000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so + +79c364577000 +Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so +image_base = 0x79c36458b000, map.offs() = 0x14000, 
last_readable_base = 0x79c364577000 + + + +// mmap at 7e1003515000 - size = 5880 +// Create dwarf with 7e1002c00000 - at:7e1003518354 +// GNU_EH_FRAME 0x003354 0x0000000000003354 0x0000000000003354 0x000144 0x000144 R 0x4 + + +// Create dwarf with 7ffff7800000 - at:7ffff7fb8354 +// 0x7ffff81b91f4 + +// 7ffff8 --> wtf + +// Create dwarf with 0x0x555555554000 - at:0x0x555555858c00 + + +Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/dwarf_unwind-ut +mmap at 7ffff75a0000 - size = 29e968 +d000 - d000 (vaddr - p_offset) +vaddr from get_elf_offset: d000 +biais offset get_elf_offset: 0 +text base from get_elf_offset: f480 +.eh_frame_hdr section found at offset 0xaf120, size 38492 +Create dwarf with 0x0x555555554000 - at:0x0x555555858c00 +fde count = 4810 + + +Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/dwarf_unwind-ut +image_base = 0x61043db82000, map.offs() = 0xd000, last_readable_base = 0x61043db75000 +Setting text base = 0x61043db75000 +relent = 24 +GOT start == 0x61043dc5c6c0 +Create dwarf with 61043db75000 - at:61043dc24120 +fde count = 4810 +Created a number of dwarf entries = 19700 +Considered 6429 symbols + +eh_frame_hdr = 0x60adf2e6ada0, +eh_frame = 0x60adf2e74410 + +eh_frame loaded just after the eh_frame hdr + + + +Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/dwarf_unwind-ut +image_base = 0x555555561000, map.offs() = 0xd000, last_readable_base = 0x555555554000 +Setting text base = 0x555555554000 +relent = 24 +GOT start == 0x55555563c6c0 +Create dwarf with 555555554000 - at:555555604620 +eh_frame_ptr_enc = 1b +table_enc = 3b +fde count = 4817 +ptr = 55555560dce8, table offset = 96c8 +Change pointer to 55555560dcec - 34 +Created a number of dwarf entries = 19721 +Considered 6454 symbols + + +Considering 
/home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/dwarf_unwind-ut +mmap at 7ffff7591000 - size = 2adf78 +d000 - d000 (vaddr - p_offset) +vaddr from get_elf_offset: d000 +biais offset get_elf_offset: 0 +text base from get_elf_offset: f480 +.eh_frame_hdr section found at offset 0xb0620, size 38548 +.eh_frame section found at offset 0xb9cb8, size 162072 +eh_frame_hdr = 0x555555859f50, eh_frame = 0x5555558635f0 +Create dwarf with base:0x555555554000 - eh_frame_hdr:0x555555859f50 +eh_frame_ptr_enc = 1b +table_enc = 3b +fde count = 4817 +ptr = 555555863618, table offset = 96c8 +Change pointer to 55555586361c - 0 From a5cbca1cdaa314583b1a644ab8b8307b964ef986 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Fri, 6 Jan 2023 10:30:09 +0100 Subject: [PATCH 21/29] Async profiler Adjust variables with start of loaded file --- include/async-profiler/dwarf.h | 6 ++ include/async-profiler/elf_helpers.h | 5 +- src/async-profiler/dwarf.cpp | 7 +- src/async-profiler/elf_helpers.cpp | 7 +- src/async-profiler/symbols_linux.cpp | 45 ++++---- test/loader/note.txt | 147 ++++++++++++--------------- 6 files changed, 114 insertions(+), 103 deletions(-) diff --git a/include/async-profiler/dwarf.h b/include/async-profiler/dwarf.h index 3a7524350..9bb15384f 100644 --- a/include/async-profiler/dwarf.h +++ b/include/async-profiler/dwarf.h @@ -140,6 +140,12 @@ class DwarfParser { DwarfParser(const char *name, const char *image_base, const char *eh_frame_hdr); + // manual parse of fde + DwarfParser(const char *name, const char *image_base); + + + void addFde(const char* fde, const char *cie); + FrameDesc *table() const { return _table; } int count() const { return _count; } diff --git a/include/async-profiler/elf_helpers.h b/include/async-profiler/elf_helpers.h index dd97054b1..aeb1ce6d1 100644 --- a/include/async-profiler/elf_helpers.h +++ b/include/async-profiler/elf_helpers.h @@ -5,9 +5,10 @@ struct Elf; bool get_elf_offsets(Elf *elf, const char *filepath, 
ElfAddress_t &vaddr, + Offset_t &elf_offset, Offset_t &bias_offset, Offset_t &text_base); -const char* get_section_data(Elf *elf, const char *section_name); - +const char* get_section_data(Elf *elf, const char *section_name, + Offset_t &elf_offset); bool process_fdes(Elf *elf); diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp index 7f3e53164..e92ebb436 100644 --- a/src/async-profiler/dwarf.cpp +++ b/src/async-profiler/dwarf.cpp @@ -147,8 +147,13 @@ void DwarfParser::parseFde() { parseCie(); _ptr = fde_start + 4; } - // ptr - base = offset to the FDE + u32 range_start = getPtr() - _image_base; + if (_count == 0) { + printf("Dwarf range start: %lx (ptr) - %lx (image) = %lx \n", getPtr(), _image_base, + range_start); + } + u32 range_len = get32(); _ptr += getLeb(); parseInstructions(range_start, fde_start + fde_len); diff --git a/src/async-profiler/elf_helpers.cpp b/src/async-profiler/elf_helpers.cpp index 9e470f174..5bf81b356 100644 --- a/src/async-profiler/elf_helpers.cpp +++ b/src/async-profiler/elf_helpers.cpp @@ -11,7 +11,7 @@ #define LG_WRN(args...) 
printf(args) -const char* get_section_data(Elf *elf, const char *section_name) { +const char* get_section_data(Elf *elf, const char *section_name, Offset_t &elf_offset) { // Get the string table index for the section header strings size_t shstrndx; if (elf_getshdrstrndx(elf, &shstrndx) != 0) { @@ -40,6 +40,7 @@ const char* get_section_data(Elf *elf, const char *section_name) { if (strcmp(name, section_name) == 0) { printf("%s section found at offset 0x%lx, size %ld\n", section_name, shdr.sh_offset, shdr.sh_size); // Get the data for the .eh_frame section + elf_offset = shdr.sh_offset; Elf_Data *data = elf_getdata(scn, NULL); if (data == NULL) { return nullptr; @@ -53,6 +54,7 @@ const char* get_section_data(Elf *elf, const char *section_name) { } bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, + Offset_t &elf_offset, Offset_t &bias_offset, Offset_t &text_base) { vaddr = 0; bias_offset = 0; @@ -90,6 +92,7 @@ bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, bias_offset = ph->p_vaddr - ph->p_offset; printf("%lx - %lx (vaddr - p_offset) \n", ph->p_vaddr, ph->p_offset); + elf_offset = ph->p_offset; found_exec = true; } else { // There can be multiple executable load segments. @@ -157,7 +160,7 @@ bool process_fdes(Elf *elf) { break; } - printf("cfi id = %lx\n", entry); +// printf("cfi id = %lx\n", entry); // Process the CFI record // ... 
diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 05e377107..578bcdffb 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -186,7 +186,7 @@ class ElfParser { void parseDynamicSection(); void parseDwarfInfo(); - void parseDwarfInfoRemote(const char *eh_frame_data); + void parseDwarfInfoRemote(const char *eh_frame_data, const char *base_remote); void loadSymbols(bool use_debug); bool loadSymbolsUsingBuildId(); bool loadSymbolsUsingDebugLink(); @@ -291,6 +291,7 @@ bool ElfParser::parseProgramHeadersRemote(Elf *elf, CodeCache *cc, void ElfParser::parseProgramHeaders(CodeCache *cc, const char *base) { ElfParser elf(cc, base, base); + if (elf.validHeader()) { printf("Setting text base = %p \n", base); cc->setTextBase(base); @@ -375,9 +376,9 @@ void ElfParser::parseDynamicSection() { } } -void ElfParser::parseDwarfInfoRemote(const char *eh_frame_data) { +void ElfParser::parseDwarfInfoRemote(const char *eh_frame_data, const char *base_remote) { printf("Create dwarf with base:%p - eh_frame_hdr:%p\n", _base, eh_frame_data); - DwarfParser dwarf(_cc->name(), _base, eh_frame_data); + DwarfParser dwarf(_cc->name(), base_remote, eh_frame_data); _cc->setDwarfTable(dwarf.table(), dwarf.count()); printf("Created a number of dwarf entries = %d \n", dwarf.count()); } @@ -647,9 +648,10 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, continue; } size_t length = (size_t)lseek64(fd, 0, SEEK_END); + // todo : remove the mmap void *addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); printf("mmap at %lx - size = %lx\n", addr, length); - Elf *elf = elf_begin(fd, ELF_C_READ, NULL); + Elf *elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (elf == NULL || addr == MAP_FAILED) { LG_WRN("Invalid elf %s (efl:%p, addr_mmap:%p)\n", map.file(), elf, addr); @@ -658,30 +660,41 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, Offset_t biais_offset; ElfAddress_t vaddr; 
ElfAddress_t text_base; + Offset_t elf_offset; + // Compute how to convert a process address - if (get_elf_offsets(elf, map.file(), vaddr, biais_offset, text_base)) { + if (get_elf_offsets(elf, map.file(), vaddr, elf_offset, biais_offset, text_base)) { printf("vaddr from get_elf_offset: %lx \n", vaddr); printf("biais offset get_elf_offset: %lx \n", biais_offset); printf("text base from get_elf_offset: %lx \n", text_base); + printf("offset from get_elf_offset: %lx \n", elf_offset); + printf("last readable: %lx \n", last_readable_base); } // Do not parse the same executable twice, e.g. on Alpine Linux if (parsed_inodes.insert(map.dev() | inode << 16).second) { // Be careful: executable file is not always ELF, e.g. classes.jsa - if ((image_base -= vaddr) >= last_readable_base) { + // image base is used to offset + if ((image_base -= map.offs()) >= last_readable_base) { + printf("image base = %lx \n", image_base); ElfParser elf_remote(cc, image_base, addr); if (elf_remote.validHeader()) { - // text base is supposed to be the entry point - cc->setTextBase(image_base + text_base); - const char* eh_frame_data_hdr = get_section_data(elf, ".eh_frame_hdr"); - const char* eh_frame_data = get_section_data(elf, ".eh_frame"); + Offset_t eh_frame_hdr_data_offset; + Offset_t eh_frame_data_offset; + // todo: This should be something with the biais instead + cc->setTextBase(image_base); + const char* eh_frame_data_hdr = get_section_data(elf, ".eh_frame_hdr", + eh_frame_hdr_data_offset); + const char* eh_frame_data = get_section_data(elf, ".eh_frame", + eh_frame_data_offset); printf("eh_frame_hdr = %p, eh_frame = %p \n", eh_frame_data_hdr, eh_frame_data); if (eh_frame_data_hdr) { - process_fdes(elf); - elf_remote.parseDwarfInfoRemote(eh_frame_data_hdr); + elf_remote.parseDwarfInfoRemote(eh_frame_data_hdr, + eh_frame_data_hdr - eh_frame_hdr_data_offset); } - -// elf_remote.parseDwarfInfo(); + } + else { + printf("Not a valid header - %s\n", map.file()); } } ElfParser::parseFile(cc, 
image_base, map.file(), true); @@ -760,6 +773,7 @@ void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { printf("image_base = %p, map.offs() = %p, last_readable_base = %p \n", image_base, map.offs(), last_readable_base); // todo - read the biais from the vaddr field (open file?) + if ((image_base -= map.offs()) >= last_readable_base) { ElfParser::parseProgramHeaders(cc, image_base); } @@ -768,13 +782,10 @@ void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { } else if (strcmp(map.file(), "[vdso]") == 0) { ElfParser::parseMem(cc, image_base); } - cc->sort(); array->add(cc); - break; } } - free(str); fclose(f); } diff --git a/test/loader/note.txt b/test/loader/note.txt index c0aebaef8..66a4d5cf0 100644 --- a/test/loader/note.txt +++ b/test/loader/note.txt @@ -1,96 +1,81 @@ -58f2362ea000-58f2362f2000 r--p 00000000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load -58f2362f2000-58f23634b000 r-xp 00008000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load -58f23634b000-58f23636f000 r--p 00061000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load -58f23636f000-58f236371000 r--p 00084000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load -58f236371000-58f236372000 rw-p 00086000 fd:06 34734311 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/loader/async_prof_load -58f236372000-58f23637a000 rw-p 00000000 00:00 0 -58f236914000-58f236bae000 rw-p 00000000 00:00 0 [heap] -79c36262f000-79c36265e000 rw-p 00000000 00:00 0 -79c363da0000-79c363dfe000 rw-p 00000000 00:00 0 -79c3643f2000-79c364453000 rw-p 00000000 00:00 0 -79c364453000-79c3644b4000 rw-p 00000000 00:00 0 -79c3644e5000-79c364577000 rw-p 
00000000 00:00 0 -79c364577000-79c36458b000 r--p 00000000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so -79c36458b000-79c364652000 r-xp 00014000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so -79c364652000-79c3662b3000 r--p 000db000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so -79c3662b3000-79c3662b4000 ---p 01d3c000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so -79c3662b4000-79c3662bb000 r--p 01d3c000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so -79c3662bb000-79c3662bc000 rw-p 01d43000 fd:06 25825200 /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so +image_base = 0x407000, map.offs() = 0x7000, last_readable_base = 0x400000 -79c364577000 -Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/libdd_profiling.so -image_base = 0x79c36458b000, map.offs() = 0x14000, last_readable_base = 0x79c364577000 +// Adjust ptr 407020 - 400000 = 7020 +// During stack walking, we do: +// pc - text_base +// base is set at image_base - offset +// It should be biais instead... -// mmap at 7e1003515000 - size = 5880 -// Create dwarf with 7e1002c00000 - at:7e1003518354 -// GNU_EH_FRAME 0x003354 0x0000000000003354 0x0000000000003354 0x000144 0x000144 R 0x4 +// Remote... 
+// pc - text_base +// So text base should still be the same +// Adjust ptr 75bc67ddf020 - 400000 +// +// Adjust between file eh_frame_hdr and binary +// 75bc67ddf020 - +// (eh_frame_hdr - offset = base) -// Create dwarf with 7ffff7800000 - at:7ffff7fb8354 -// 0x7ffff81b91f4 +.eh_frame_hdr section found at offset 0x1373880, size 349044 +.eh_frame section found at offset 0x13c8d10, size 3201136 +eh_frame_hdr = 0x75bc667ad880, eh_frame = 0x75bc66802d10 +--> 75BC6543A000 -// 7ffff8 --> wtf +75BC67DD8000 -// Create dwarf with 0x0x555555554000 - at:0x0x555555858c00 +// Adjust between file and loaded mem + + eh_frame_hdr.vaddr - eh_frame_hdr.offset - -Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/dwarf_unwind-ut -mmap at 7ffff75a0000 - size = 29e968 -d000 - d000 (vaddr - p_offset) -vaddr from get_elf_offset: d000 -biais offset get_elf_offset: 0 -text base from get_elf_offset: f480 -.eh_frame_hdr section found at offset 0xaf120, size 38492 -Create dwarf with 0x0x555555554000 - at:0x0x555555858c00 -fde count = 4810 - - -Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/dwarf_unwind-ut -image_base = 0x61043db82000, map.offs() = 0xd000, last_readable_base = 0x61043db75000 -Setting text base = 0x61043db75000 -relent = 24 -GOT start == 0x61043dc5c6c0 -Create dwarf with 61043db75000 - at:61043dc24120 -fde count = 4810 -Created a number of dwarf entries = 19700 -Considered 6429 symbols - -eh_frame_hdr = 0x60adf2e6ada0, -eh_frame = 0x60adf2e74410 - -eh_frame loaded just after the eh_frame hdr - - - -Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/dwarf_unwind-ut -image_base = 0x555555561000, map.offs() = 0xd000, last_readable_base = 0x555555554000 -Setting text base = 0x555555554000 +eh_frame_hdr = 0x75bc67e520e0, eh_frame = 0x75bc67e584e8 +Create dwarf with base:0x400000 - eh_frame_hdr:0x75bc67e520e0 +eh_frame_ptr_enc = 1b 
+table_enc = 3b +fde count = 3199 +ptr = 75bc67e58528, table offset = 6448 +Change pointer to 75bc67e5852c - 44 +Adjust ptr 75bc67ddf020 - 400000 +==> 75bc67ddf020 - 75BC67DD8000 += 7020 + +Considering /usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so.525.60.11 +image_base = 0x718900600000, map.offs() = (nil), last_readable_base = 0x718900600000 +Setting text base = 0x718900600000 relent = 24 -GOT start == 0x55555563c6c0 -Create dwarf with 555555554000 - at:555555604620 +GOT start == 0x718901f60018 +Create dwarf with 718900600000 - at:718901973880 eh_frame_ptr_enc = 1b table_enc = 3b -fde count = 4817 -ptr = 55555560dce8, table offset = 96c8 -Change pointer to 55555560dcec - 34 -Created a number of dwarf entries = 19721 -Considered 6454 symbols - - -Considering /home/r1viollet/go/src/github.com/DataDog/ddprof_2/build_gcc_unknown-linux-2.35_Deb/test/dwarf_unwind-ut -mmap at 7ffff7591000 - size = 2adf78 -d000 - d000 (vaddr - p_offset) -vaddr from get_elf_offset: d000 +fde count = 43629 +ptr = 718901bc8d28, table offset = 2554a8 +Change pointer to 718901bc8d2c - 1c +718901bc99f4 (ptr) - 718900600000 (image) = c9d00 +Created a number of dwarf entries = 326475 +Considered 14 symbols + + +Considering /usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so.525.60.11 +mmap at 7188fe39a000 - size = 17e26d8 +0 - 0 (vaddr - p_offset) +vaddr from get_elf_offset: 0 biais offset get_elf_offset: 0 -text base from get_elf_offset: f480 -.eh_frame_hdr section found at offset 0xb0620, size 38548 -.eh_frame section found at offset 0xb9cb8, size 162072 -eh_frame_hdr = 0x555555859f50, eh_frame = 0x5555558635f0 -Create dwarf with base:0x555555554000 - eh_frame_hdr:0x555555859f50 +text base from get_elf_offset: ca9d0 +offset from get_elf_offset: 0 +last readable: 718900600000 +image base = 718900600000 +.eh_frame_hdr section found at offset 0x1373880, size 349044 +.eh_frame section found at offset 0x13c8d10, size 3201136 +eh_frame_hdr = 0x7188fdf2a880, eh_frame = 0x7188fdf7fd10 +Create dwarf with 
base:0x718900600000 - eh_frame_hdr:0x7188fdf2a880 eh_frame_ptr_enc = 1b table_enc = 3b -fde count = 4817 -ptr = 555555863618, table offset = 96c8 -Change pointer to 55555586361c - 0 +fde count = 43629 +ptr = 7188fe17fd28, table offset = 2554a8 +Change pointer to 7188fe17fd2c - 104 +7188fe17fd36 (ptr) - 7188fcbb7000 (image) = 57c9b30 +loader[2949720]: has encountered an error and will exit + + +so eh_fram + From 495d0d369a8c98825f063ca8a7cf9dd738f94cf3 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Fri, 6 Jan 2023 16:27:07 +0100 Subject: [PATCH 22/29] Adjust the offset to the eh_frame section --- include/async-profiler/dwarf.h | 7 +- include/async-profiler/elf_helpers.h | 28 ++++++- src/async-profiler/dwarf.cpp | 18 ++--- src/async-profiler/elf_helpers.cpp | 105 +++++++++++++++++++++++---- src/async-profiler/symbols_linux.cpp | 80 +++++++++++--------- test/dwarf_unwind-ut.cc | 8 +- test/loader/note.txt | 105 ++++++++++++++++++++++++++- 7 files changed, 278 insertions(+), 73 deletions(-) diff --git a/include/async-profiler/dwarf.h b/include/async-profiler/dwarf.h index 9bb15384f..a001500b9 100644 --- a/include/async-profiler/dwarf.h +++ b/include/async-profiler/dwarf.h @@ -127,7 +127,7 @@ class DwarfParser { return ptr + *(int *)add(4); } - void parse(const char *eh_frame_hdr); + void parse(const char *eh_frame_hdr, u64 adjust_eh_frame); void parseCie(); void parseFde(); void parseInstructions(u32 loc, const char *end); @@ -138,13 +138,12 @@ class DwarfParser { public: DwarfParser(const char *name, const char *image_base, - const char *eh_frame_hdr); + const char *eh_frame_hdr, u64 adjust_eh_frame = 0); // manual parse of fde DwarfParser(const char *name, const char *image_base); - - void addFde(const char* fde, const char *cie); + void addFde(const char *fde, const char *cie); FrameDesc *table() const { return _table; } diff --git a/include/async-profiler/elf_helpers.h b/include/async-profiler/elf_helpers.h index aeb1ce6d1..07b36f887 100644 --- 
a/include/async-profiler/elf_helpers.h +++ b/include/async-profiler/elf_helpers.h @@ -4,11 +4,31 @@ struct Elf; +struct SectionInfo { + const char *_data; + Offset_t _offset; + ElfAddress_t _vaddr_sec; +}; + +// To adjust addresses inside the eh_frame_hdr +// If we are in different segments, we should consider +// (vaddr_eh_frame - vaddr_eh_frame_hdr) +// + (offset_eh_frame - offset_eh_frame_hdr) +struct EhFrameInfo { + SectionInfo _eh_frame; + SectionInfo _eh_frame_hdr; +}; + bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, - Offset_t &elf_offset, - Offset_t &bias_offset, Offset_t &text_base); + Offset_t &elf_offset, Offset_t &bias_offset, + Offset_t &text_base); -const char* get_section_data(Elf *elf, const char *section_name, +const char *get_section_data(Elf *elf, const char *section_name, Offset_t &elf_offset); -bool process_fdes(Elf *elf); +bool get_section_info(Elf *elf, const char *section_name, + SectionInfo §ion_info); + +bool get_eh_frame_info(Elf *elf, EhFrameInfo &eh_frame_info); + +bool process_fdes(Elf *elf); diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp index e92ebb436..f1f663c94 100644 --- a/src/async-profiler/dwarf.cpp +++ b/src/async-profiler/dwarf.cpp @@ -73,7 +73,7 @@ FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | (2 * DW_STACK_SLOT) << 8, -2 * DW_STACK_SLOT}; DwarfParser::DwarfParser(const char *name, const char *image_base, - const char *eh_frame_hdr) { + const char *eh_frame_hdr, u64 adjust_eh_frame) { _name = name; _image_base = image_base; @@ -85,10 +85,10 @@ DwarfParser::DwarfParser(const char *name, const char *image_base, _code_align = sizeof(instruction_t); _data_align = -(int)sizeof(void *); - parse(eh_frame_hdr); + parse(eh_frame_hdr, adjust_eh_frame); } -void DwarfParser::parse(const char *eh_frame_hdr) { +void DwarfParser::parse(const char *eh_frame_hdr, u64 adjust_eh_frame) { u8 version = eh_frame_hdr[0]; u8 eh_frame_ptr_enc = eh_frame_hdr[1]; u8 fde_count_enc = 
eh_frame_hdr[2]; @@ -102,12 +102,12 @@ void DwarfParser::parse(const char *eh_frame_hdr) { } int fde_count = *(int *)(eh_frame_hdr + 8); -#ifdef DEBUG +#ifdef DEBUG printf("fde count = %d \n", fde_count); #endif int *table = (int *)(eh_frame_hdr + 16); for (int i = 0; i < fde_count; i++) { - _ptr = eh_frame_hdr + table[i * 2]; + _ptr = eh_frame_hdr + table[i * 2] - adjust_eh_frame; if (i == 0) { printf("ptr = %lx, table offset = %lx \n", _ptr, table[i * 2]); } @@ -132,7 +132,7 @@ void DwarfParser::parseCie() { void DwarfParser::parseFde() { u32 fde_len = get32(); -// printf("fde len = %u \n", fde_len); + // printf("fde len = %u \n", fde_len); if (fde_len == 0 || fde_len == 0xffffffff) { return; } @@ -140,7 +140,7 @@ void DwarfParser::parseFde() { const char *fde_start = _ptr; u32 cie_offset = get32(); if (_count == 0) { -#ifdef DEBUG +#ifdef DEBUG printf("Change pointer to %lx - %lx \n", fde_start, cie_offset); #endif _ptr = fde_start - cie_offset; @@ -150,8 +150,8 @@ void DwarfParser::parseFde() { u32 range_start = getPtr() - _image_base; if (_count == 0) { - printf("Dwarf range start: %lx (ptr) - %lx (image) = %lx \n", getPtr(), _image_base, - range_start); + printf("Dwarf range start: %lx (ptr) - %lx (image) = %lx \n", getPtr(), + _image_base, range_start); } u32 range_len = get32(); diff --git a/src/async-profiler/elf_helpers.cpp b/src/async-profiler/elf_helpers.cpp index 5bf81b356..e88595f90 100644 --- a/src/async-profiler/elf_helpers.cpp +++ b/src/async-profiler/elf_helpers.cpp @@ -6,16 +6,19 @@ #include #include -#include #include +#include #define LG_WRN(args...) 
printf(args) -const char* get_section_data(Elf *elf, const char *section_name, Offset_t &elf_offset) { +const char *get_section_data(Elf *elf, const char *section_name, + Offset_t &elf_offset) { // Get the string table index for the section header strings size_t shstrndx; if (elf_getshdrstrndx(elf, &shstrndx) != 0) { - fprintf(stderr, "Failed to get string table index for section header strings: %s\n", elf_errmsg(-1)); + fprintf(stderr, + "Failed to get string table index for section header strings: %s\n", + elf_errmsg(-1)); return nullptr; } @@ -38,24 +41,82 @@ const char* get_section_data(Elf *elf, const char *section_name, Offset_t &elf_o // Check if the section is the .eh_frame section if (strcmp(name, section_name) == 0) { - printf("%s section found at offset 0x%lx, size %ld\n", section_name, shdr.sh_offset, shdr.sh_size); + printf("%s section found at offset 0x%lx, size %ld\n", section_name, + shdr.sh_offset, shdr.sh_size); // Get the data for the .eh_frame section elf_offset = shdr.sh_offset; Elf_Data *data = elf_getdata(scn, NULL); if (data == NULL) { + fprintf(stderr, "Unable to find section data: %s\n", section_name); return nullptr; } else { - return reinterpret_cast(data->d_buf); + return reinterpret_cast(data->d_buf); } } } + fprintf(stderr, "Failed to find section: %s\n", section_name); return nullptr; } +bool get_section_info(Elf *elf, const char *section_name, + SectionInfo §ion_info) { + // Get the string table index for the section header strings + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx) != 0) { + fprintf(stderr, + "Failed to get string table index for section header strings: %s\n", + elf_errmsg(-1)); + return false; + } + + // Iterate over the sections and find the .eh_frame section + Elf_Scn *scn = NULL; + bool found = false; + GElf_Shdr shdr; + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + // Get the section header for the current section + if (gelf_getshdr(scn, &shdr) != &shdr) { + fprintf(stderr, "Failed to get section 
header: %s\n", elf_errmsg(-1)); + return false; + } + + // Get the name of the current section + char *name = elf_strptr(elf, shstrndx, shdr.sh_name); + if (name == NULL) { + fprintf(stderr, "Failed to get section name: %s\n", elf_errmsg(-1)); + return false; + } + + // Check if the section is the .eh_frame section + if (strcmp(name, section_name) == 0) { + printf("%s section found at offset 0x%lx, size %ld, vaddr %lx\n", + section_name, shdr.sh_offset, shdr.sh_size, shdr.sh_addr); + // Get the data for the .eh_frame section + Elf_Data *data = elf_getdata(scn, NULL); + if (data == NULL) { + fprintf(stderr, "Unable to find section data: %s\n", section_name); + return false; + } else { + section_info._data = reinterpret_cast(data->d_buf); + section_info._offset = shdr.sh_offset; + section_info._vaddr_sec = shdr.sh_addr; + found = true; + } + } + } + if (!found) { + fprintf(stderr, "Failed to find section: %s\n", section_name); + return false; + } + + return true; +} + bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, - Offset_t &elf_offset, - Offset_t &bias_offset, Offset_t &text_base) { + Offset_t &elf_offset, Offset_t &bias_offset, + Offset_t &text_base) { vaddr = 0; bias_offset = 0; GElf_Ehdr ehdr_mem; @@ -116,7 +177,17 @@ bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, return found_exec; } +bool get_eh_frame_info(Elf *elf, EhFrameInfo &eh_frame_info) { + if (!get_section_info(elf, ".eh_frame_hdr", eh_frame_info._eh_frame_hdr)) { + return false; + } + if (!get_section_info(elf, ".eh_frame", eh_frame_info._eh_frame)) { + return false; + } + return true; +} +// correct way of parsing the FDEs bool process_fdes(Elf *elf) { Elf_Scn *scn = NULL; Elf_Data *data = NULL; @@ -125,14 +196,17 @@ bool process_fdes(Elf *elf) { // Get the string table index for the section header strings size_t shstrndx; if (elf_getshdrstrndx(elf, &shstrndx) != 0) { - fprintf(stderr, "Failed to get string table index for section header strings: 
%s\n", elf_errmsg(-1)); + fprintf(stderr, + "Failed to get string table index for section header strings: %s\n", + elf_errmsg(-1)); return false; } while ((scn = elf_nextscn(elf, scn)) != NULL) { gelf_getshdr(scn, &shdr); - if (shdr.sh_type == SHT_PROGBITS && (strcmp(".debug_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0 || - strcmp(".eh_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0)) { + if (shdr.sh_type == SHT_PROGBITS && + (strcmp(".debug_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0 || + strcmp(".eh_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0)) { // This is the .debug_frame or .eh_frame section data = elf_getdata(scn, NULL); break; @@ -150,17 +224,16 @@ bool process_fdes(Elf *elf) { Dwarf_Off next_offset; Dwarf_CFI_Entry entry; - int result = dwarf_next_cfi(reinterpret_cast(elf_getident(elf, NULL)), - data, - strcmp(".eh_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0, - offset, &next_offset, - &entry); + int result = dwarf_next_cfi( + reinterpret_cast(elf_getident(elf, NULL)), data, + strcmp(".eh_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0, + offset, &next_offset, &entry); if (result != 0) { // End of CFI records break; } -// printf("cfi id = %lx\n", entry); + // printf("cfi id = %lx\n", entry); // Process the CFI record // ... 
diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 578bcdffb..24847913f 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -186,7 +186,8 @@ class ElfParser { void parseDynamicSection(); void parseDwarfInfo(); - void parseDwarfInfoRemote(const char *eh_frame_data, const char *base_remote); + void parseDwarfInfoRemote(const char *eh_frame_data, const char *base_remote, + Offset_t adjust_eh_frame); void loadSymbols(bool use_debug); bool loadSymbolsUsingBuildId(); bool loadSymbolsUsingDebugLink(); @@ -376,9 +377,11 @@ void ElfParser::parseDynamicSection() { } } -void ElfParser::parseDwarfInfoRemote(const char *eh_frame_data, const char *base_remote) { +void ElfParser::parseDwarfInfoRemote(const char *eh_frame_data, + const char *base_remote, + Offset_t adjust_eh_frame) { printf("Create dwarf with base:%p - eh_frame_hdr:%p\n", _base, eh_frame_data); - DwarfParser dwarf(_cc->name(), base_remote, eh_frame_data); + DwarfParser dwarf(_cc->name(), base_remote, eh_frame_data, adjust_eh_frame); _cc->setDwarfTable(dwarf.table(), dwarf.count()); printf("Created a number of dwarf entries = %d \n", dwarf.count()); } @@ -515,8 +518,9 @@ void ElfParser::loadSymbolTable(ElfSection *symtab) { // Skip special AArch64 mapping symbols: $x and $d if (sym->st_size != 0 || sym->st_info != 0 || strings[sym->st_name] != '$') { -// printf("Loading sym %s at 0x%lx (base=0x%lx)\n", strings + sym->st_name, -// _base + sym->st_value, _base); + // printf("Loading sym %s at 0x%lx (base=0x%lx)\n", strings + + // sym->st_name, + // _base + sym->st_value, _base); _cc->add(_base + sym->st_value, (int)sym->st_size, strings + sym->st_name); ++cpt; @@ -639,7 +643,7 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, CodeCache *cc = new CodeCache(map.file(), count, image_base, image_end); unsigned long inode = map.inode(); - printf("Considering %s \n", map.file()); + printf("+++++ Considering %s ++++ 
\n", map.file()); if (inode != 0) { // remote unwinding int fd = open(map.file(), O_RDONLY); @@ -649,21 +653,18 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, } size_t length = (size_t)lseek64(fd, 0, SEEK_END); // todo : remove the mmap - void *addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); - printf("mmap at %lx - size = %lx\n", addr, length); Elf *elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); - if (elf == NULL || addr == MAP_FAILED) { - LG_WRN("Invalid elf %s (efl:%p, addr_mmap:%p)\n", map.file(), elf, - addr); + if (elf == NULL) { + LG_WRN("Invalid elf %s (efl:%p, addr_mmap:%p)\n", map.file(), elf); goto continue_loop; } Offset_t biais_offset; ElfAddress_t vaddr; - ElfAddress_t text_base; + ElfAddress_t text_base; // not used Offset_t elf_offset; - // Compute how to convert a process address - if (get_elf_offsets(elf, map.file(), vaddr, elf_offset, biais_offset, text_base)) { + if (get_elf_offsets(elf, map.file(), vaddr, elf_offset, biais_offset, + text_base)) { printf("vaddr from get_elf_offset: %lx \n", vaddr); printf("biais offset get_elf_offset: %lx \n", biais_offset); printf("text base from get_elf_offset: %lx \n", text_base); @@ -674,27 +675,38 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, // Do not parse the same executable twice, e.g. on Alpine Linux if (parsed_inodes.insert(map.dev() | inode << 16).second) { // Be careful: executable file is not always ELF, e.g. 
classes.jsa - // image base is used to offset + // todo: This should be something with the biais instead if ((image_base -= map.offs()) >= last_readable_base) { - printf("image base = %lx \n", image_base); - ElfParser elf_remote(cc, image_base, addr); - if (elf_remote.validHeader()) { - Offset_t eh_frame_hdr_data_offset; - Offset_t eh_frame_data_offset; - // todo: This should be something with the biais instead - cc->setTextBase(image_base); - const char* eh_frame_data_hdr = get_section_data(elf, ".eh_frame_hdr", - eh_frame_hdr_data_offset); - const char* eh_frame_data = get_section_data(elf, ".eh_frame", - eh_frame_data_offset); - printf("eh_frame_hdr = %p, eh_frame = %p \n", eh_frame_data_hdr, eh_frame_data); - if (eh_frame_data_hdr) { - elf_remote.parseDwarfInfoRemote(eh_frame_data_hdr, - eh_frame_data_hdr - eh_frame_hdr_data_offset); - } + // process elf info + EhFrameInfo eh_frame_info = {}; + if (!get_eh_frame_info(elf, eh_frame_info)) { + printf("Failed to retrieve eh frame info\n"); } - else { - printf("Not a valid header - %s\n", map.file()); + const char *elf_base = eh_frame_info._eh_frame_hdr._data - + eh_frame_info._eh_frame_hdr._offset; + // this is used during unwinding to offset PC to dwarf instructions + cc->setTextBase(image_base); + printf("image base = %lx \n", image_base); + + if (eh_frame_info._eh_frame_hdr._data) { + // todo: is this always valid ? 
+ ElfParser elf_remote(cc, image_base, elf_base); + + // (vaddr_eh_frame - vaddr_eh_frame_hdr) - (offset_sec_1 - + // offset_sec_2) + Offset_t adjust_eh_frame = + (eh_frame_info._eh_frame._vaddr_sec - + eh_frame_info._eh_frame_hdr._vaddr_sec) - + (eh_frame_info._eh_frame._offset - + eh_frame_info._eh_frame_hdr._offset); + printf("adjust eh_frame %lx \n", adjust_eh_frame); + elf_remote.parseDwarfInfoRemote( + eh_frame_info._eh_frame_hdr._data, + eh_frame_info._eh_frame_hdr._data - + eh_frame_info._eh_frame_hdr._offset, + adjust_eh_frame); + } else { + printf("No EH Frame data - %s\n", map.file()); } } ElfParser::parseFile(cc, image_base, map.file(), true); @@ -703,13 +715,11 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, continue_loop: close(fd); elf_end(elf); // no-op if null - munmap(addr, length); } else if (strcmp(map.file(), "[vdso]") == 0) { // find our self address for vdso const char *addr_vdso = ElfParser::get_self_vdso(); ElfParser::parseMemRemote(cc, image_base, addr_vdso); } - cc->sort(); array->add(cc); } diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 058dfaa4c..dfa219a44 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -29,14 +29,14 @@ DDPROF_NOINLINE size_t funcA(std::array ®s); DDPROF_NOINLINE size_t funcB(std::array ®s); size_t funcB(std::array ®s) { - printf("dwarf_unwind-ut:%s %lx \n",__FUNCTION__, _THIS_IP_); + printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_); size_t size = save_context(retrieve_stack_end_address(), regs, stack); return size; } size_t funcA(std::array ®s) { - printf("dwarf_unwind-ut:%s %lx \n",__FUNCTION__, _THIS_IP_); + printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_); return funcB(regs); } @@ -111,8 +111,8 @@ TEST(dwarf_unwind, remote) { pid_t temp_pid = fork(); if (!temp_pid) { func_intermediate_0(10); -// char *const argList[] = {"sleep", "10", nullptr}; -// execvp("sleep", argList); + // char *const argList[] = {"sleep", "10", nullptr}; 
+ // execvp("sleep", argList); return; } diff --git a/test/loader/note.txt b/test/loader/note.txt index 66a4d5cf0..bd97ae5fe 100644 --- a/test/loader/note.txt +++ b/test/loader/note.txt @@ -77,5 +77,108 @@ Change pointer to 7188fe17fd2c - 104 loader[2949720]: has encountered an error and will exit -so eh_fram +it should be something like + +// eh_frame_hdr +eh_frame_hdr->offset + +// eh_frame +eh_frame->offset + + OffSeg1 OffSeg2 +// OffsetScn1 [x] OffsetScn2 [Y] + +What we expect + +// VAddr1 [x] VAddr2 [Y] + + + +// off + (vaddr1 - offset1) - (vaddr2 - offset2) + + +x - offset 1 + +Considering /usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so.525.60.11 +image_base = 0x7bb4bb800000, map.offs() = (nil), last_readable_base = 0x7bb4bb800000 +Setting text base = 0x7bb4bb800000 +relent = 24 +GOT start == 0x7bb4bd160018 +Create dwarf with 7bb4bb800000 - at:7bb4bcb73880 +eh_frame_ptr_enc = 1b +table_enc = 3b +fde count = 43629 +ptr = 7bb4bcdc8d28, table offset = 2554a8 +Change pointer to 7bb4bcdc8d2c - 1c +Dwarf range start: 7bb4bcdc99f4 (ptr) - 7bb4bb800000 (image) = c9d00 +Created a number of dwarf entries = 326475 +Considered 14 symbols + + +Considering /usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so.525.60.11 +0 - 0 (vaddr - p_offset) +vaddr from get_elf_offset: 0 +biais offset get_elf_offset: 0 +text base from get_elf_offset: ca9d0 +offset from get_elf_offset: 0 +last readable: 7bb4bb800000 +.eh_frame_hdr section found at offset 0x1373880, size 349044, vaddr 1373880 +.eh_frame section found at offset 0x13c8d10, size 3201136, vaddr 15c8d10 +.eh_frame_hdr section found at offset 0x1373880, size 349044 +.eh_frame section found at offset 0x13c8d10, size 3201136 +eh_frame_hdr = 0x7bb4ba84c880, eh_frame = 0x7bb4ba8a1d10 +image base = 7bb4bb800000 +adjust eh_frame 200000 +Create dwarf with base:0x7bb4bb800000 - eh_frame_hdr:0x7bb4ba84c880 +eh_frame_ptr_enc = 1b +table_enc = 3b +fde count = 43629 +ptr = 7bb4baca1d28, table offset = 2554a8 +Change pointer to 7bb4baca1d2c - 
5f5e544a + + + +-- backup code -- + + + // Look for matching segments + GElf_Ehdr ehdr_mem; + GElf_Ehdr *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (ehdr == nullptr) { + LG_WRN("Unable to retrieve elf header"); + return false; + } + size_t phnum; + if (unlikely(elf_getphdrnum(elf, &phnum) != 0)) { + LG_WRN("Unable to retrieve number of program headers \n"); + return false; + } + bool found_ph = false; + for (size_t i = 0; i < phnum; ++i) { + GElf_Phdr phdr_mem; + // Retrieve the program header + GElf_Phdr *ph = gelf_getphdr(elf, i, &phdr_mem); + if (unlikely(ph == NULL)) { + LG_WRN("Unable to retrieve program header \n"); + return false; + } + printf("Section %lx - segment %lx/size(%lx)=%lx\n", + shdr.sh_offset, ph->p_offset, ph->p_filesz, ph->p_offset + ph->p_filesz); + printf("%s - %s \n", (shdr.sh_offset >= ph->p_offset)?"true":"false", + shdr.sh_offset < (ph->p_offset + ph->p_filesz)?"true":"false"); + // If we are included in this segment + if (shdr.sh_offset >= ph->p_offset && shdr.sh_offset < (ph->p_offset + ph->p_filesz)) { + printf("We found program header for the section %lx, %lx \n", + ph->p_offset, + ph->p_vaddr); + found_ph = true; + section_info._vaddr_seg = ph->p_vaddr; + section_info._offset_seg = ph->p_offset; + break; + } + } + if (!found_ph) { + LG_WRN("Unable to find matching program header \n"); + return false; + } \ No newline at end of file From e8ce23151526a157dae1c4dcd22f5f1ad3d7c4c6 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 9 Jan 2023 11:41:18 +0100 Subject: [PATCH 23/29] Add the show frames to debug unwinding issues --- include/pprof/ddprof_pprof.hpp | 5 ++-- include/unwind_output.hpp | 1 + src/async-profiler/dwarf.cpp | 9 ++++++ src/ddprof_worker.cc | 12 ++++---- src/pprof/ddprof_pprof.cc | 53 ++++++++++------------------------ src/unwind.cc | 49 ++++++++++++++++++++++++++++++- 6 files changed, 82 insertions(+), 47 deletions(-) diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index 
028cbebf9..0eb593e14 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -29,7 +29,7 @@ struct DDProfPProf { DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext *ctx); DDRes pprof_aggregate_v2(ddprof::span callchain, - CodeCacheArray &cache_arary, uint64_t value, + ddprof::span symbols, uint64_t value, uint64_t count, const PerfWatcher *watcher, DDProfPProf *pprof); @@ -51,6 +51,5 @@ DDRes pprof_write_profile(const DDProfPProf *pprof, int fd); DDRes pprof_free_profile(DDProfPProf *pprof); -void ddprof_print_sample(const UnwindOutput &uw_output, - const SymbolHdr &symbol_hdr, uint64_t value, +void ddprof_print_sample(const UnwindOutput_V2 &uw_output, uint64_t value, const PerfWatcher &watcher); diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index 39cd2a2dd..815ffb059 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -29,6 +29,7 @@ typedef struct UnwindOutput { struct UnwindOutput_V2 { const void *callchain[DD_MAX_STACK_DEPTH]; + const char *symbols[DD_MAX_STACK_DEPTH]; uint64_t nb_locs; int pid = {}; int tid = {}; diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp index f1f663c94..e45d41f30 100644 --- a/src/async-profiler/dwarf.cpp +++ b/src/async-profiler/dwarf.cpp @@ -189,6 +189,15 @@ void DwarfParser::parseInstructions(u32 loc, const char *end) { break; case DW_CFA_advance_loc2: addRecord(loc, cfa_reg, cfa_off, fp_off); +#ifdef ALIGN_ISSUES + // 16 bits alignement check + if ((reinterpret_cast(_ptr) & 0xf) != 0) { + // The address is not 16-bit aligned here + printf("Addr is not aligned 0x%lx (code_align=%i)\n", + _ptr, code_align); + exit(1); + } +#endif loc += get16() * code_align; break; case DW_CFA_advance_loc4: diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index 77a52c798..fad640549 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -241,14 +241,14 @@ DDRes ddprof_pr_sample(DDProfContext *ctx, perf_event_sample *sample, int 
i_export = ctx->worker_ctx.i_current_pprof; DDProfPProf *pprof = ctx->worker_ctx.pprof[i_export]; - DDRES_CHECK_FWD(pprof_aggregate_v2( - ddprof::span(us->output.callchain, us->output.nb_locs), - us->code_cache[sample->pid], sample_val, 1, watcher, pprof)); if (ctx->params.show_samples) { - // todo show samples - // ddprof_print_sample(us->output, us->symbol_hdr, sample->period, - // *watcher); + ddprof_print_sample(us->output, sample->period, *watcher); } + + DDRES_CHECK_FWD(pprof_aggregate_v2( + ddprof::span(us->output.callchain, us->output.nb_locs), + ddprof::span(us->output.symbols, us->output.nb_locs), + sample_val, 1, watcher, pprof)); } DDRES_CHECK_FWD(ddprof_stats_add(STATS_AGGREGATION_AVG_TIME, diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 08dab4192..53b1b3e4d 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -185,11 +185,8 @@ static void write_line_v2(const char *func, ddog_Line *ffi_line) { ffi_line->line = 0; } -#include "async-profiler/codeCache.h" -#include "async-profiler/stackWalker.h" - DDRes pprof_aggregate_v2(ddprof::span callchain, - CodeCacheArray &cache_arary, uint64_t value, + ddprof::span symbols, uint64_t value, uint64_t count, const PerfWatcher *watcher, DDProfPProf *pprof) { ddog_Profile *profile = pprof->_profile; @@ -205,19 +202,12 @@ DDRes pprof_aggregate_v2(ddprof::span callchain, ddog_Line line_buff[DD_MAX_STACK_DEPTH]; // todo skip frames - unsigned cur_loc = 0; - for (const void *ip : callchain) { - const char *func = "unknown"; - CodeCache *code_cache = findLibraryByAddress(&cache_arary, ip); - if (code_cache) { - func = code_cache->binarySearch(ip); - } - + for(int i = 0; i < symbols.size(); ++i ) { + assert(i < DD_MAX_STACK_DEPTH); // possibly several lines to handle inlined function (not handled for now) - write_line_v2(func, &line_buff[cur_loc]); - ddog_Slice_line lines = {.ptr = &line_buff[cur_loc], .len = 1}; - write_location_v2(ip, &lines, &locations_buff[cur_loc]); - 
++cur_loc; + write_line_v2(symbols[i], &line_buff[i]); + ddog_Slice_line lines = {.ptr = &line_buff[i], .len = 1}; + write_location_v2(callchain[i], &lines, &locations_buff[i]); } ddog_Label labels[PPROF_MAX_LABELS] = {}; @@ -237,7 +227,7 @@ DDRes pprof_aggregate_v2(ddprof::span callchain, ++labels_num; } ddog_Sample sample = { - .locations = {.ptr = locations_buff, .len = cur_loc}, + .locations = {.ptr = locations_buff, .len = symbols.size()}, .values = {.ptr = values, .len = pprof->_nb_values}, .labels = {.ptr = labels, .len = labels_num}, }; @@ -344,12 +334,10 @@ DDRes pprof_reset(DDProfPProf *pprof) { return ddres_init(); } -void ddprof_print_sample(const UnwindOutput &uw_output, - const SymbolHdr &symbol_hdr, uint64_t value, +void ddprof_print_sample(const UnwindOutput_V2 &uw_output, uint64_t value, const PerfWatcher &watcher) { - auto &symbol_table = symbol_hdr._symbol_table; - ddprof::span locs{uw_output.locs, uw_output.nb_locs}; + ddprof::span locs{uw_output.callchain, uw_output.nb_locs}; const char *sample_name = sample_type_name_from_idx( sample_type_id_to_count_sample_type_id(watcher.sample_type_id)); @@ -358,26 +346,17 @@ void ddprof_print_sample(const UnwindOutput &uw_output, ddprof::string_format("sample[type=%s;pid=%ld;tid=%ld] ", sample_name, uw_output.pid, uw_output.tid); - for (auto loc_it = locs.rbegin(); loc_it != locs.rend(); ++loc_it) { - auto &sym = symbol_table[loc_it->_symbol_idx]; - if (loc_it != locs.rbegin()) { + for (int i = 0; i < uw_output.nb_locs; ++i) { + std::string_view cur_sym(uw_output.symbols[i]); + if (i==0){ buf += ";"; } - if (sym._symname.empty()) { - if (loc_it->ip == 0) { - std::string_view path{sym._srcpath}; - auto pos = path.rfind('/'); - buf += "("; - buf += path.substr(pos == std::string_view::npos ? 0 : pos + 1); - buf += ")"; - } else { - buf += ddprof::string_format("%p", loc_it->ip); - } + // todo what if we don't have a sym ? 
+ if (cur_sym.empty()) { + // todo add ip } else { - std::string_view func{sym._symname}; - buf += func.substr(0, func.find('(')); + buf += cur_sym.substr(0, cur_sym.find('(')); } } - PRINT_NFO("%s %ld", buf.c_str(), value); } diff --git a/src/unwind.cc b/src/unwind.cc index 8642fae13..3db60111d 100644 --- a/src/unwind.cc +++ b/src/unwind.cc @@ -29,6 +29,7 @@ void unwind_init_sample(UnwindState *us, uint64_t *sample_regs, pid_t sample_pid, uint64_t sample_size_stack, char *sample_data_stack) { us->output.nb_locs = 0; + us->output.symbols[0] = nullptr; memcpy(&us->initial_regs.regs[0], sample_regs, K_NB_REGS_UNWIND * sizeof(uint64_t)); us->current_ip = us->initial_regs.regs[REGNAME(PC)]; @@ -44,6 +45,51 @@ static bool is_ld(const std::string &path) { return path.starts_with("ld-"); } + +bool utf8_check_is_valid(const std::string string) +{ + int c,i,ix,n,j; + for (i=0, ix=string.length(); i < ix; i++) + { + c = (unsigned char) string[i]; + //if (c==0x09 || c==0x0a || c==0x0d || (0x20 <= c && c <= 0x7e) ) n = 0; // is_printable_ascii + if (0x00 <= c && c <= 0x7f) n=0; // 0bbbbbbb + else if ((c & 0xE0) == 0xC0) n=1; // 110bbbbb + else if ( c==0xed && i<(ix-1) && ((unsigned char)string[i+1] & 0xa0)==0xa0) return false; //U+d800 to U+dfff + else if ((c & 0xF0) == 0xE0) n=2; // 1110bbbb + else if ((c & 0xF8) == 0xF0) n=3; // 11110bbb + //else if (($c & 0xFC) == 0xF8) n=4; // 111110bb //byte 5, unnecessary in 4 byte UTF-8 + //else if (($c & 0xFE) == 0xFC) n=5; // 1111110b //byte 6, unnecessary in 4 byte UTF-8 + else return false; + for (j=0; joutput; + ddprof::span callchain(output.callchain, output.nb_locs); + CodeCacheArray &cache_arary = us->code_cache[us->pid]; + for (unsigned i = 0; i < callchain.size(); ++i) { + output.symbols[i] = "unknown"; + CodeCache *code_cache = findLibraryByAddress(&cache_arary, + callchain[i]); + if (code_cache) { + output.symbols[i] = code_cache->binarySearch(callchain[i]); +#ifdef DEBUG + 
if(!utf8_check_is_valid(std::string(output.symbols[i]))){ + printf("INVALID UTF8 = %s \n", output.symbols[i]); + exit(1); + } +#endif + } + } + return ddres_init(); +} + DDRes unwindstate__unwind(UnwindState *us) { DDRes res = ddres_init(); if (us->pid != 0) { // we can not unwind pid 0 @@ -67,10 +113,11 @@ DDRes unwindstate__unwind(UnwindState *us) { DD_MAX_STACK_DEPTH, 0); } // todo error management (error frame) - // Add a frame that identifies executable to which these belong // todo base frame + // todo symbolization is only useful at export + DDRES_CHECK_FWD(symbolize(us)); return res; } From a589266d55e28b17a91e811f00956733ce6bb522 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 9 Jan 2023 15:14:41 +0100 Subject: [PATCH 24/29] Adjust max unwinding depth --- src/async-profiler/dwarf.cpp | 4 +-- src/ddprof_worker.cc | 4 +-- src/pprof/ddprof_pprof.cc | 6 ++--- src/unwind.cc | 49 ++++++++++++++++++++---------------- 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp index e45d41f30..358dde677 100644 --- a/src/async-profiler/dwarf.cpp +++ b/src/async-profiler/dwarf.cpp @@ -193,8 +193,8 @@ void DwarfParser::parseInstructions(u32 loc, const char *end) { // 16 bits alignement check if ((reinterpret_cast(_ptr) & 0xf) != 0) { // The address is not 16-bit aligned here - printf("Addr is not aligned 0x%lx (code_align=%i)\n", - _ptr, code_align); + printf("Addr is not aligned 0x%lx (code_align=%i)\n", _ptr, + code_align); exit(1); } #endif diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index fad640549..d69c85307 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -247,8 +247,8 @@ DDRes ddprof_pr_sample(DDProfContext *ctx, perf_event_sample *sample, DDRES_CHECK_FWD(pprof_aggregate_v2( ddprof::span(us->output.callchain, us->output.nb_locs), - ddprof::span(us->output.symbols, us->output.nb_locs), - sample_val, 1, watcher, pprof)); + ddprof::span(us->output.symbols, 
us->output.nb_locs), sample_val, 1, + watcher, pprof)); } DDRES_CHECK_FWD(ddprof_stats_add(STATS_AGGREGATION_AVG_TIME, diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 53b1b3e4d..dc9a56128 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -202,7 +202,7 @@ DDRes pprof_aggregate_v2(ddprof::span callchain, ddog_Line line_buff[DD_MAX_STACK_DEPTH]; // todo skip frames - for(int i = 0; i < symbols.size(); ++i ) { + for (int i = 0; i < symbols.size(); ++i) { assert(i < DD_MAX_STACK_DEPTH); // possibly several lines to handle inlined function (not handled for now) write_line_v2(symbols[i], &line_buff[i]); @@ -348,7 +348,7 @@ void ddprof_print_sample(const UnwindOutput_V2 &uw_output, uint64_t value, for (int i = 0; i < uw_output.nb_locs; ++i) { std::string_view cur_sym(uw_output.symbols[i]); - if (i==0){ + if (i != 0) { buf += ";"; } // todo what if we don't have a sym ? @@ -358,5 +358,5 @@ void ddprof_print_sample(const UnwindOutput_V2 &uw_output, uint64_t value, buf += cur_sym.substr(0, cur_sym.find('(')); } } - PRINT_NFO("%s %ld", buf.c_str(), value); + PRINT_NFO("(depth=%u) %s %ld", uw_output.nb_locs, buf.c_str(), value); } diff --git a/src/unwind.cc b/src/unwind.cc index 3db60111d..fac301507 100644 --- a/src/unwind.cc +++ b/src/unwind.cc @@ -45,24 +45,30 @@ static bool is_ld(const std::string &path) { return path.starts_with("ld-"); } - -bool utf8_check_is_valid(const std::string string) -{ - int c,i,ix,n,j; - for (i=0, ix=string.length(); i < ix; i++) - { - c = (unsigned char) string[i]; - //if (c==0x09 || c==0x0a || c==0x0d || (0x20 <= c && c <= 0x7e) ) n = 0; // is_printable_ascii - if (0x00 <= c && c <= 0x7f) n=0; // 0bbbbbbb - else if ((c & 0xE0) == 0xC0) n=1; // 110bbbbb - else if ( c==0xed && i<(ix-1) && ((unsigned char)string[i+1] & 0xa0)==0xa0) return false; //U+d800 to U+dfff - else if ((c & 0xF0) == 0xE0) n=2; // 1110bbbb - else if ((c & 0xF8) == 0xF0) n=3; // 11110bbb - //else if (($c & 0xFC) == 0xF8) n=4; // 
111110bb //byte 5, unnecessary in 4 byte UTF-8 - //else if (($c & 0xFE) == 0xFC) n=5; // 1111110b //byte 6, unnecessary in 4 byte UTF-8 - else return false; - for (j=0; jcode_cache[us->pid]; for (unsigned i = 0; i < callchain.size(); ++i) { output.symbols[i] = "unknown"; - CodeCache *code_cache = findLibraryByAddress(&cache_arary, - callchain[i]); + CodeCache *code_cache = findLibraryByAddress(&cache_arary, callchain[i]); if (code_cache) { output.symbols[i] = code_cache->binarySearch(callchain[i]); #ifdef DEBUG - if(!utf8_check_is_valid(std::string(output.symbols[i]))){ + if (!utf8_check_is_valid(std::string(output.symbols[i]))) { printf("INVALID UTF8 = %s \n", output.symbols[i]); exit(1); } @@ -110,7 +115,7 @@ DDRes unwindstate__unwind(UnwindState *us) { // todo remove char* in favour of uint64 us->output.nb_locs = stackWalk(&code_cache_array, sc, buffer, (us->output.callchain), - DD_MAX_STACK_DEPTH, 0); + DD_MAX_STACK_DEPTH - 2, 0); } // todo error management (error frame) // Add a frame that identifies executable to which these belong From 5554831c5797f91845d5eb74c409a4c88ced02f2 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 10 Jan 2023 14:12:22 +0100 Subject: [PATCH 25/29] Add in binary information --- include/pprof/ddprof_pprof.hpp | 5 +++- include/unwind_output.hpp | 2 ++ src/async-profiler/symbols_linux.cpp | 1 + src/ddprof_worker.cc | 4 ++- src/pprof/ddprof_pprof.cc | 40 +++++++++++++++++++++++----- src/unwind.cc | 6 +++++ 6 files changed, 49 insertions(+), 9 deletions(-) diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index 0eb593e14..4beb74c91 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -11,6 +11,7 @@ #include "perf_watcher.hpp" #include "tags.hpp" #include "unwind_output.hpp" +#include "codeCache.h" #include "async-profiler/codeCache.h" #include "span.hpp" @@ -29,7 +30,9 @@ struct DDProfPProf { DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext *ctx); DDRes 
pprof_aggregate_v2(ddprof::span callchain, - ddprof::span symbols, uint64_t value, + ddprof::span symbols, + ddprof::span code_cache, + uint64_t value, uint64_t count, const PerfWatcher *watcher, DDProfPProf *pprof); diff --git a/include/unwind_output.hpp b/include/unwind_output.hpp index 815ffb059..4f64baba5 100644 --- a/include/unwind_output.hpp +++ b/include/unwind_output.hpp @@ -9,6 +9,7 @@ #include +#include "codeCache.h" #include "ddprof_defs.hpp" #include "string_view.hpp" #include @@ -30,6 +31,7 @@ typedef struct UnwindOutput { struct UnwindOutput_V2 { const void *callchain[DD_MAX_STACK_DEPTH]; const char *symbols[DD_MAX_STACK_DEPTH]; + const CodeCache *code_cache[DD_MAX_STACK_DEPTH]; uint64_t nb_locs; int pid = {}; int tid = {}; diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 24847913f..1bc7e2d91 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -694,6 +694,7 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, // (vaddr_eh_frame - vaddr_eh_frame_hdr) - (offset_sec_1 - // offset_sec_2) + // If eh frame is not in the same segment Offset_t adjust_eh_frame = (eh_frame_info._eh_frame._vaddr_sec - eh_frame_info._eh_frame_hdr._vaddr_sec) - diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index d69c85307..46748fa4d 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -247,7 +247,9 @@ DDRes ddprof_pr_sample(DDProfContext *ctx, perf_event_sample *sample, DDRES_CHECK_FWD(pprof_aggregate_v2( ddprof::span(us->output.callchain, us->output.nb_locs), - ddprof::span(us->output.symbols, us->output.nb_locs), sample_val, 1, + ddprof::span(us->output.symbols, us->output.nb_locs), + ddprof::span(us->output.code_cache, us->output.nb_locs), + sample_val, 1, watcher, pprof)); } diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index dc9a56128..b5a160664 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -144,6 
+144,21 @@ static void write_function(const ddprof::Symbol &symbol, ffi_func->start_line = 0; } +static void write_mapping_v2(const CodeCache *code_cache, + ddog_Mapping *ffi_mapping) { + // ffi_mapping->memory_start = (code_cache->minAddress()); + ffi_mapping->memory_start = 0; + ffi_mapping->memory_limit = 0; + ffi_mapping->file_offset = 0; + if (code_cache) { + ffi_mapping->filename = to_CharSlice(code_cache->name()); + } + else { + ffi_mapping->filename = to_CharSlice("unknown"); + } + ffi_mapping->build_id = to_CharSlice(""); +} + static void write_mapping(const ddprof::MapInfo &mapinfo, ddog_Mapping *ffi_mapping) { ffi_mapping->memory_start = mapinfo._low_addr; @@ -168,25 +183,33 @@ static void write_line(const ddprof::Symbol &symbol, ddog_Line *ffi_line) { ffi_line->line = symbol._lineno; } -static void write_location_v2(const void *ip, const ddog_Slice_line *lines, +static void write_location_v2(const void *ip, + const ddog_Slice_line *lines, + const CodeCache *code_cache, ddog_Location *ffi_location) { + write_mapping_v2(code_cache, &ffi_location->mapping); ffi_location->address = reinterpret_cast(ip); ffi_location->lines = *lines; // Folded not handled for now ffi_location->is_folded = false; } -static void write_function_v2(const char *func, ddog_Function *ffi_func) { +static void write_function_v2(const char *func, const char *filename, ddog_Function *ffi_func) { + // todo demangling ffi_func->name = to_CharSlice(string_view_create_strlen(func)); + ffi_func->system_name = to_CharSlice(string_view_create_strlen(func)); + ffi_func->filename = to_CharSlice(filename); } -static void write_line_v2(const char *func, ddog_Line *ffi_line) { - write_function_v2(func, &ffi_line->function); +static void write_line_v2(const char *func, const char *filename, ddog_Line *ffi_line) { + write_function_v2(func, filename, &ffi_line->function); ffi_line->line = 0; } DDRes pprof_aggregate_v2(ddprof::span callchain, - ddprof::span symbols, uint64_t value, + ddprof::span 
symbols, + ddprof::span code_cache, + uint64_t value, uint64_t count, const PerfWatcher *watcher, DDProfPProf *pprof) { ddog_Profile *profile = pprof->_profile; @@ -205,9 +228,12 @@ DDRes pprof_aggregate_v2(ddprof::span callchain, for (int i = 0; i < symbols.size(); ++i) { assert(i < DD_MAX_STACK_DEPTH); // possibly several lines to handle inlined function (not handled for now) - write_line_v2(symbols[i], &line_buff[i]); + // todo: get file name from dwarf + write_line_v2(symbols[i], + code_cache[i]?code_cache[i]->name():"unknown", + &line_buff[i]); ddog_Slice_line lines = {.ptr = &line_buff[i], .len = 1}; - write_location_v2(callchain[i], &lines, &locations_buff[i]); + write_location_v2(callchain[i], &lines, code_cache[i], &locations_buff[i]); } ddog_Label labels[PPROF_MAX_LABELS] = {}; diff --git a/src/unwind.cc b/src/unwind.cc index fac301507..fab8f1fc6 100644 --- a/src/unwind.cc +++ b/src/unwind.cc @@ -84,13 +84,19 @@ static DDRes symbolize(UnwindState *us) { CodeCache *code_cache = findLibraryByAddress(&cache_arary, callchain[i]); if (code_cache) { output.symbols[i] = code_cache->binarySearch(callchain[i]); + // Warning: assumption is that it stays valid until aggregation + output.code_cache[i] = code_cache; #ifdef DEBUG + // some issues with non utf 8 symbols if (!utf8_check_is_valid(std::string(output.symbols[i]))) { printf("INVALID UTF8 = %s \n", output.symbols[i]); exit(1); } #endif } + else { + output.code_cache[i] = nullptr; + } } return ddres_init(); } From 9d38887df5658c2522701e6788be870c897d670e Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 10 Jan 2023 16:19:12 +0100 Subject: [PATCH 26/29] Implement a basic red zone optim fix --- src/async-profiler/stackWalker.cpp | 31 ++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index 75e777770..55de4b9c3 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp 
@@ -41,7 +41,20 @@ bool read_memory(uint64_t addr, uint64_t *res, const ap::StackBuffer &buffer) { } if (addr < buffer.sp_start && addr > buffer.sp_start - 4096) { - // todo red zone thing + if (*res > buffer.sp_start && *res < buffer.sp_end) { + // todo this is true only on leaf function afaik ? + // printf("red zone optim (current value %lx) -- OK\n", *res); + // high addr + // + // sp(2) + // + // sp(3) (-32 fp_off) + // + // low addr + // red zone + return true; + } + // printf("red zone optim (current value %lx) -- KO\n", *res); return false; } else if (addr < buffer.sp_start || addr + sizeof(uint64_t) > buffer.sp_end) { @@ -76,6 +89,8 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) { f = &FrameDesc::default_frame; } +// const char *sym = cc?cc->binarySearch(sc.pc):"unknown"; +// printf("-- Unwind from %s, %s \n", sym?sym:"unknown", cc?cc->name():"unknown"); return stepStackContext(sc, buffer, f); } @@ -93,9 +108,10 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, } else if (cfa_reg == DW_REG_PLT) { sc.sp += ((uintptr_t)sc.pc & 15) >= 11 ? 
cfa_off * 2 : cfa_off; } else { + // it is interesting to categorize these cases + // printf("unhandled reg \n"); return false; } - // Check if the next frame is below on the current stack if (sc.sp < prev_sp || sc.sp >= prev_sp + MAX_FRAME_SIZE || sc.sp >= bottom) { return false; @@ -105,14 +121,7 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, if ((sc.sp & (sizeof(uintptr_t) - 1)) != 0) { return false; } - // high addr - // - // sp(2) - // - // sp(3) (-32 fp_off) - // - // red zone - // low addr + if (f->fp_off & DW_PC_OFFSET) { sc.pc = (const char *)sc.pc + (f->fp_off >> 1); } else { @@ -156,7 +165,6 @@ int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, const ap::StackBuffer &buffer, void const **callchain, int max_depth, int skip) { int depth = -skip; - // Walk until the bottom of the stack or until the first Java frame while (depth < max_depth) { int d = depth++; @@ -167,6 +175,5 @@ int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, break; } } - return depth; } From 82f40ad7c9381622dc4e49d553e9f0de64ea8dbb Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 11 Jan 2023 11:45:28 +0100 Subject: [PATCH 27/29] Lost sample throttling Add a mechanism to ensure we don't flood the buffer with lost events --- include/lib/allocation_tracker.hpp | 2 ++ src/lib/allocation_tracker.cc | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/lib/allocation_tracker.hpp b/include/lib/allocation_tracker.hpp index 723c4d589..37e032d6d 100644 --- a/include/lib/allocation_tracker.hpp +++ b/include/lib/allocation_tracker.hpp @@ -63,6 +63,8 @@ class AllocationTracker { std::mutex mutex; std::atomic track_allocations = false; std::atomic track_deallocations = false; + // The following flag avoids a flood of lost events + std::atomic real_sample_pushed = true; std::atomic lost_count; // count number of lost events std::atomic failure_count; std::atomic pid; // cache of pid diff --git 
a/src/lib/allocation_tracker.cc b/src/lib/allocation_tracker.cc index 63ef0e026..4dc041ac8 100644 --- a/src/lib/allocation_tracker.cc +++ b/src/lib/allocation_tracker.cc @@ -221,7 +221,8 @@ DDRes AllocationTracker::push_lost_sample(MPSCRingBufferWriter &writer, lost_event->id = 0; lost_event->lost = lost_count; notify_needed = writer.commit(buffer); - + // Throttle future lost events, out of order write should not matter + _state.real_sample_pushed.store(false, std::memory_order_relaxed); return {}; } @@ -231,7 +232,8 @@ DDRes AllocationTracker::push_sample(uint64_t allocated_size, bool notify_consumer{false}; bool timeout = false; - if (unlikely(_state.lost_count.load(std::memory_order_relaxed))) { + if (unlikely(_state.lost_count.load(std::memory_order_relaxed)) + && _state.real_sample_pushed.load(std::memory_order_acq_rel)) { DDRES_CHECK_FWD(push_lost_sample(writer, notify_consumer)); } @@ -275,7 +277,8 @@ DDRes AllocationTracker::push_sample(uint64_t allocated_size, event->dyn_size = save_context(tl_state.stack_end, event->regs, ddprof::Buffer{event->data, event->size}); - + // Authorize the next lost events to be pushed + _state.real_sample_pushed.store(true, std::memory_order_acq_rel); if (writer.commit(buffer) || notify_consumer) { uint64_t count = 1; if (write(_pevent.fd, &count, sizeof(count)) != sizeof(count)) { From 305262709a69a7325e13af936c30fd0763b52c56 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 18 Jan 2023 11:30:56 +0100 Subject: [PATCH 28/29] Add pthread to the async profiler compilation --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 22abcb251..963fd8b18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,7 +98,7 @@ aux_source_directory(${ASYNC_PROFILER_SRC_DIR} ASYNC_PROFILER_SOURCES) add_library(async_prof_lib STATIC ${ASYNC_PROFILER_SOURCES}) target_include_directories(async_prof_lib PUBLIC ${ASYNC_PROFILER_LIB_INCLUDE} ${CMAKE_SOURCE_DIR}/include) 
-target_link_libraries(async_prof_lib PRIVATE dw elf) +target_link_libraries(async_prof_lib PRIVATE dw elf Threads::Threads) set_property(TARGET async_prof_lib PROPERTY POSITION_INDEPENDENT_CODE ON) add_library(DDProf::AsyncProf ALIAS async_prof_lib) # ------------------- From 4b394f79b88d04a83514d84ceeb12ed745fad65c Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 18 Jan 2023 13:35:01 +0100 Subject: [PATCH 29/29] Ensure in whole host we are able to run using the /proc//root link to files --- src/async-profiler/symbols_linux.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 1bc7e2d91..be3ea3121 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -645,8 +645,15 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, unsigned long inode = map.inode(); printf("+++++ Considering %s ++++ \n", map.file()); if (inode != 0) { + char proc_root_filename[1024] = {}; + // use /proc//root to access the file (whole host) + int n = snprintf(proc_root_filename, 1024, "%s/proc/%d/root%s", "", pid, map.file()); + if (n < 0) { + printf("error encoding file %s \n", map.file()); + continue; + } + int fd = open(proc_root_filename, O_RDONLY); // remote unwinding - int fd = open(map.file(), O_RDONLY); if (-1 == fd) { printf("error opening file %s \n", map.file()); continue;