From fe5b05b38c2a45b834313b61990d80610a2cdf87 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 4 May 2026 16:19:50 +0200 Subject: [PATCH 1/2] fix(wall): break SIGPROF self-chain in signal handler install MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If SIGPROF is already pointing at HandleProfilerSignal when Install() runs the else-branch (installed_ == false), the captured "previous" handler is our own function. Storing that into old_handler_func_ turns the next signal delivery into infinite recursion via the chained call in HandleProfilerSignal — observed in the wild as ~430 stack frames of HandleProfilerSignal calling itself before the thread blows its stack. Refuse to chain to ourselves: if the captured previous disposition is &HandleProfilerSignal, store nullptr instead, so the handler fast-paths out via the existing `if (!old_handler) return;` check. --- bindings/profilers/wall.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index c4630ad3..4d0e681b 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -342,7 +342,13 @@ class SignalHandler { sigaction(SIGPROF, &sa, nullptr); } else { installed_ = (sigaction(SIGPROF, &sa, &old_handler_) == 0); - old_handler_func_.store(old_handler_.sa_sigaction, + auto prev = old_handler_.sa_sigaction; + // Refuse to chain to ourselves. If SIGPROF was already pointing at + // HandleProfilerSignal when we ran the install (e.g. because another + // SIGPROF user restored its own saved disposition — which it had + // captured while we were the active handler — after we Restore()'d), + // calling prev would loop forever on the next signal. + old_handler_func_.store(prev == &HandleProfilerSignal ? nullptr : prev, std::memory_order_relaxed); } } From eca17bca94e09e880753b8807f159ed65a623b7b Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 4 May 2026 16:56:44 +0200 Subject: [PATCH 2/2] fix(wall): surface self-chain detection as a StartImpl error Storing nullptr in old_handler_func_ avoided the runaway recursion but left the wall profiler's SIGPROF handler unable to forward signals to v8's sampler, which silently produces empty profiles. Plumb the install result out through IncreaseUseCount and StartInternal so StartImpl can roll back the just-started v8 cpu profile and return a Result error to the JS caller. The condition can only arise on the very first install (installed_ == false branch), so StopCore's restart path cannot hit it. --- bindings/profilers/wall.cc | 63 ++++++++++++++++++++++++++------------ bindings/profilers/wall.hh | 2 +- 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 4d0e681b..e6fc3346 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -313,11 +313,15 @@ namespace { #if DD_WALL_USE_SIGPROF class SignalHandler { public: - static void IncreaseUseCount() { + // Returns true on success; false if the previous SIGPROF disposition was + // already &HandleProfilerSignal, in which case we cannot chain to v8's + // sampler and the caller should surface the failure rather than silently + // produce empty profiles. + static bool IncreaseUseCount() { std::lock_guard lock(mutex_); ++use_count_; // Always reinstall the signal handler - Install(); + return Install(); } static void DecreaseUseCount() { @@ -333,24 +337,29 @@ class SignalHandler { } private: - static void Install() { + static bool Install() { struct sigaction sa; sa.sa_sigaction = &HandleProfilerSignal; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK; if (installed_) { sigaction(SIGPROF, &sa, nullptr); - } else { - installed_ = (sigaction(SIGPROF, &sa, &old_handler_) == 0); - auto prev = old_handler_.sa_sigaction; - // Refuse to chain to ourselves. If SIGPROF was already pointing at - // HandleProfilerSignal when we ran the install (e.g. because another - // SIGPROF user restored its own saved disposition — which it had - // captured while we were the active handler — after we Restore()'d), - // calling prev would loop forever on the next signal. - old_handler_func_.store(prev == &HandleProfilerSignal ? nullptr : prev, - std::memory_order_relaxed); + return true; + } + installed_ = (sigaction(SIGPROF, &sa, &old_handler_) == 0); + auto prev = old_handler_.sa_sigaction; + if (prev == &HandleProfilerSignal) { + // SIGPROF was already pointing at HandleProfilerSignal when we ran + // the install (e.g. another SIGPROF user restored its own saved + // disposition — which it had captured while we were the active + // handler — after we Restore()'d). Chaining to prev would loop + // forever on the next signal, so refuse. Sampling won't work in + // this state because we can't reach v8's SIGPROF sampler. + old_handler_func_.store(nullptr, std::memory_order_relaxed); + return false; } + old_handler_func_.store(prev, std::memory_order_relaxed); + return true; } static void Restore() { @@ -419,7 +428,7 @@ void SignalHandler::HandleProfilerSignal(int sig, #else class SignalHandler { public: - static void IncreaseUseCount() {} + static bool IncreaseUseCount() { return true; } static void DecreaseUseCount() {} }; #endif @@ -864,7 +873,9 @@ Result WallProfiler::StartImpl() { isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); } - profileId_ = StartInternal(); + if (auto res = StartInternal(); !res.success) { + return res; + } auto collectionMode = withContexts_ ? CollectionMode::kCollectContexts @@ -876,7 +887,7 @@ Result WallProfiler::StartImpl() { return {}; } -v8::ProfilerId WallProfiler::StartInternal() { +Result WallProfiler::StartInternal() { // Reuse the same names for the profiles because strings used for profile // names are not released until v8::CpuProfiler object is destroyed. // https://github.com/nodejs/node/blob/b53c51995380b1f8d642297d848cab6010d2909c/deps/v8/src/profiler/profile-generator.h#L516 @@ -891,10 +902,21 @@ v8::ProfilerId WallProfiler::StartInternal() { // (ie. starting or deopt samples) have been processed, and therefore if // SamplingEventsProcessor::ProcessOneSample is stuck on vm_ticks_buffer_. withContexts_ || detectV8Bug_); + profileId_ = result.id; // reinstall sighandler on each new upload period if (withContexts_ || workaroundV8Bug_) { - SignalHandler::IncreaseUseCount(); + if (!SignalHandler::IncreaseUseCount()) { + // SIGPROF was already pointing at HandleProfilerSignal at the moment we + // installed (with installed_ == false), so chaining would loop. Without + // a working chain, v8's sampler can't fire and we'd silently produce + // empty profiles — surface a startup error instead. + cpuProfiler_->Stop(profileId_); + return Result{ + "Cannot start wall profiler: SIGPROF disposition was already " + "&HandleProfilerSignal at install time. This typically means " + "more than one copy of @datadog/pprof is loaded in the process."}; + } fields_[kSampleCount] = 0; } @@ -923,7 +945,7 @@ v8::ProfilerId WallProfiler::StartInternal() { cpuProfiler_->CollectSample(v8::Isolate::GetCurrent()); } - return result.id; + return {}; } // stopAndCollect(restart, callback): callback result @@ -1008,7 +1030,10 @@ Result WallProfiler::StopCore(bool restart, ProfileBuilder&& buildProfile) { auto startProcessCpuTime = startProcessCpuTime_; if (restart) { - profileId_ = StartInternal(); + // In restart mode the signal handler is already installed (use_count_ + // stayed positive across the cycle), so the install can never go through + // the else-branch in Install() and IncreaseUseCount() can't fail. + StartInternal(); // record call count to wait for next signal at the end of function callCount = noCollectCallCount_.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 4604a974..541c2905 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -154,7 +154,7 @@ class WallProfiler : public Nan::ObjectWrap { v8::Local GetMetrics(v8::Isolate*); Result StartImpl(); - v8::ProfilerId StartInternal(); + Result StartInternal(); template Result StopCore(bool restart, ProfileBuilder&& buildProfile); Result StopAndCollectImpl(bool restart,