Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions ddprof-lib/src/main/cpp/livenessTracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "os.h"
#include "profiler.h"
#include "thread.h"
#include "threadLocal.h"
#include "tsc.h"
#include <jni.h>
#include <string.h>
Expand Down Expand Up @@ -276,6 +277,29 @@ Error LivenessTracker::initialize(Arguments &args) {
return _stored_error = Error::OK;
}

static void* create_mt19937() {
// std::mt19937 itself is noexcept, but std::random_device and `new` may throw.
// If that happens we let the failure terminate the process (same outcome as
// failing thread_local initialization previously).
return (void*)(new std::mt19937(std::random_device{}()));
}

static void* create_uniform_real_distribution() {
// std::uniform_real_distribution<> construction is noexcept, but `new` may throw.
// If allocation fails the process is likely to abort anyway.
return (void*)(new std::uniform_real_distribution<>(0, 1.0));
}

static void free_mt19937(void* p) {
std::mt19937* mt = (std::mt19937*)p;
delete mt;
}

static void free_uniform_real_distribution(void* p) {
std::uniform_real_distribution<>* urd = (std::uniform_real_distribution<>*)p;
delete urd;
}

void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
jobject object, u64 call_trace_id) {
if (!_enabled) {
Expand All @@ -287,13 +311,17 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
return;
}

static thread_local std::mt19937 gen(std::random_device{}());
static thread_local std::uniform_real_distribution<> dis(0, 1.0);
static thread_local double skipped = 0;
static ThreadLocal<std::mt19937*, create_mt19937, free_mt19937> gen;
static ThreadLocal<std::uniform_real_distribution<>*, create_uniform_real_distribution, free_uniform_real_distribution> dis;
static ThreadLocal<double> skipped;

if (_subsample_ratio < 1.0 && dis(gen) > _subsample_ratio) {
skipped += static_cast<double>(event._weight) * event._size;
return;
if (_subsample_ratio < 1.0) {
std::mt19937* genp = gen.get();
std::uniform_real_distribution<>* disp = dis.get();
if (disp->operator()(*genp) > _subsample_ratio) {
skipped.set(skipped.get() + static_cast<double>(event._weight) * event._size);
return;
}
}

jweak ref = env->NewWeakGlobalRef(object);
Expand Down Expand Up @@ -322,7 +350,7 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
_table[idx].time = TSC::ticks();
_table[idx].ref = ref;
_table[idx].alloc = event;
_table[idx].skipped = skipped;
_table[idx].skipped = skipped.get();
_table[idx].age = 0;
_table[idx].call_trace_id = call_trace_id;
_table[idx].ctx = ContextApi::snapshot();
Expand Down Expand Up @@ -376,7 +404,7 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
env->DeleteWeakGlobalRef(ref);
}
}
skipped = 0; // reset the subsampling skipped bytes
skipped.set(0); // reset the subsampling skipped bytes

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW · correctness] (Pre-existing, not introduced by this PR) skipped.set(0) resets the accumulated skipped-bytes counter even when the current event was dropped (table full), losing weight that should carry forward to the next successfully recorded sample.

Suggestion: move skipped.set(0) inside the idx < _table_cap branch so it only resets when the sample was actually recorded.

}

void JNICALL LivenessTracker::GarbageCollectionFinish(jvmtiEnv *jvmti_env) {
Expand Down
142 changes: 142 additions & 0 deletions ddprof-lib/src/main/cpp/threadLocal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

#ifndef _THREADLOCAL_H
#define _THREADLOCAL_H

#include <cassert>
#include <cstring>
#include <pthread.h>
#include "arch.h"

/**
* This file implements an alternative to C/C++ thread local.
* Due to some restrictions of the language implementations, especially, on musl/aarch64,
* they cannot be safely used in profiler.
*
* How to use?
* A ThreadLocal should be declared as a static variable, e.g.
*
* static void* create_my_object() {
* return new MyObject();
* }
*
* static void delete_my_object(void* p) {
* MyObject* obj = (MyObject*)p;
* delete obj;
* }
*
* static ThreadLocal<MyObject*, create_my_object, delete_my_object> var;
* MyObject* value = var.get();
* ...
* var.clear();
* ...
* MyObject* new_value = new MyObject();
* var.set(new_value);
* ...
* var.clear();
*
*/

// The function to create value if it does not exist
typedef void* (*CREATE_FUNC)(void);
// Cleanup the value when deleting the key
typedef void (*CLEAN_FUNC)(void*);
template <typename T, CREATE_FUNC C = nullptr, CLEAN_FUNC F = nullptr>
class ThreadLocal {
protected:
pthread_key_t _key;

public:
ThreadLocal(const ThreadLocal&) = delete;
ThreadLocal& operator=(const ThreadLocal&) = delete;

ThreadLocal() {
int err = pthread_key_create(&_key, F);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[HIGH · robustness] pthread_key_create failure is guarded only by assert(), which is compiled out under NDEBUG. In a release build a failed key creation leaves _key uninitialized; all subsequent get()/set() calls then operate on an arbitrary or zero key, silently aliasing another thread's TSD slot (e.g. the ProfiledThread key in thread.cpp).

Suggestion: replace the assert with an unconditional abort:

if (err != 0) { fprintf(stderr, "ThreadLocal: pthread_key_create failed: %d\n", err); abort(); }

This matches the existing error handling in thread.cpp and makes failures visible in production.

// What to do if we can not create a key?
assert(err == 0);
Comment on lines +57 to +59

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Handle pthread_key_create failures outside assert

When the process exhausts pthread TLS keys, pthread_key_create returns an error; in release builds this project compiles with -DNDEBUG, so this assertion is removed and the static _key remains zero/unusable. Subsequent liveness sampling get()/set() calls can then operate on an unrelated key or repeatedly allocate and leak per-thread RNG state instead of failing cleanly, so the error path needs non-assert handling such as an unconditional abort or disabling this TLS instance.

Useful? React with 👍 / 👎.

}
Comment thread
zhengyu123 marked this conversation as resolved.

~ThreadLocal() {
pthread_key_delete(_key);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[HIGH · robustness] pthread_key_delete(_key) is called unconditionally in the destructor. If pthread_key_create failed in the constructor and the assert was compiled out (NDEBUG / release build), _key stays at its zero-initialized value (0). Calling pthread_key_delete(0) deletes a key we don't own — on musl, key 0 is legitimately held by another component (see thread.cpp:91–93) — corrupting the global pthread key table.

Suggestion: track construction success with a bool _key_valid member:

_key_valid = (pthread_key_create(&_key, F) == 0);
// ...
if (_key_valid) pthread_key_delete(_key);

Apply the same pattern to the double specialization.

}

void set(T value) {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · correctness] The primary template casts T directly to const void* with no compile-time guard that sizeof(T) == sizeof(void*). Instantiation with a sub-pointer-width type (e.g. ThreadLocal<int>) compiles silently but produces implementation-defined behaviour on the set/get round-trip for values that set the sign bit.

Suggestion: add a static_assert in the primary template constructor mirroring the guards already in the double specialization:

static_assert(sizeof(T) == sizeof(void*),
    "ThreadLocal<T> requires sizeof(T)==sizeof(void*); use a pointer type or add a specialization");

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · correctness] set(nullptr) on a lazy-create ThreadLocal (i.e. C != nullptr) is semantically identical to 'never set': the next get() call will invoke C() and return a fresh object instead of null. This makes it impossible to store a null value or 'unset' without triggering re-creation, which is a silent foot-gun for the planned future conversions mentioned in the PR description.

Suggestion: document explicitly that set(nullptr) is reserved for clear() and must not be used when C != nullptr, or provide a separate reset() method that bypasses the lazy-create path.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · completeness] set() does not invoke F (CLEAN_FUNC) on the existing TSD value before overwriting it. If get() auto-created an object via C() and a caller subsequently calls set() with a different pointer, the original allocation is leaked — POSIX does not call destructors on pthread_setspecific overwrite, only on thread exit.

Suggestion: in set(), retrieve the current value first and call F on it if non-null:

void set(T value) {
    void* prev = pthread_getspecific(_key);
    int err = pthread_setspecific(_key, (const void*)value);
    assert(err == 0);
    if (prev != nullptr && F != nullptr) F(prev);
}

Alternatively, document that set() must only be called when the TSD slot is empty (i.e. after clear()) and enforce with an assert.

int err = pthread_setspecific(_key, (const void*)value);
assert(err == 0);
}
Comment thread
Copilot marked this conversation as resolved.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · necessity] clear() has zero callers in production code and zero test coverage. The class docstring presents it as a core usage pattern, yet no shipped path exercises it — including the subtle branch where a failed pthread_setspecific in clear() leaves a still-reachable pointer un-freed.

Suggestion: either remove clear() until a caller needs it, or add unit tests that cover: (1) clear() resets the value, (2) the cleanup function is invoked, (3) F == nullptr branch actually zeroes the slot. Do not ship an untested method that the public docs present as core API.

T get() {
void* p = pthread_getspecific(_key);
if (p == nullptr && C != nullptr) {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW · robustness] get() cannot distinguish 'never set' from 'explicitly set to nullptr'. When a create function is registered, storing null (or C() returning null) causes C() to be re-invoked on every get(), repeatedly allocating. This differs from real thread_local semantics and is a foot-gun for the planned future conversions mentioned in the PR description.

Suggestion: document that C must never return nullptr and that set(nullptr) is reserved for clear(); optionally assert(p != nullptr) after C() to fail fast.

p = C();

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[HIGH · robustness] After p = C() allocates an object, set() asserts on pthread_setspecific; under NDEBUG the assert is stripped. If pthread_setspecific then fails silently, the TSD slot stays null. Every subsequent get() re-invokes C() and leaks the prior allocation — unbounded per-call leak for the lazy-create path.

Suggestion: check pthread_setspecific directly after C() and free on failure:

p = C();
if (pthread_setspecific(_key, (const void*)p) != 0) {
    if (F) F(p);
    return T{};
}

Alternatively, make set() call abort() instead of assert so failure is not silently swallowed in release builds.

set((T)p);
}
return (T)p;
}

// Clear the value
void clear() {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · correctness] clear() silently skips pthread_setspecific when F == nullptr, leaving the TSD slot non-zero. The documented use-case — clear before re-set — does not work for pointer types without a destructor function.

Suggestion: always zero the TSD slot and only call F() when non-null:

void clear() {
    void* p = pthread_getspecific(_key);
    if (p != nullptr) {
        pthread_setspecific(_key, nullptr);
        if (F != nullptr) F(p);
    }
}

void* p = nullptr;
if (F != nullptr && (p = pthread_getspecific(_key)) != nullptr) {
int err = pthread_setspecific(_key, nullptr);
// Safety: if reset the value failed, get() can see staled value if

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW · consistency] The comment here says "get() can see staled value if it is freed", but in this code path the value is explicitly not freed (F is not called when pthread_setspecific fails). The value is stale but not freed; the comment inverts the actual safety invariant.

Suggestion:

// Safety: only free after the slot is cleared; if the slot reset failed,
// skip the free to avoid get() returning a dangling pointer.

// it is freed.
if (err == 0) {
F(p);
}
}
}
Comment thread
Copilot marked this conversation as resolved.
};

template <>
class ThreadLocal<double> {
protected:
pthread_key_t _key;
public:
ThreadLocal(const ThreadLocal&) = delete;
ThreadLocal& operator=(const ThreadLocal&) = delete;

ThreadLocal() {
// Only support 64-bit platforms, double and void* are the same size
static_assert(sizeof(void*) == 8);
static_assert(sizeof(double) == 8);
int err = pthread_key_create(&_key, nullptr);
// What to do if we can not create a key?
assert(err == 0);
}
Comment thread
Copilot marked this conversation as resolved.

~ThreadLocal() {
pthread_key_delete(_key);
}

// double <--> u64 cast, preserve bit format
// Can use std::bit_cast after upgrade C++ version to 20
void set(double value) {
u64 val;
memcpy(&val, &value, sizeof(value));
int err = pthread_setspecific(_key, (const void*)val);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW · consistency] Integer-to-pointer cast at line 120 uses a C-style (const void*)val while the reverse pointer-to-integer cast at line 130 uses a C-style (u64)p. Both should use reinterpret_cast for clarity and to suppress potential -Wint-to-pointer-cast warnings:

// line 120:
int err = pthread_setspecific(_key, reinterpret_cast<const void*>(val));
// line 130:
u64 val = reinterpret_cast<u64>(p);

assert(err == 0);
}
Comment thread
Copilot marked this conversation as resolved.

double get() {
void* p = pthread_getspecific(_key);
if (p == nullptr) {
return 0.0;
}

u64 val = (u64)p;
double value;
memcpy(&value, &val, sizeof(val));
return value;
}

void clear() {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · robustness] ThreadLocal<double>::clear() uses assert(err == 0) for the pthread_setspecific return value; under NDEBUG the assert is stripped. If pthread_setspecific fails silently, the TSD slot retains the stale encoded double bits; the next get() sees a non-null value and decodes garbage instead of returning 0.0.

Suggestion: mirror the defensive pattern in the generic clear() — read the existing value first, attempt the set, and only proceed if it succeeded — or use abort() instead of assert so the failure is not silently swallowed in release builds.

int err = pthread_setspecific(_key, nullptr);
assert(err == 0);
}
Comment thread
Copilot marked this conversation as resolved.
};

#endif // _THREADLOCAL_H
Loading
Loading