Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
1045a85
feat: wall-clock precheck and signal suppression
kaahos May 28, 2026
aed7b1a
Merge branch 'main' into paul.fournillon/wallclock-suppression
kaahos May 28, 2026
7a250b6
fix
kaahos May 29, 2026
6028fdd
Merge branch 'main' into paul.fournillon/wallclock-suppression
kaahos May 29, 2026
1e1bcd1
fix: fix build + tests
kaahos May 29, 2026
b1cb73f
fix: fix mem leaks in tests
kaahos May 29, 2026
a3a9462
fix: track wall precheck block state in thread filter
kaahos Jun 1, 2026
137065c
fix: arm wall precheck after recording sample
kaahos Jun 1, 2026
c7caa46
fix: include wait states in wall precheck suppression
kaahos Jun 1, 2026
55073d0
Fix ProfiledThread ownership in park_state_ut
kaahos Jun 1, 2026
619449a
Add Java block-state bridge for wall-clock precheck
kaahos Jun 1, 2026
1cd0f8b
Fix wall-clock thread filter reset
kaahos Jun 2, 2026
3ee7f42
Gate wall-clock precheck on untraced context
kaahos Jun 2, 2026
6bda356
Merge branch 'main' into paul.fournillon/wallclock-suppression
kaahos Jun 3, 2026
8bb1fed
fix: avoid exact suppression for unowned blocked states
kaahos Jun 8, 2026
f82fe70
Merge branch 'main' into paul.fournillon/wallclock-suppression
kaahos Jun 9, 2026
6ece88d
fix: address ownership correctness review
kaahos Jun 11, 2026
110abaa
fix: address thread filter review
kaahos Jun 11, 2026
e9f53b9
fix: factorize code and add support for jvmti
kaahos Jun 12, 2026
e2d60da
fix: fix wall-clock counters and misleading comment
kaahos Jun 12, 2026
5ab946c
Merge branch 'main' into paul.fournillon/wallclock-suppression
kaahos Jun 12, 2026
37df0a5
fix: apply review about test and unused stuff
kaahos Jun 12, 2026
43471ee
fix: apply review about drainSuppressedSampledRun
kaahos Jun 12, 2026
a764667
fix: clean up branch based on PR review recommendations
kaahos Jun 18, 2026
0940fbe
fix: remove TaskBlock snapshot mechanism
kaahos Jun 18, 2026
dc3fdb8
chore: isolate TaskBlock recording infrastructure
kaahos Jun 8, 2026
d032564
chore: group park and monitor TaskBlock producers
kaahos Jun 8, 2026
3163a96
chore: split out native socket interposition
kaahos Jun 8, 2026
1f3627e
fix: address JFR recording review
kaahos Jun 11, 2026
3d7ab5e
fix: address TaskBlock recorder review
kaahos Jun 11, 2026
c451125
fix: FLAG_PARKED was published before context
kaahos Jun 12, 2026
d2b453e
fix: address merge regressions
kaahos Jun 12, 2026
5b3e863
fix: fix wallprecheck anchoring and delegated sample recording
kaahos Jun 16, 2026
b07a466
fix: fix TaskBlock monitor ownership and counters
kaahos Jun 16, 2026
bddb37c
fix: fix native socket hook correctness
kaahos Jun 16, 2026
9eaecdc
fix: fix blocked-run ownership and exit races and add TaskBlockQueue …
kaahos Jun 17, 2026
05d8933
fix: refresh native socket fd type after dup2 and dup3
kaahos Jun 17, 2026
fc0a61a
fix: document native patcher and call trace concurrency invariants
kaahos Jun 17, 2026
64bec6a
fix: make TaskBlock events carry direct stack references
kaahos Jun 18, 2026
673289b
fix: test TaskBlock stack reference capture
kaahos Jun 18, 2026
c278669
test: assert TaskBlock events are self-contained in JFR tests
kaahos Jun 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ddprof-lib/src/main/cpp/arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,14 @@ Error Arguments::parse(const char *args) {
_jvmtistacks = true;
}

CASE("wallprecheck")
// Boolean option: a bare "wallprecheck" enables the flag, as does any value
// other than the exact strings "false" or "0" (including an empty value).
if (value != NULL) {
_wall_precheck = strcmp(value, "false") != 0 && strcmp(value, "0") != 0;
} else {
// No value means enable
_wall_precheck = true;
}

CASE("wallsampler")
if (value != NULL) {
switch (value[0]) {
Expand Down
2 changes: 2 additions & 0 deletions ddprof-lib/src/main/cpp/arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ class Arguments {
long _cpu;
long _wall;
bool _wall_collapsing;
bool _wall_precheck;
int _wall_threads_per_tick;
WallclockSampler _wallclock_sampler;
long _memory;
Expand Down Expand Up @@ -207,6 +208,7 @@ class Arguments {
_cpu(-1),
_wall(-1),
_wall_collapsing(false),
_wall_precheck(false),
_wall_threads_per_tick(DEFAULT_WALL_THREADS_PER_TICK),
_wallclock_sampler(ASGCT),
_memory(-1),
Expand Down
11 changes: 4 additions & 7 deletions ddprof-lib/src/main/cpp/callTraceHashTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,17 +252,14 @@ CallTrace *CallTraceHashTable::findCallTrace(LongHashTable *table, u64 hash) {
while (true) {
// Use atomic load: keys[] can be written concurrently via CAS in put()
// when a table is promoted to prev but still has in-flight insertions.
u64 key = __atomic_load_n(&keys[slot], __ATOMIC_ACQUIRE);
if (key == hash) {
u64 key_value = __atomic_load_n(&keys[slot], __ATOMIC_ACQUIRE);
if (key_value == hash) {
// Use acquireTrace() to pair with the RELEASE store in setTrace().
// If still PREPARING, treat as not found: callers will create a new entry.
CallTrace *trace = table->values()[slot].acquireTrace();
if (trace == CallTraceSample::PREPARING) {
return nullptr;
}
return trace;
return trace == CallTraceSample::PREPARING ? nullptr : trace;
}
if (key == 0) {
if (key_value == 0) {
return nullptr;
}
if (!probe.hasNext()) {
Expand Down
32 changes: 30 additions & 2 deletions ddprof-lib/src/main/cpp/codeCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,25 +310,44 @@ void CodeCache::saveImport(ImportId id, void** entry) {
void CodeCache::addImport(void **entry, const char *name) {
switch (name[0]) {
case 'a':
if (strcmp(name, "aligned_alloc") == 0) {
if (strcmp(name, "accept") == 0) {
saveImport(im_accept, entry);
} else if (strcmp(name, "accept4") == 0) {
saveImport(im_accept4, entry);
} else if (strcmp(name, "aligned_alloc") == 0) {
saveImport(im_aligned_alloc, entry);
}
break;
case 'c':
if (strcmp(name, "calloc") == 0) {
saveImport(im_calloc, entry);
} else if (strcmp(name, "close") == 0) {
saveImport(im_close, entry);
} else if (strcmp(name, "connect") == 0) {
saveImport(im_connect, entry);
}
break;
case 'd':
if (strcmp(name, "dlopen") == 0) {
saveImport(im_dlopen, entry);
} else if (strcmp(name, "dup2") == 0) {
saveImport(im_dup2, entry);
} else if (strcmp(name, "dup3") == 0) {
saveImport(im_dup3, entry);
}
break;
case 'f':
if (strcmp(name, "free") == 0) {
saveImport(im_free, entry);
}
break;
case 'e':
if (strcmp(name, "epoll_wait") == 0) {
saveImport(im_epoll_wait, entry);
} else if (strcmp(name, "epoll_pwait") == 0) {
saveImport(im_epoll_pwait, entry);
}
break;
case 'm':
if (strcmp(name, "malloc") == 0) {
saveImport(im_malloc, entry);
Expand All @@ -343,6 +362,10 @@ void CodeCache::addImport(void **entry, const char *name) {
saveImport(im_pthread_setspecific, entry);
} else if (strcmp(name, "poll") == 0) {
saveImport(im_poll, entry);
} else if (strcmp(name, "ppoll") == 0) {
saveImport(im_ppoll, entry);
} else if (strcmp(name, "pselect") == 0) {
saveImport(im_pselect, entry);
} else if (strcmp(name, "posix_memalign") == 0) {
saveImport(im_posix_memalign, entry);
}
Expand All @@ -352,6 +375,10 @@ void CodeCache::addImport(void **entry, const char *name) {
saveImport(im_realloc, entry);
} else if (strcmp(name, "recv") == 0) {
saveImport(im_recv, entry);
} else if (strcmp(name, "recvfrom") == 0) {
saveImport(im_recvfrom, entry);
} else if (strcmp(name, "recvmsg") == 0) {
saveImport(im_recvmsg, entry);
} else if (strcmp(name, "read") == 0) {
saveImport(im_read, entry);
}
Expand All @@ -361,6 +388,8 @@ void CodeCache::addImport(void **entry, const char *name) {
saveImport(im_send, entry);
} else if (strcmp(name, "sigaction") == 0) {
saveImport(im_sigaction, entry);
} else if (strcmp(name, "select") == 0) {
saveImport(im_select, entry);
}
break;
case 'w':
Expand Down Expand Up @@ -467,4 +496,3 @@ void CodeCache::setBuildId(const char* build_id, size_t build_id_len) {
}
}
}

13 changes: 13 additions & 0 deletions ddprof-lib/src/main/cpp/codeCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ const int MAX_NATIVE_LIBS = 2048;

enum ImportId {
im_dlopen,
im_dup2,
im_dup3,
im_pthread_create,
im_pthread_exit,
im_pthread_setspecific,
Expand All @@ -42,6 +44,17 @@ enum ImportId {
im_recv,
im_write,
im_read,
im_close,
im_connect,
im_accept,
im_accept4,
im_recvfrom,
im_recvmsg,
im_epoll_wait,
im_epoll_pwait,
im_ppoll,
im_select,
im_pselect,
NUM_IMPORTS
};

Expand Down
5 changes: 5 additions & 0 deletions ddprof-lib/src/main/cpp/counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@
X(AGCT_NATIVE_NO_JAVA_CONTEXT, "agct_native_no_java_context") \
X(AGCT_BLOCKED_IN_VM, "agct_blocked_in_vm") \
X(SKIPPED_WALLCLOCK_UNWINDS, "skipped_wallclock_unwinds") \
X(WC_SIGNAL_SUPPRESSED_SAMPLED_RUN, "wc_signals_suppressed_sampled_run") \
X(WC_UNOWNED_BLOCKED_SUPPRESSED, "wc_unowned_blocked_suppressed") \
X(WC_UNOWNED_BLOCKED_RECORDED, "wc_unowned_blocked_recorded") \
X(WC_SIGNAL_QUEUE_FULL, "wc_signals_queue_full") \
X(UNWINDING_TIME_ASYNC, "unwinding_ticks_async") \
X(UNWINDING_TIME_JVMTI, "unwinding_ticks_jvmti") \
X(CALLTRACE_STORAGE_DROPPED, "calltrace_storage_dropped_traces") \
Expand Down Expand Up @@ -116,6 +120,7 @@
X(JVMTI_STACKS_FAILED_OTHER, "jvmti_stacks_failed_other") \
X(TASK_BLOCK_QUEUE_DROPPED, "task_block_queue_dropped") \
/* Delegated stacks dropped at slot-lock. Rec-lock drops from all recording \
* paths (delegated and direct) go into SAMPLES_DROPPED_REC_LOCK. */ \
X(JVMTI_STACKS_DROPPED_LOCK, "jvmti_stacks_dropped_lock") \
X(SAMPLES_DROPPED_REC_LOCK, "samples_dropped_rec_lock")
#define X_ENUM(a, b) a,
Expand Down
57 changes: 54 additions & 3 deletions ddprof-lib/src/main/cpp/event.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,13 @@ class ExecutionEvent : public Event {
OSThreadState _thread_state;
ExecutionMode _execution_mode;
u64 _weight;
u32 _call_trace_id;
u64 _call_trace_id;
u64 _correlation_id;
u64 _sample_id;

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is necessary. The TaskBlockEvent describes a time range on a particular thread, so we don't need to connect it with an arbitrary sample.
It should also capture the stack trace (it does not at the moment) so that it is fully self-contained.


ExecutionEvent()
: Event(), _thread_state(OSThreadState::RUNNABLE), _execution_mode(ExecutionMode::UNKNOWN),
_weight(1), _call_trace_id(0) {}
_weight(1), _call_trace_id(0), _correlation_id(0), _sample_id(0) {}
};

class AllocEvent : public Event {
Expand Down Expand Up @@ -122,12 +124,18 @@ class WallClockEpochEvent {
u32 _num_failed_samples;
u32 _num_exited_threads;
u32 _num_permission_denied;
u64 _num_suppressed_sampled_run;
u64 _num_task_block_emitted;
u64 _num_task_block_skipped_trace_context;
u64 _num_task_block_skipped_too_short;

Comment on lines +127 to 131

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarification: are these counters meant to drive the data reconstruction from the samples, or are they just counters?
If the latter, we should use the COUNTERS instead (they are automatically written to the recording).

WallClockEpochEvent(u64 start_time)
: _dirty(false), _start_time(start_time), _duration_millis(0),
_num_samplable_threads(0), _num_successful_samples(0),
_num_failed_samples(0), _num_exited_threads(0),
_num_permission_denied(0) {}
_num_permission_denied(0), _num_suppressed_sampled_run(0),
_num_task_block_emitted(0), _num_task_block_skipped_trace_context(0),
_num_task_block_skipped_too_short(0) {}

bool hasChanged() { return _dirty; }

Expand Down Expand Up @@ -166,13 +174,45 @@ class WallClockEpochEvent {
}
}

// Adds n to the suppressed-sampled-run count and marks the event dirty.
// A non-positive increment is ignored and leaves the dirty flag untouched.
void addNumSuppressedSampledRun(u64 n) {
  if (!(n > 0)) {
    return;
  }
  _num_suppressed_sampled_run += n;
  _dirty = true;
}

// Adds n to the emitted-TaskBlock count and marks the event dirty.
// A non-positive increment is ignored and leaves the dirty flag untouched.
void addNumTaskBlockEmitted(u64 n) {
  if (!(n > 0)) {
    return;
  }
  _num_task_block_emitted += n;
  _dirty = true;
}

// Adds n to the count of TaskBlocks skipped for trace context and marks the
// event dirty. A non-positive increment is ignored and leaves the dirty flag
// untouched.
void addNumTaskBlockSkippedTraceContext(u64 n) {
  if (!(n > 0)) {
    return;
  }
  _num_task_block_skipped_trace_context += n;
  _dirty = true;
}

// Adds n to the count of TaskBlocks skipped as too short and marks the event
// dirty. A non-positive increment is ignored and leaves the dirty flag
// untouched.
void addNumTaskBlockSkippedTooShort(u64 n) {
  if (!(n > 0)) {
    return;
  }
  _num_task_block_skipped_too_short += n;
  _dirty = true;
}

// Stores the duration of the epoch, given in milliseconds.
void endEpoch(u64 millis) {
  _duration_millis = millis;
}

// Clears the dirty flag; no counter is modified.
void clean() {
  _dirty = false;
}

// Begins a new epoch at start_time: clears the dirty flag and zeroes the
// suppressed-run and TaskBlock counters. No other field is modified here.
void newEpoch(u64 start_time) {
  _start_time = start_time;
  _dirty = false;

  // Additive counters start again from zero in each epoch.
  _num_task_block_skipped_too_short = 0;
  _num_task_block_skipped_trace_context = 0;
  _num_task_block_emitted = 0;
  _num_suppressed_sampled_run = 0;
}
};

Expand All @@ -197,4 +237,15 @@ typedef struct QueueTimeEvent {
u32 _queueLength;
} QueueTimeEvent;

// Plain record describing one TaskBlock event.
// NOTE(review): the per-field notes below are inferred from the field names
// only; confirm against the code that fills and serializes this struct.
typedef struct TaskBlockEvent {
u64 _start; // presumably the start of the blocked interval — TODO confirm unit/clock
u64 _end; // presumably the end of the blocked interval — TODO confirm unit/clock
u64 _blocker; // presumably identifies what the thread was blocked on — TODO confirm
u64 _unblockingSpanId; // presumably the span that unblocked the thread — TODO confirm
Context _ctx; // context captured with the event
u64 _callTraceId; // presumably the id of the call trace recorded for this event — TODO confirm
u64 _correlationId; // presumably links the event to related data — TODO confirm
OSThreadState _observedBlockingState; // presumably the thread state seen while blocked — TODO confirm
} TaskBlockEvent;

#endif // _EVENT_H
Loading
Loading