From 7e6d55d6ac1db74623f693e958357decb4b00c05 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 18 Jun 2026 11:03:22 +0200 Subject: [PATCH 1/3] fix(jfr): prevent duplicate method-pool ids (PROF-15130) resolveMethod() derived the method-pool id from _method_map->size()+1. cleanupUnreferencedMethods() erases entries, so size()+1 later reissues an id still owned by a live method, producing duplicate jdk.types.Method constant-pool ids in a chunk. First-wins strict parsers (JMC, Datadog backend) then render a phantom method. Replace size()+1 with a free-list id allocator (allocId/freeId) that recycles an id only after the owning method is erased, so no two live methods ever share an id. Also drop the dead _base_id field and FlightRecorder::flush(), simplify switchChunk to the always-valid-fd path, and add an open()-failure guard in dump(). Co-Authored-By: Claude Sonnet 4.6 --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 107 ++-- ddprof-lib/src/main/cpp/flightRecorder.h | 31 +- ddprof-lib/src/test/cpp/methodMapId_ut.cpp | 156 ++++++ .../profiler/metadata/MethodIdReuseTest.java | 527 ++++++++++++++++++ 4 files changed, 754 insertions(+), 67 deletions(-) create mode 100644 ddprof-lib/src/test/cpp/methodMapId_ut.cpp create mode 100644 ddprof-test/src/test/java/com/datadoghq/profiler/metadata/MethodIdReuseTest.java diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 1170b6aec..308993ec5 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -529,7 +529,12 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { mi->_mark = true; bool first_time = mi->_key == 0; if (first_time) { - mi->_key = _method_map->size() + 1; // avoid zero key + // Allocate a method-pool id that is unique among live methods. Must not + // be derived from the map size: cleanupUnreferencedMethods() erases + // entries, so size()+1 would reissue an id still owned by a surviving + // method, producing duplicate ids in the chunk's method constant pool + // (PROF-15130). The allocator recycles ids freed on erase instead. + mi->_key = _method_map->allocId(); } if (method == nullptr) { fillNativeMethodInfo(mi, UNKNOWN, nullptr); @@ -641,7 +646,6 @@ Recording::Recording(int fd, Arguments &args) _start_ticks = TSC::ticks(); _recording_start_time = _start_time; _recording_start_ticks = _start_ticks; - _base_id = 0; _bytes_written = 0; _tid = OS::threadId(); @@ -813,45 +817,39 @@ off_t Recording::finishChunk(bool end_recording, bool do_cleanup) { return chunk_end; } +// Finish the current chunk, move it to the external file `fd` (must be a valid +// open descriptor), then restart the continuous recording file with a fresh +// chunk header. Callers guarantee fd > -1 (see FlightRecorder::dump). void Recording::switchChunk(int fd) { - _chunk_start = finishChunk(fd > -1, /*do_cleanup=*/true); + _chunk_start = finishChunk(/*end_recording=*/true, /*do_cleanup=*/true); TEST_LOG("MethodMap: %zu methods after cleanup", _method_map.size()); _start_time = _stop_time; _start_ticks = _stop_ticks; _bytes_written = 0; - if (fd > -1) { - // move the chunk to external file and reset the continuous recording file - OS::copyFile(_fd, fd, 0, _chunk_start); - OS::truncateFile(_fd); - // need to reset the file offset here - _chunk_start = 0; - _base_id = 0; - } else { - // same file, different logical chunk - _base_id += 0x1000000; - } + // move the chunk to the external file and reset the continuous recording file + OS::copyFile(_fd, fd, 0, _chunk_start); + OS::truncateFile(_fd); + _chunk_start = 0; + + // the recording file is restarted, so write out all the info events again writeHeader(_buf); writeMetadata(_buf); - if (fd > -1) { - // if the recording file is to be restarted write out all the info events - // again - writeSettings(_buf, _args); - if (!_args.hasOption(NO_SYSTEM_INFO)) { - writeOsCpuInfo(_buf); - writeJvmInfo(_buf); - } - if (!_args.hasOption(NO_SYSTEM_PROPS)) { - writeSystemProperties(_buf); - } - if (!_args.hasOption(NO_NATIVE_LIBS)) { - _recorded_lib_count = 0; - writeNativeLibraries(_buf); - } else { - _recorded_lib_count = -1; - } + writeSettings(_buf, _args); + if (!_args.hasOption(NO_SYSTEM_INFO)) { + writeOsCpuInfo(_buf); + writeJvmInfo(_buf); + } + if (!_args.hasOption(NO_SYSTEM_PROPS)) { + writeSystemProperties(_buf); + } + if (!_args.hasOption(NO_NATIVE_LIBS)) { + _recorded_lib_count = 0; + writeNativeLibraries(_buf); + } else { + _recorded_lib_count = -1; } flush(_buf); } @@ -916,6 +914,9 @@ void Recording::cleanupUnreferencedMethods() { if (has_line_table) { removed_with_line_tables++; } + // Recycle the erased method's pool id so a later method can reuse it + // without colliding with any still-live method (PROF-15130). + _method_map.freeId(mi._key); it = _method_map.erase(it); removed_count++; continue; @@ -1579,8 +1580,8 @@ void Recording::writeMethods(Buffer *buf, Lookup *lookup) { mi._mark = false; buf->putVar64(mi._key); buf->putVar64(mi._class); - buf->putVar64(mi._name | _base_id); - buf->putVar64(mi._sig | _base_id); + buf->putVar64(mi._name); + buf->putVar64(mi._sig); buf->putVar64(mi._modifiers); buf->putVar64(mi.isHidden()); flushIfNeeded(buf); @@ -1604,8 +1605,8 @@ void Recording::writeClasses(Buffer *buf, Lookup *lookup) { const char *name = it->second; buf->putVar64(it->first); buf->putVar64(0); // classLoader - buf->putVar64(lookup->getSymbol(name) | _base_id); - buf->putVar64(lookup->getPackage(name) | _base_id); + buf->putVar64(lookup->getSymbol(name)); + buf->putVar64(lookup->getPackage(name)); buf->putVar64(0); // access flags flushIfNeeded(buf); } @@ -1619,8 +1620,8 @@ void Recording::writePackages(Buffer *buf, Lookup *lookup) { buf->putVar32(packages.size()); for (std::map::const_iterator it = packages.begin(); it != packages.end(); ++it) { - buf->putVar64(it->first | _base_id); - buf->putVar64(lookup->getSymbol(it->second) | _base_id); + buf->putVar64(it->first); + buf->putVar64(lookup->getSymbol(it->second)); flushIfNeeded(buf); } } @@ -1635,7 +1636,7 @@ void Recording::writeConstantPoolSection( int length = strlen(it->second); // 5 is max varint length flushIfNeeded(buf, RECORDING_BUFFER_LIMIT - length - 5); - buf->putVar64(it->first | _base_id); + buf->putVar64(it->first); buf->putUtf8(it->second, length); } } @@ -1976,6 +1977,9 @@ Error FlightRecorder::dump(const char *filename, const int length) { // if the filename to dump the recording to is specified move the current // working file there int copy_fd = open(filename, O_CREAT | O_RDWR | O_TRUNC, 0644); + if (copy_fd == -1) { + return Error("Could not open recording file for dump"); + } rec->switchChunk(copy_fd); close(copy_fd); return Error::OK; @@ -1986,33 +1990,6 @@ Error FlightRecorder::dump(const char *filename, const int length) { return Error("No active recording"); } -void FlightRecorder::flush() { - DEBUG_ASSERT_NOT_IN_SIGNAL(); - ExclusiveLockGuard locker(&_rec_lock); - Recording* rec = _rec; - if (rec != nullptr) { - jvmtiEnv* jvmti = VM::jvmti(); - JNIEnv* env = VM::jni(); - - jclass* classes = NULL; - jint count = 0; - // Pin currently-loaded classes for the duration of switchChunk() so that - // resolveMethod() can safely call JVMTI methods on jmethodIDs whose classes - // might otherwise be concurrently unloaded by the GC. See the matching - // comment in finishChunk() for scope and limitations of this protection. - jvmtiError err = jvmti->GetLoadedClasses(&count, &classes); - rec->switchChunk(-1); - if (!err) { - // delete all local references - for (int i = 0; i < count; i++) { - env->DeleteLocalRef((jobject) classes[i]); - } - // deallocate the class array - jvmti->Deallocate((unsigned char*) classes); - } - } -} - void FlightRecorder::wallClockEpoch(int lock_index, WallClockEpochEvent *event) { OptionalSharedLockGuard locker(&_rec_lock); diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h index 595e87070..acb43da8c 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.h +++ b/ddprof-lib/src/main/cpp/flightRecorder.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -158,6 +159,34 @@ class MethodMap : public std::map { assert((key & KEY_TYPE_MASK) == 0); return (key | VTABLE_RECEIVER_MARK); } + + // JFR method-pool id allocator. Ids must be unique among the methods written + // in a single chunk, but may be recycled once a method is erased by + // cleanupUnreferencedMethods() — an erased method is never written again, so + // its id is free for reuse. Recycling freed ids bounds the id range to the + // peak number of live methods (keeping LEB128 encoding compact) while + // guaranteeing no two live methods ever share an id. Id 0 stays reserved as + // the "no entry" sentinel. Single-threaded: only touched on the dump thread + // (allocId from resolveMethod under lockAll, freeId from + // cleanupUnreferencedMethods under the recording lock). + u32 allocId() { + if (!_free_ids.empty()) { + u32 id = _free_ids.back(); + _free_ids.pop_back(); + return id; + } + return ++_id_high_water; + } + + void freeId(u32 id) { + if (id != 0) { + _free_ids.push_back(id); + } + } + +private: + u32 _id_high_water = 0; + std::vector _free_ids; }; class Recording { @@ -189,7 +218,6 @@ class Recording { u64 _stop_time; u64 _stop_ticks; - u64 _base_id; u64 _bytes_written; int _tid; @@ -405,7 +433,6 @@ class FlightRecorder { Error start(Arguments &args, bool reset); void stop(); Error dump(const char *filename, const int length); - void flush(); void wallClockEpoch(int lock_index, WallClockEpochEvent *event); void recordTraceRoot(int lock_index, int tid, TraceRootEvent *event); void recordQueueTime(int lock_index, int tid, QueueTimeEvent *event); diff --git a/ddprof-lib/src/test/cpp/methodMapId_ut.cpp b/ddprof-lib/src/test/cpp/methodMapId_ut.cpp new file mode 100644 index 000000000..e2d246aaa --- /dev/null +++ b/ddprof-lib/src/test/cpp/methodMapId_ut.cpp @@ -0,0 +1,156 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + * + * Regression test for PROF-15130: duplicate jdk.types.Method constant-pool + * ids. resolveMethod() used `mi->_key = _method_map->size() + 1`, but + * cleanupUnreferencedMethods() erases unreferenced entries and shrinks the + * map, so size()+1 later reissues an id still owned by a surviving method. + * Two live methods then share an id in one chunk → first-wins strict parsers + * (JMC/jafar/Datadog backend) render a phantom method. + * + * The fix replaced size()+1 with MethodMap::allocId()/freeId(): a free-list + * id allocator that recycles an id only after the owning method is erased. + * These tests drive the allocator directly and assert the live-id-uniqueness + * invariant, including the exact scenario that broke size()+1. + */ + +#include +#include "../../main/cpp/flightRecorder.h" +#include +#include + +// Reference model of the BUGGY scheme that the fix replaced. +// size()+1 derives the id from the current number of live entries. +static u32 buggyNextId(size_t live_count) { + return static_cast(live_count) + 1; // avoid zero key +} + +// 1) Basic monotonic allocation with no frees: ids are unique and start at 1. +TEST(MethodMapIdTest, AllocWithoutFreeIsUniqueAndStartsAtOne) { + MethodMap m; + std::set seen; + u32 prev = 0; + for (int i = 0; i < 1000; i++) { + u32 id = m.allocId(); + EXPECT_NE(id, 0U) << "id 0 is reserved as the no-entry sentinel"; + EXPECT_TRUE(seen.insert(id).second) << "duplicate id handed out: " << id; + EXPECT_GT(id, prev) << "ids must be monotonic before any free"; + prev = id; + } +} + +// 2) freeId(0) is a no-op: the sentinel must never enter the free list. +TEST(MethodMapIdTest, FreeIdZeroIsNoOp) { + MethodMap m; + m.freeId(0); + EXPECT_NE(m.allocId(), 0U); +} + +// 3) A freed id is recycled (LEB128 stays compact) and is only re-handed out +// after it was freed. +TEST(MethodMapIdTest, FreedIdIsRecycled) { + MethodMap m; + u32 a = m.allocId(); // 1 + u32 b = m.allocId(); // 2 + u32 c = m.allocId(); // 3 + EXPECT_EQ(a, 1U); + EXPECT_EQ(b, 2U); + EXPECT_EQ(c, 3U); + + m.freeId(b); // free the middle id + u32 d = m.allocId(); + EXPECT_EQ(d, b) << "the freed id should be recycled before the high water grows"; +} + +// 4) THE INVARIANT (this is what size()+1 violated). +// Simulate the resolve/erase cycle: alloc a batch, free a middle subset +// (the "erased" methods), alloc more, and assert at every step that the set +// of ids currently held by live entries has no duplicates and that a newly +// allocated id never collides with a still-live id. +TEST(MethodMapIdTest, LiveIdsNeverCollideAcrossEraseReuseCycle) { + MethodMap m; + std::set live; // ids currently owned by live methods + + auto alloc = [&]() { + u32 id = m.allocId(); + EXPECT_NE(id, 0U); + // The core invariant: a newly handed-out id must not already be live. + EXPECT_EQ(live.count(id), 0U) + << "allocId() returned id " << id << " that is still owned by a live method"; + live.insert(id); + return id; + }; + auto freeOne = [&](u32 id) { + live.erase(id); + m.freeId(id); + }; + + // Phase 1: allocate ids 1..N for an initial population of live methods. + const int N = 64; + std::vector ids; + for (int i = 0; i < N; i++) ids.push_back(alloc()); + + // Phase 2: free a middle subset (simulating cleanupUnreferencedMethods + // erasing methods that aged out) while a disjoint set survives. + // Free indices 20..39 (ids 21..40); keep 1..20 and 41..64 live. + for (int i = 20; i < 40; i++) freeOne(ids[i]); + + // Phase 3: allocate a new batch. With size()+1 these would collide with the + // surviving high ids; with the free-list they reuse the freed ids. + // The alloc lambda asserts no collision with any live id. + for (int i = 0; i < 40; i++) alloc(); + + // Cross-check: enumerate every live id and confirm uniqueness holds. + std::set uniq(live.begin(), live.end()); + EXPECT_EQ(uniq.size(), live.size()) << "duplicate id among live methods"; +} + +// 5) Demonstrate that the OLD size()+1 scheme DOES produce a duplicate live id +// in exactly the erase+reuse scenario above. This pins the bug so the test +// is not vacuous: if someone reintroduces size()+1, the asserted property +// (no duplicate live ids) is false. +TEST(MethodMapIdTest, BuggySizePlusOneSchemeProducesDuplicateLiveId) { + // Model live methods as a set of ids. size()+1 derives the id from the + // current live count, mirroring `mi->_key = _method_map->size() + 1`. + std::set live; + + // Phase 1: resolve N methods → ids 1..N (count grows each insert). + const int N = 64; + std::vector ids; + for (int i = 0; i < N; i++) { + u32 id = buggyNextId(live.size()); + ids.push_back(id); + live.insert(id); + } + // After phase 1, live = {1..64}. + + // Phase 2: erase a middle subset (ids 21..40), shrinking the map. NOTE the + // buggy scheme has nothing analogous to freeId — it just shrinks. + for (int i = 20; i < 40; i++) live.erase(ids[i]); + // live now = {1..20, 41..64}, size() == 44. + + // Phase 3: resolve ONE more method. size()+1 == 44+1 == 45, but 45 is still + // owned by a surviving phase-1 method → DUPLICATE id in the chunk. + u32 next = buggyNextId(live.size()); + EXPECT_TRUE(live.count(next) > 0) + << "expected size()+1 to collide with a live id, got fresh id " << next; + EXPECT_EQ(next, 45U); + + // Contrast: the fixed allocator does NOT collide for the same sequence. + MethodMap m; + std::set fixedLive; + std::vector fixedIds; + for (int i = 0; i < N; i++) { + u32 id = m.allocId(); + fixedIds.push_back(id); + fixedLive.insert(id); + } + for (int i = 20; i < 40; i++) { + fixedLive.erase(fixedIds[i]); + m.freeId(fixedIds[i]); + } + u32 fixedNext = m.allocId(); + EXPECT_EQ(fixedLive.count(fixedNext), 0U) + << "fixed allocator handed out a still-live id " << fixedNext; +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/metadata/MethodIdReuseTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/metadata/MethodIdReuseTest.java new file mode 100644 index 000000000..191e3a352 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/metadata/MethodIdReuseTest.java @@ -0,0 +1,527 @@ +/* + * Copyright 2026 Datadog, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datadoghq.profiler.metadata; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Regression test for PROF-15130: duplicate {@code jdk.types.Method} constant-pool ids. + * + *

{@code resolveMethod()} used to assign {@code mi->_key = _method_map->size() + 1}, but + * {@code cleanupUnreferencedMethods()} (enabled by default, erases methods unreferenced for + * {@code AGE_THRESHOLD = 3} chunks) shrinks the map. After an erase, {@code size()+1} reissues an + * id still owned by a surviving method, so two live methods share one id in a single chunk's + * method constant pool. First-wins strict parsers (JMC / jafar / the Datadog backend) then render + * a phantom (wrong-but-valid) method, while last-wins OpenJDK {@code jfr} renders correctly. JMC's + * loader is last-wins and therefore CANNOT see the duplicate — so this test parses the raw chunk + * constant pools itself, which is the precise oracle. + * + *

The fix replaced {@code size()+1} with a free-list id allocator + * ({@code MethodMap::allocId()/freeId()}): an id is recycled only after the owning method is + * erased, so no two live methods ever share an id. + * + *

Deterministic erase+reuse driver: each {@code dump()} produces a chunk and runs + * {@code cleanupUnreferencedMethods}. Phase 1 touches a LARGE set of distinct lambda call targets + * (high ids). Phases 2..N (≥ AGE_THRESHOLD dumps) stop touching the phase-1 lambdas (so they age + * out and get erased) while touching a DIFFERENT persistent set plus brand-new lambdas (which draw + * recycled ids). The oracle asserts no {@code jdk.types.Method} id maps to two distinct method + * definitions within any chunk. + */ +public class MethodIdReuseTest extends AbstractProfilerTest { + + // Phases beyond AGE_THRESHOLD (3) so phase-1 methods are definitely erased. + private static final int PHASE1_DUMPS = 3; + private static final int PHASE2_DUMPS = 7; // well beyond AGE_THRESHOLD; many erase+reuse chunks + private static final int DISTINCT_TARGETS_PER_PHASE = 600; + + // Generated lambda call targets. A large, churned set maximises method-pool turnover so the + // free-list / size()+1 schemes diverge. + private final List phase1Targets = new ArrayList<>(); + private final List persistentTargets = new ArrayList<>(); + + protected static volatile long sink; + + private static Runnable makeBusyTarget(final int seed) { + return () -> { + long acc = seed; + for (int i = 0; i < 50_000; i++) { + acc += (i ^ (acc >> 1)) + seed; + } + sink += acc; + }; + } + + private void buildTargets() { + for (int i = 0; i < DISTINCT_TARGETS_PER_PHASE; i++) { + phase1Targets.add(makeBusyTarget(1_000 + i)); + persistentTargets.add(makeBusyTarget(9_000_000 + i)); + } + } + + private void runFor(List targets, List extra, long millis) { + long deadline = System.currentTimeMillis() + millis; + int idx = 0; + while (System.currentTimeMillis() < deadline) { + targets.get(idx % targets.size()).run(); + if (extra != null && !extra.isEmpty()) { + extra.get(idx % extra.size()).run(); + } + idx++; + } + } + + @Test + public void methodPoolIdsAreUniquePerChunk() throws Exception { + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + Assumptions.assumeFalse(Platform.isJ9()); + + buildTargets(); + registerCurrentThreadForWallClockProfiling(); + waitForProfilerReady(2000); + + List dumps = new ArrayList<>(); + + // Phase 1: touch the large phase-1 lambda set (+ persistent set) so they all get resolved + // and assigned high method-pool ids. + for (int d = 0; d < PHASE1_DUMPS; d++) { + runFor(phase1Targets, persistentTargets, 600); + Path rec = Files.createTempFile("prof15130-reuse-p1-" + d + "-", ".jfr"); + dumps.add(rec); + dump(rec); + System.out.println("[PROF-15130] phase1 dump #" + d + " -> " + rec.toAbsolutePath()); + } + + // Phase 2: STOP touching phase-1 lambdas (they age out and get erased after AGE_THRESHOLD + // dumps) while touching the persistent set + brand-new lambdas that draw recycled ids. + for (int d = 0; d < PHASE2_DUMPS; d++) { + List fresh = new ArrayList<>(); + for (int i = 0; i < DISTINCT_TARGETS_PER_PHASE; i++) { + fresh.add(makeBusyTarget(50_000_000 + d * 100_000 + i)); + } + runFor(persistentTargets, fresh, 600); + Path rec = Files.createTempFile("prof15130-reuse-p2-" + d + "-", ".jfr"); + dumps.add(rec); + dump(rec); + System.out.println("[PROF-15130] phase2 dump #" + d + " -> " + rec.toAbsolutePath()); + } + + stopProfiler(); + + System.out.println("[PROF-15130] produced dump files:"); + for (Path p : dumps) { + System.out.println("[PROF-15130] " + p.toAbsolutePath()); + } + + // Oracle: across every chunk in every dump, no method-pool id maps to two distinct method + // definitions. + int totalDuplicates = 0; + for (Path dump : dumps) { + int dups = countDuplicateMethodIds(dump); + System.out.println("[PROF-15130] " + dump.getFileName() + " duplicate method-pool ids: " + dups); + totalDuplicates += dups; + } + + assertEquals(0, totalDuplicates, + "Found " + totalDuplicates + " jdk.types.Method constant-pool id(s) mapping to two " + + "distinct method definitions (PROF-15130). See stdout for the dump files."); + } + + @Override + protected String getProfilerCommand() { + // wall+cpu maximise sample density so the churned lambdas land in stacktraces and thus the + // method pool. Method cleanup is left ENABLED (default) — that is what triggers the bug. + return "cpu=1ms,wall=~1ms"; + } + + // ───────────────────────── raw JFR method-pool oracle ───────────────────────── + // + // A minimal, dependency-free JFR reader that walks every chunk in the file, parses the + // metadata to learn the field layout of jdk.types.Method / jdk.types.Symbol / java.lang.Class, + // then reads each constant-pool checkpoint and detects any jdk.types.Method id that resolves to + // two DISTINCT method definitions within the same chunk. This is the precise duplicate-id + // oracle (JMC's last-wins loader would silently hide it). + + /** @return number of method-pool ids in the file that map to >1 distinct definition. */ + static int countDuplicateMethodIds(Path file) throws IOException { + byte[] all; + try (FileChannel ch = FileChannel.open(file, StandardOpenOption.READ)) { + long size = ch.size(); + ByteBuffer bb = ByteBuffer.allocate((int) size); + while (bb.hasRemaining() && ch.read(bb) > 0) { /* read fully */ } + all = bb.array(); + } + int duplicates = 0; + long pos = 0; + while (pos + 8 <= all.length) { + // Chunk magic "FLR\0" + if (!(all[(int) pos] == 'F' && all[(int) pos + 1] == 'L' && all[(int) pos + 2] == 'R' + && all[(int) pos + 3] == 0)) { + break; + } + Chunk chunk = new Chunk(all, (int) pos); + duplicates += chunk.countDuplicateMethodIds(); + if (chunk.chunkSize <= 0) { + break; + } + pos += chunk.chunkSize; + } + return duplicates; + } + + private static final class Chunk { + final byte[] f; + final int base; // chunk start offset within the file + final long chunkSize; + final long cpOffset; // checkpoint offset, relative to chunk start + final long metaOffset; // metadata offset, relative to chunk start + + // metadata: classId -> ClassDef + final Map classes = new HashMap<>(); + long methodTypeId = -1, symbolTypeId = -1, classTypeId = -1, stringTypeId = -1; + + Chunk(byte[] f, int base) { + this.f = f; + this.base = base; + // Header (big-endian): magic(4) major(2) minor(2) chunkSize(8) cpOffset(8) metaOffset(8) + this.chunkSize = beLong(base + 8); + this.cpOffset = beLong(base + 16); + this.metaOffset = beLong(base + 24); + parseMetadata(); + } + + // Resolution tables, unioned across all checkpoints in the chunk (last-wins is fine: a + // symbol/class id is stable within a chunk). Used only to render readable defs. + private final Map symbols = new HashMap<>(); + private final Map classNameRef = new HashMap<>(); // class id -> name symbol ref + // id -> set of DISTINCT raw [type,name,descriptor] ref-tuples seen for that method id. + // size() > 1 ⇒ the id carried two different method definitions in this chunk ⇒ the bug. + private final Map> methodRefTuples = new LinkedHashMap<>(); + + int countDuplicateMethodIds() { + // Follow the checkpoint delta chain. + Set visited = new HashSet<>(); + long off = base + cpOffset; + while (off >= base && off < base + chunkSize && !visited.contains(off)) { + visited.add(off); + long[] p = {off}; + readVarLong(p); // size + readVarLong(p); // typeId (EventCheckpoint) + readVarLong(p); // start + readVarLong(p); // duration + long deltaRaw = readVarLong(p); + long delta = (deltaRaw >>> 1) ^ -(deltaRaw & 1); // zig-zag + p[0] += 1; // flush byte + long poolCount = readVarLong(p); + for (long i = 0; i < poolCount; i++) { + long classId = readVarLong(p); + long count = readVarLong(p); + ClassDef cd = classes.get(classId); + if (cd == null) { + // Unknown layout — cannot safely parse further in this checkpoint. + return duplicateCount(); + } + boolean isMethod = classId == methodTypeId; + boolean isSymbol = classId == symbolTypeId; + boolean isClass = classId == classTypeId; + for (long e = 0; e < count; e++) { + long id = readVarLong(p); + if (isSymbol) { + String s = readString(p); + symbols.put(id, s); + } else if (isMethod) { + long typeRef = 0, nameRef = 0, descRef = 0; + for (FieldDef fd : cd.fields) { + Object v = readField(p, fd); + if (v instanceof Long) { + if ("type".equals(fd.name)) typeRef = (Long) v; + else if ("name".equals(fd.name)) nameRef = (Long) v; + else if ("descriptor".equals(fd.name)) descRef = (Long) v; + } + } + // Record the DISTINCT raw ref-tuple for this id. Two different tuples + // for one id within the chunk is exactly the PROF-15130 collision. + String tuple = typeRef + ":" + nameRef + ":" + descRef; + methodRefTuples.computeIfAbsent(id, k -> new HashSet<>()).add(tuple); + } else if (isClass) { + long nameRef = 0; + for (FieldDef fd : cd.fields) { + Object v = readField(p, fd); + if ("name".equals(fd.name) && v instanceof Long) { + nameRef = (Long) v; + } + } + classNameRef.put(id, nameRef); + } else { + // skip every field of this entry + for (FieldDef fd : cd.fields) { + readField(p, fd); + } + } + } + } + if (delta == 0) break; + off += delta; + } + return duplicateCount(); + } + + private int duplicateCount() { + int dups = 0; + for (Map.Entry> en : methodRefTuples.entrySet()) { + if (en.getValue().size() > 1) { + dups++; + System.out.println("[PROF-15130] DUPLICATE method id=" + en.getKey() + + " defs=" + renderTuples(en.getValue())); + } + } + return dups; + } + + private List renderTuples(Set tuples) { + List out = new ArrayList<>(); + for (String t : tuples) { + String[] parts = t.split(":"); + long typeRef = Long.parseLong(parts[0]); + long nameRef = Long.parseLong(parts[1]); + long descRef = Long.parseLong(parts[2]); + String cls = ""; + Long cnr = classNameRef.get(typeRef); + if (cnr != null) { + String s = symbols.get(cnr); + if (s != null) cls = s; + } + String nm = symbols.getOrDefault(nameRef, ""); + String desc = symbols.getOrDefault(descRef, ""); + out.add(cls + "." + nm + desc); + } + return out; + } + + private Object readField(long[] p, FieldDef fd) { + if (fd.dimension == 1) { + long n = readVarLong(p); + for (long i = 0; i < n; i++) { + readScalar(p, fd); + } + return null; + } + return readScalar(p, fd); + } + + private Object readScalar(long[] p, FieldDef fd) { + if (fd.constantPool) { + return readVarLong(p); // a constant-pool reference id + } + ClassDef t = classes.get(fd.typeId); + String tn = t != null ? t.name : null; + if ("java.lang.String".equals(tn)) { + return readString(p); + } + if ("boolean".equals(tn) || "byte".equals(tn)) { + p[0] += 1; + return null; + } + if ("short".equals(tn) || "char".equals(tn) || "int".equals(tn) || "long".equals(tn)) { + return readVarLong(p); + } + if ("float".equals(tn)) { p[0] += 4; return null; } + if ("double".equals(tn)) { p[0] += 8; return null; } + // inline struct: read its fields recursively + if (t != null) { + for (FieldDef sub : t.fields) { + readField(p, sub); + } + } + return null; + } + + // ── metadata parsing ── + private void parseMetadata() { + long[] p = {base + metaOffset}; + readVarLong(p); // size + readVarLong(p); // typeId (Metadata event) + readVarLong(p); // start + readVarLong(p); // duration + readVarLong(p); // metadataId + long stringCount = readVarLong(p); + String[] pool = new String[(int) stringCount]; + for (int i = 0; i < stringCount; i++) { + pool[i] = readString(p); + } + // root element + Element root = readElement(p, pool); + // walk to find elements + collectClasses(root); + for (ClassDef cd : classes.values()) { + switch (cd.name) { + case "jdk.types.Method": methodTypeId = cd.id; break; + case "jdk.types.Symbol": symbolTypeId = cd.id; break; + case "java.lang.Class": classTypeId = cd.id; break; + case "java.lang.String": stringTypeId = cd.id; break; + default: break; + } + } + } + + private void collectClasses(Element el) { + if ("class".equals(el.name)) { + String name = el.attrs.get("name"); + String idStr = el.attrs.get("id"); + if (name != null && idStr != null) { + ClassDef cd = new ClassDef(); + cd.id = Long.parseLong(idStr); + cd.name = name; + for (Element child : el.children) { + if ("field".equals(child.name)) { + FieldDef fd = new FieldDef(); + fd.name = child.attrs.get("name"); + fd.typeId = Long.parseLong(child.attrs.get("class")); + String cp = child.attrs.get("constantPool"); + fd.constantPool = "true".equals(cp); + String dim = child.attrs.get("dimension"); + fd.dimension = dim != null ? Integer.parseInt(dim) : 0; + cd.fields.add(fd); + } + } + classes.put(cd.id, cd); + } + } + for (Element child : el.children) { + collectClasses(child); + } + } + + private Element readElement(long[] p, String[] pool) { + Element el = new Element(); + long nameIdx = readVarLong(p); + el.name = pool[(int) nameIdx]; + long attrCount = readVarLong(p); + for (long i = 0; i < attrCount; i++) { + long k = readVarLong(p); + long v = readVarLong(p); + el.attrs.put(pool[(int) k], pool[(int) v]); + } + long childCount = readVarLong(p); + for (long i = 0; i < childCount; i++) { + el.children.add(readElement(p, pool)); + } + return el; + } + + // ── low-level readers ── + private long beLong(long off) { + long v = 0; + for (int i = 0; i < 8; i++) { + v = (v << 8) | (f[(int) off + i] & 0xffL); + } + return v; + } + + private long readVarLong(long[] p) { + long result = 0; + int shift = 0; + int i = (int) p[0]; + for (int b = 0; b < 9; b++) { + int by = f[i++] & 0xff; + if (b == 8) { + result |= ((long) by) << shift; + p[0] = i; + return result; + } + result |= ((long) (by & 0x7f)) << shift; + if ((by & 0x80) == 0) { + p[0] = i; + return result; + } + shift += 7; + } + p[0] = i; + return result; + } + + private String readString(long[] p) { + int i = (int) p[0]; + int enc = f[i++] & 0xff; + p[0] = i; + switch (enc) { + case 0: return null; // null + case 1: return ""; // empty + case 3: { // UTF-8 + long len = readVarLong(p); + String s = new String(f, (int) p[0], (int) len, java.nio.charset.StandardCharsets.UTF_8); + p[0] += len; + return s; + } + case 4: { // char array + long len = readVarLong(p); + StringBuilder sb = new StringBuilder((int) len); + for (long c = 0; c < len; c++) { + sb.append((char) readVarLong(p)); + } + return sb.toString(); + } + case 5: { // latin1 + long len = readVarLong(p); + String s = new String(f, (int) p[0], (int) len, java.nio.charset.StandardCharsets.ISO_8859_1); + p[0] += len; + return s; + } + default: + throw new IllegalStateException("Unknown JFR string encoding " + enc); + } + } + } + + private static final class ClassDef { + long id; + String name; + final List fields = new ArrayList<>(); + } + + private static final class FieldDef { + String name; + long typeId; + boolean constantPool; + int dimension; + } + + private static final class Element { + String name; + final Map attrs = new HashMap<>(); + final List children = new ArrayList<>(); + } +} From 16250190f82d542dde2714c1c296aa3c7220550d Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 18 Jun 2026 11:40:00 +0200 Subject: [PATCH 2/3] ci: gate reliability tests on deploy-artifact Reliability/chaos tests download the branch SNAPSHOT from Maven, which is published by deploy-artifact. Without the dependency the child pipeline could start before the snapshot existed (cold branch) or pull a stale one. Co-Authored-By: Claude Sonnet 4.6 --- .gitlab-ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 83ce0def5..fdd98822a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -202,6 +202,14 @@ run-reliability-tests: artifacts: true - job: prepare:start artifacts: true + # Reliability/chaos tests download com.datadoghq:ddprof:-SNAPSHOT from + # Maven snapshots; that artifact is published by deploy-artifact. Without this + # gate the child pipeline can start before the snapshot exists (cold branch) or + # download a stale snapshot from a previous push. optional: true so release + # branches, where deploy-artifact never runs, stay satisfiable. + - job: deploy-artifact + artifacts: false + optional: true rules: - if: '$CI_PIPELINE_SOURCE == "schedule"' when: never From de329397e65702ed8ad619e9284d1fcf110776af Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 18 Jun 2026 12:00:42 +0200 Subject: [PATCH 3/3] test(jfr): address PR review on MethodIdReuseTest - generate a distinct Runnable class per target (was one lambda call site -> one method id) so phase 1 builds the high-id population the oracle depends on - drop the unreachable 9th-byte branch in readVarLong - remove unused imports (ByteOrder, Assertions.fail) - delete temp JFR dumps in after() unless ddprof_test.keep_jfrs Co-Authored-By: Claude Opus 4.8 (1M context) --- .../profiler/metadata/MethodIdReuseTest.java | 135 +++++++++++++++--- 1 file changed, 113 insertions(+), 22 deletions(-) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/metadata/MethodIdReuseTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/metadata/MethodIdReuseTest.java index 191e3a352..146d28c71 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/metadata/MethodIdReuseTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/metadata/MethodIdReuseTest.java @@ -19,10 +19,13 @@ import com.datadoghq.profiler.Platform; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.Test; +import org.objectweb.asm.ClassWriter; +import org.objectweb.asm.Label; +import org.objectweb.asm.MethodVisitor; +import org.objectweb.asm.Opcodes; import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -36,7 +39,6 @@ import java.util.Set; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.fail; /** * Regression test for PROF-15130: duplicate {@code jdk.types.Method} constant-pool ids. @@ -68,21 +70,39 @@ public class MethodIdReuseTest extends AbstractProfilerTest { private static final int PHASE2_DUMPS = 7; // well beyond AGE_THRESHOLD; many erase+reuse chunks private static final int DISTINCT_TARGETS_PER_PHASE = 600; - // Generated lambda call targets. A large, churned set maximises method-pool turnover so the - // free-list / size()+1 schemes diverge. + // Generated distinct-class call targets (one JVM method each). A large, churned set maximises + // method-pool turnover so the free-list / size()+1 schemes diverge. private final List phase1Targets = new ArrayList<>(); private final List persistentTargets = new ArrayList<>(); - protected static volatile long sink; - - private static Runnable makeBusyTarget(final int seed) { - return () -> { - long acc = seed; - for (int i = 0; i < 50_000; i++) { - acc += (i ^ (acc >> 1)) + seed; - } - sink += acc; - }; + // Every temporary JFR dump produced by the test; deleted in after() unless ddprof_test.keep_jfrs. + private final List dumps = new ArrayList<>(); + + // Distinct generated Runnable classes are loaded here; each carries its busy loop in its own + // run() method, so every target is a distinct JVM method (and thus a distinct method-pool id). + private final GeneratingClassLoader targetLoader = new GeneratingClassLoader(); + private int targetCounter = 0; + + // Written by the generated run() methods; public so the generated classes (loaded by a child + // class loader, in a different runtime package) can reach it via GETSTATIC/PUTSTATIC. + public static volatile long sink; + + /** + * Generates and instantiates a fresh class implementing {@link Runnable} whose {@code run()} + * body spins on a CPU-busy loop. Because each call produces a distinct class, every target is a + * distinct JVM method — unlike a shared lambda call site, which HotSpot compiles to a single + * synthetic method (and thus a single method-pool id). Distinct methods are what build the large + * high-id population PROF-15130 needs to age out and recycle ids. + */ + private Runnable makeBusyTarget(final int seed) { + String internalName = "com/datadoghq/profiler/metadata/gen/BusyTarget_" + (targetCounter++); + byte[] bytecode = generateBusyRunnable(internalName, seed); + try { + Class clazz = targetLoader.define(internalName.replace('/', '.'), bytecode); + return (Runnable) clazz.getDeclaredConstructor().newInstance(); + } catch (ReflectiveOperationException e) { + throw new IllegalStateException("failed to generate busy target for seed " + seed, e); + } } private void buildTargets() { @@ -92,6 +112,80 @@ private void buildTargets() { } } + @Override + protected void after() throws Exception { + if (!Boolean.getBoolean("ddprof_test.keep_jfrs")) { + for (Path dump : dumps) { + Files.deleteIfExists(dump); + } + } + } + + /** + * Emits {@code public final class implements Runnable} whose {@code run()} runs + * the CPU-busy loop the test previously expressed as a lambda: + * {@code long acc = seed; for (int i = 0; i < 50000; i++) acc += (i ^ (acc >> 1)) + seed; sink += acc;} + */ + private static byte[] generateBusyRunnable(String internalName, long seed) { + ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES | ClassWriter.COMPUTE_MAXS); + cw.visit(Opcodes.V1_8, Opcodes.ACC_PUBLIC | Opcodes.ACC_FINAL | Opcodes.ACC_SUPER, + internalName, null, "java/lang/Object", new String[] {"java/lang/Runnable"}); + + MethodVisitor ctor = cw.visitMethod(Opcodes.ACC_PUBLIC, "", "()V", null, null); + ctor.visitCode(); + ctor.visitVarInsn(Opcodes.ALOAD, 0); + ctor.visitMethodInsn(Opcodes.INVOKESPECIAL, "java/lang/Object", "", "()V", false); + ctor.visitInsn(Opcodes.RETURN); + ctor.visitMaxs(0, 0); + ctor.visitEnd(); + + MethodVisitor mv = cw.visitMethod(Opcodes.ACC_PUBLIC, "run", "()V", null, null); + mv.visitCode(); + mv.visitLdcInsn(seed); // acc = seed + mv.visitVarInsn(Opcodes.LSTORE, 1); + mv.visitInsn(Opcodes.ICONST_0); // i = 0 + mv.visitVarInsn(Opcodes.ISTORE, 3); + Label cond = new Label(); + Label end = new Label(); + mv.visitLabel(cond); + mv.visitVarInsn(Opcodes.ILOAD, 3); + mv.visitLdcInsn(50_000); + mv.visitJumpInsn(Opcodes.IF_ICMPGE, end); + mv.visitVarInsn(Opcodes.LLOAD, 1); // acc + mv.visitVarInsn(Opcodes.ILOAD, 3); // i + mv.visitInsn(Opcodes.I2L); // (long) i + mv.visitVarInsn(Opcodes.LLOAD, 1); // acc + mv.visitInsn(Opcodes.ICONST_1); // shift count is an int for LSHR + mv.visitInsn(Opcodes.LSHR); // acc >> 1 + mv.visitInsn(Opcodes.LXOR); // (long) i ^ (acc >> 1) + mv.visitLdcInsn(seed); + mv.visitInsn(Opcodes.LADD); // + seed + mv.visitInsn(Opcodes.LADD); // acc + ... + mv.visitVarInsn(Opcodes.LSTORE, 1); // acc = + mv.visitIincInsn(3, 1); // i++ + mv.visitJumpInsn(Opcodes.GOTO, cond); + mv.visitLabel(end); + mv.visitFieldInsn(Opcodes.GETSTATIC, + "com/datadoghq/profiler/metadata/MethodIdReuseTest", "sink", "J"); + mv.visitVarInsn(Opcodes.LLOAD, 1); + mv.visitInsn(Opcodes.LADD); + mv.visitFieldInsn(Opcodes.PUTSTATIC, + "com/datadoghq/profiler/metadata/MethodIdReuseTest", "sink", "J"); + mv.visitInsn(Opcodes.RETURN); + mv.visitMaxs(0, 0); + mv.visitEnd(); + + cw.visitEnd(); + return cw.toByteArray(); + } + + /** Child loader that exposes defineClass so generated targets become distinct loaded classes. */ + private static final class GeneratingClassLoader extends ClassLoader { + Class define(String binaryName, byte[] bytecode) { + return defineClass(binaryName, bytecode, 0, bytecode.length); + } + } + private void runFor(List targets, List extra, long millis) { long deadline = System.currentTimeMillis() + millis; int idx = 0; @@ -113,8 +207,6 @@ public void methodPoolIdsAreUniquePerChunk() throws Exception { registerCurrentThreadForWallClockProfiling(); waitForProfilerReady(2000); - List dumps = new ArrayList<>(); - // Phase 1: touch the large phase-1 lambda set (+ persistent set) so they all get resolved // and assigned high method-pool ids. for (int d = 0; d < PHASE1_DUMPS; d++) { @@ -455,13 +547,9 @@ private long readVarLong(long[] p) { long result = 0; int shift = 0; int i = (int) p[0]; - for (int b = 0; b < 9; b++) { + // Up to 8 continuation bytes carry 7 data bits each. + for (int b = 0; b < 8; b++) { int by = f[i++] & 0xff; - if (b == 8) { - result |= ((long) by) << shift; - p[0] = i; - return result; - } result |= ((long) (by & 0x7f)) << shift; if ((by & 0x80) == 0) { p[0] = i; @@ -469,6 +557,9 @@ private long readVarLong(long[] p) { } shift += 7; } + // 9th byte (shift == 56) carries all 8 bits with no continuation flag. + int by = f[i++] & 0xff; + result |= ((long) by) << shift; p[0] = i; return result; }