Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,14 @@ run-reliability-tests:
artifacts: true
- job: prepare:start
artifacts: true
# Reliability/chaos tests download com.datadoghq:ddprof:<branch>-SNAPSHOT from
Comment thread
rkennke marked this conversation as resolved.
# Maven snapshots; that artifact is published by deploy-artifact. Without this
# gate the child pipeline can start before the snapshot exists (cold branch) or
# download a stale snapshot from a previous push. optional: true so release
# branches, where deploy-artifact never runs, stay satisfiable.
- job: deploy-artifact
artifacts: false
optional: true
rules:
- if: '$CI_PIPELINE_SOURCE == "schedule"'
when: never
Expand Down
107 changes: 42 additions & 65 deletions ddprof-lib/src/main/cpp/flightRecorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,12 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) {
mi->_mark = true;
bool first_time = mi->_key == 0;
if (first_time) {
mi->_key = _method_map->size() + 1; // avoid zero key
// Allocate a method-pool id that is unique among live methods. Must not
// be derived from the map size: cleanupUnreferencedMethods() erases
// entries, so size()+1 would reissue an id still owned by a surviving
// method, producing duplicate ids in the chunk's method constant pool
// (PROF-15130). The allocator recycles ids freed on erase instead.
mi->_key = _method_map->allocId();
}
if (method == nullptr) {
fillNativeMethodInfo(mi, UNKNOWN, nullptr);
Expand Down Expand Up @@ -641,7 +646,6 @@ Recording::Recording(int fd, Arguments &args)
_start_ticks = TSC::ticks();
_recording_start_time = _start_time;
_recording_start_ticks = _start_ticks;
_base_id = 0;
_bytes_written = 0;

_tid = OS::threadId();
Expand Down Expand Up @@ -813,45 +817,39 @@ off_t Recording::finishChunk(bool end_recording, bool do_cleanup) {
return chunk_end;
}

// Finish the current chunk, move it to the external file `fd` (must be a valid
// open descriptor), then restart the continuous recording file with a fresh
// chunk header. Callers guarantee fd > -1 (see FlightRecorder::dump).
void Recording::switchChunk(int fd) {
_chunk_start = finishChunk(fd > -1, /*do_cleanup=*/true);
_chunk_start = finishChunk(/*end_recording=*/true, /*do_cleanup=*/true);

TEST_LOG("MethodMap: %zu methods after cleanup", _method_map.size());

_start_time = _stop_time;
_start_ticks = _stop_ticks;
_bytes_written = 0;
if (fd > -1) {
// move the chunk to external file and reset the continuous recording file
OS::copyFile(_fd, fd, 0, _chunk_start);
OS::truncateFile(_fd);
// need to reset the file offset here
_chunk_start = 0;
_base_id = 0;
} else {
// same file, different logical chunk
_base_id += 0x1000000;
}

// move the chunk to the external file and reset the continuous recording file
OS::copyFile(_fd, fd, 0, _chunk_start);
OS::truncateFile(_fd);
_chunk_start = 0;

// the recording file is restarted, so write out all the info events again
writeHeader(_buf);
writeMetadata(_buf);
if (fd > -1) {
// if the recording file is to be restarted write out all the info events
// again
writeSettings(_buf, _args);
if (!_args.hasOption(NO_SYSTEM_INFO)) {
writeOsCpuInfo(_buf);
writeJvmInfo(_buf);
}
if (!_args.hasOption(NO_SYSTEM_PROPS)) {
writeSystemProperties(_buf);
}
if (!_args.hasOption(NO_NATIVE_LIBS)) {
_recorded_lib_count = 0;
writeNativeLibraries(_buf);
} else {
_recorded_lib_count = -1;
}
writeSettings(_buf, _args);
if (!_args.hasOption(NO_SYSTEM_INFO)) {
writeOsCpuInfo(_buf);
writeJvmInfo(_buf);
}
if (!_args.hasOption(NO_SYSTEM_PROPS)) {
writeSystemProperties(_buf);
}
if (!_args.hasOption(NO_NATIVE_LIBS)) {
_recorded_lib_count = 0;
writeNativeLibraries(_buf);
} else {
_recorded_lib_count = -1;
}
flush(_buf);
}
Expand Down Expand Up @@ -916,6 +914,9 @@ void Recording::cleanupUnreferencedMethods() {
if (has_line_table) {
removed_with_line_tables++;
}
// Recycle the erased method's pool id so a later method can reuse it
// without colliding with any still-live method (PROF-15130).
_method_map.freeId(mi._key);
it = _method_map.erase(it);
removed_count++;
continue;
Expand Down Expand Up @@ -1579,8 +1580,8 @@ void Recording::writeMethods(Buffer *buf, Lookup *lookup) {
mi._mark = false;
buf->putVar64(mi._key);
buf->putVar64(mi._class);
buf->putVar64(mi._name | _base_id);
buf->putVar64(mi._sig | _base_id);
buf->putVar64(mi._name);
buf->putVar64(mi._sig);
buf->putVar64(mi._modifiers);
buf->putVar64(mi.isHidden());
flushIfNeeded(buf);
Expand All @@ -1604,8 +1605,8 @@ void Recording::writeClasses(Buffer *buf, Lookup *lookup) {
const char *name = it->second;
buf->putVar64(it->first);
buf->putVar64(0); // classLoader
buf->putVar64(lookup->getSymbol(name) | _base_id);
buf->putVar64(lookup->getPackage(name) | _base_id);
buf->putVar64(lookup->getSymbol(name));
buf->putVar64(lookup->getPackage(name));
buf->putVar64(0); // access flags
flushIfNeeded(buf);
}
Expand All @@ -1619,8 +1620,8 @@ void Recording::writePackages(Buffer *buf, Lookup *lookup) {
buf->putVar32(packages.size());
for (std::map<u32, const char *>::const_iterator it = packages.begin();
it != packages.end(); ++it) {
buf->putVar64(it->first | _base_id);
buf->putVar64(lookup->getSymbol(it->second) | _base_id);
buf->putVar64(it->first);
buf->putVar64(lookup->getSymbol(it->second));
flushIfNeeded(buf);
}
}
Expand All @@ -1635,7 +1636,7 @@ void Recording::writeConstantPoolSection(
int length = strlen(it->second);
// 5 is max varint length
flushIfNeeded(buf, RECORDING_BUFFER_LIMIT - length - 5);
buf->putVar64(it->first | _base_id);
buf->putVar64(it->first);
buf->putUtf8(it->second, length);
}
}
Expand Down Expand Up @@ -1976,6 +1977,9 @@ Error FlightRecorder::dump(const char *filename, const int length) {
// if the filename to dump the recording to is specified move the current
// working file there
int copy_fd = open(filename, O_CREAT | O_RDWR | O_TRUNC, 0644);
if (copy_fd == -1) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks unrelated, but still a reasonable change. Any particular reasoning behind it? (Safe to keep, IMO, just curious).

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I took a stab at finally removing dead code - actually, the work started with the assumption that there were some issues in this part of code so I started elimination dead branches and then it looked too pretty to revert :)

return Error("Could not open recording file for dump");
}
rec->switchChunk(copy_fd);
close(copy_fd);
return Error::OK;
Expand All @@ -1986,33 +1990,6 @@ Error FlightRecorder::dump(const char *filename, const int length) {
return Error("No active recording");
}

void FlightRecorder::flush() {
DEBUG_ASSERT_NOT_IN_SIGNAL();
ExclusiveLockGuard locker(&_rec_lock);
Recording* rec = _rec;
if (rec != nullptr) {
jvmtiEnv* jvmti = VM::jvmti();
JNIEnv* env = VM::jni();

jclass* classes = NULL;
jint count = 0;
// Pin currently-loaded classes for the duration of switchChunk() so that
// resolveMethod() can safely call JVMTI methods on jmethodIDs whose classes
// might otherwise be concurrently unloaded by the GC. See the matching
// comment in finishChunk() for scope and limitations of this protection.
jvmtiError err = jvmti->GetLoadedClasses(&count, &classes);
rec->switchChunk(-1);
if (!err) {
// delete all local references
for (int i = 0; i < count; i++) {
env->DeleteLocalRef((jobject) classes[i]);
}
// deallocate the class array
jvmti->Deallocate((unsigned char*) classes);
}
}
}

void FlightRecorder::wallClockEpoch(int lock_index,
WallClockEpochEvent *event) {
OptionalSharedLockGuard locker(&_rec_lock);
Expand Down
31 changes: 29 additions & 2 deletions ddprof-lib/src/main/cpp/flightRecorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <limits.h>
#include <string.h>
Expand Down Expand Up @@ -158,6 +159,34 @@ class MethodMap : public std::map<unsigned long, MethodInfo> {
assert((key & KEY_TYPE_MASK) == 0);
return (key | VTABLE_RECEIVER_MARK);
}

// JFR method-pool id allocator. Ids must be unique among the methods written
// in a single chunk, but may be recycled once a method is erased by
// cleanupUnreferencedMethods() — an erased method is never written again, so
// its id is free for reuse. Recycling freed ids bounds the id range to the
// peak number of live methods (keeping LEB128 encoding compact) while
// guaranteeing no two live methods ever share an id. Id 0 stays reserved as
// the "no entry" sentinel. Single-threaded: only touched on the dump thread
// (allocId from resolveMethod under lockAll, freeId from
// cleanupUnreferencedMethods under the recording lock).
u32 allocId() {
if (!_free_ids.empty()) {
u32 id = _free_ids.back();
_free_ids.pop_back();
return id;
}
return ++_id_high_water;
}

void freeId(u32 id) {
if (id != 0) {
_free_ids.push_back(id);
}
}

private:
u32 _id_high_water = 0;
std::vector<u32> _free_ids;
};

class Recording {
Expand Down Expand Up @@ -189,7 +218,6 @@ class Recording {
u64 _stop_time;
u64 _stop_ticks;

u64 _base_id;
u64 _bytes_written;

int _tid;
Expand Down Expand Up @@ -405,7 +433,6 @@ class FlightRecorder {
Error start(Arguments &args, bool reset);
void stop();
Error dump(const char *filename, const int length);
void flush();
void wallClockEpoch(int lock_index, WallClockEpochEvent *event);
void recordTraceRoot(int lock_index, int tid, TraceRootEvent *event);
void recordQueueTime(int lock_index, int tid, QueueTimeEvent *event);
Expand Down
Loading
Loading