From a167861295fb1db602d8042c7719beb683078894 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 29 Jan 2026 11:33:04 +0100 Subject: [PATCH 1/9] libdatadog upgrade to v26.0.0 --- cmake/Findlibdatadog.cmake | 5 +- src/exporter/ddprof_exporter.cc | 81 +++++++++++---------------------- test/ddprof_pprof-ut.cc | 3 +- tools/libdatadog_checksums.txt | 12 ++--- 4 files changed, 36 insertions(+), 65 deletions(-) diff --git a/cmake/Findlibdatadog.cmake b/cmake/Findlibdatadog.cmake index 9a41c835e..69920f3fb 100644 --- a/cmake/Findlibdatadog.cmake +++ b/cmake/Findlibdatadog.cmake @@ -2,10 +2,9 @@ # License Version 2.0. This product includes software developed at Datadog # (https://www.datadoghq.com/). Copyright 2021-Present Datadog, Inc. -# libdatadog : common profiler imported libraries -# https://github.com/DataDog/libdatadog/releases/tag/v7.0.0 +# libdatadog : common profiler imported libraries https://github.com/DataDog/libdatadog/releases set(TAG_LIBDATADOG - "v19.1.0" + "v26.0.0" CACHE STRING "libdatadog github tag") set(Datadog_ROOT ${VENDOR_PATH}/libdatadog-${TAG_LIBDATADOG}) diff --git a/src/exporter/ddprof_exporter.cc b/src/exporter/ddprof_exporter.cc index d7532c288..7e60296d2 100644 --- a/src/exporter/ddprof_exporter.cc +++ b/src/exporter/ddprof_exporter.cc @@ -263,10 +263,10 @@ DDRes ddprof_exporter_new(const UserTags *user_tags, DDProfExporter *exporter) { ddog_CharSlice const base_url = to_CharSlice(exporter->_url); ddog_prof_Endpoint endpoint; if (exporter->_agent) { - endpoint = ddog_prof_Endpoint_agent(base_url); + endpoint = ddog_prof_Endpoint_agent(base_url, k_timeout_ms); } else { ddog_CharSlice const api_key = to_CharSlice(exporter->_input.api_key); - endpoint = ddog_prof_Endpoint_agentless(base_url, api_key); + endpoint = ddog_prof_Endpoint_agentless(base_url, api_key, k_timeout_ms); } ddog_prof_ProfileExporter_Result res_exporter = ddog_prof_Exporter_new( @@ -283,14 +283,6 @@ DDRes ddprof_exporter_new(const UserTags *user_tags, DDProfExporter *exporter) { static_cast(res_exporter.err.message.len), res_exporter.err.message.ptr); } - auto result = - ddog_prof_Exporter_set_timeout(&exporter->_exporter, k_timeout_ms); - if (result.tag == DDOG_VOID_RESULT_ERR) { - defer { ddog_Error_drop(&result.err); }; - DDRES_RETURN_ERROR_LOG(DD_WHAT_EXPORTER, "Failure setting timeout - %.*s", - static_cast(result.err.message.len), - result.err.message.ptr); - } return {}; } @@ -335,53 +327,34 @@ DDRes ddprof_exporter_export(ddog_prof_Profile *profile, LG_NTC("[EXPORTER] Export buffer of size %lu", buffer->len); - // clang-format off - ddog_prof_Request_Result res_request = - ddog_prof_Exporter_Request_build(&exporter->_exporter, - encoded_profile, - ddog_prof_Exporter_Slice_File_empty(), // files_to_compress_and_export - ddog_prof_Exporter_Slice_File_empty(), // already compressed - &ffi_additional_tags, // optional tags - nullptr, // internal_metadata_json - nullptr // optional_info_json - ); - // clang-format on - - if (res_request.tag == DDOG_PROF_REQUEST_RESULT_OK_HANDLE_REQUEST) { - ddog_prof_Request request = res_request.ok; - - // dropping the request is not useful if we have a send - // however the send will replace the request by null when it takes - // ownership - defer { ddog_prof_Exporter_Request_drop(&request); }; - - ddog_prof_Result_HttpStatus result = - ddog_prof_Exporter_send(&exporter->_exporter, &request, nullptr); - - if (result.tag == DDOG_PROF_RESULT_HTTP_STATUS_ERR_HTTP_STATUS) { - defer { ddog_Error_drop(&result.err); }; - LG_WRN("Failure to establish connection, check url %s", - exporter->_url.c_str()); - LG_WRN("Failure to send profiles (%.*s)", (int)result.err.message.len, - result.err.message.ptr); - // Free error buffer (prefer this API to the free API) - if (exporter->_nb_consecutive_errors++ >= - k_max_nb_consecutive_errors_allowed) { - // this will shut down profiler - res = ddres_error(DD_WHAT_EXPORTER); - } else { - res = ddres_warn(DD_WHAT_EXPORTER); - } + ddog_prof_Result_HttpStatus result = ddog_prof_Exporter_send_blocking( + &exporter->_exporter, encoded_profile, + ddog_prof_Exporter_Slice_File_empty(), // files_to_compress_and_export + &ffi_additional_tags, // optional_additional_tags + nullptr, // optional_process_tags + nullptr, // optional_internal_metadata_json + nullptr, // optional_info_json + nullptr // cancellation_token + ); + + if (result.tag == DDOG_PROF_RESULT_HTTP_STATUS_ERR_HTTP_STATUS) { + defer { ddog_Error_drop(&result.err); }; + LG_WRN("Failure to establish connection, check url %s", + exporter->_url.c_str()); + LG_WRN("Failure to send profiles (%.*s)", (int)result.err.message.len, + result.err.message.ptr); + // Free error buffer (prefer this API to the free API) + if (exporter->_nb_consecutive_errors++ >= + k_max_nb_consecutive_errors_allowed) { + // this will shut down profiler + res = ddres_error(DD_WHAT_EXPORTER); } else { - // success establishing connection - exporter->_nb_consecutive_errors = 0; - res = check_send_response_code(result.ok.code); + res = ddres_warn(DD_WHAT_EXPORTER); } } else { - defer { ddog_Error_drop(&res_request.err); }; - LG_ERR("[EXPORTER] Failure to build request: %s", - res_request.err.message.ptr); - res = ddres_error(DD_WHAT_EXPORTER); + // success establishing connection + exporter->_nb_consecutive_errors = 0; + res = check_send_response_code(result.ok.code); } } return res; diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index b6aaad6b4..9e7999987 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -47,8 +47,7 @@ void test_pprof(DDProfPProf *pprofs) { EXPECT_TRUE(buffer->ptr); - // Test that we are generating content - EXPECT_TRUE(buffer->len > 500); + EXPECT_TRUE(buffer->len > 100); ddog_prof_EncodedProfile_drop(&serialized_result.ok); } diff --git a/tools/libdatadog_checksums.txt b/tools/libdatadog_checksums.txt index a7d23590b..a8b413867 100644 --- a/tools/libdatadog_checksums.txt +++ b/tools/libdatadog_checksums.txt @@ -1,6 +1,6 @@ -7c69a37cb335260610b61ae956192a6dbd104d05a8278c8ff894dbfebc2efd53 libdatadog-aarch64-alpine-linux-musl.tar.gz -b992a11b90ec5927646a0c96b74fe9fcd63e7e471307e74a670ddf42fc10eaf9 libdatadog-aarch64-apple-darwin.tar.gz -606b23f4de7defacd5d4a381816f8d7bfe26112c97fcdf21ec2eb998a6c5fbbd libdatadog-aarch64-unknown-linux-gnu.tar.gz -2008886021ddee573c0d539626d1d58d41e2a7dbc8deca22b3662da52de6f4d9 libdatadog-x86_64-alpine-linux-musl.tar.gz -6a12ef60fd7b00544343c2b6761ef801ad2e1237075711bd16dfb7247464bc43 libdatadog-x86_64-apple-darwin.tar.gz -4e5b05515ab180aec0819608aa5d277ff710055819654147a9d69caea27a0dbc libdatadog-x86_64-unknown-linux-gnu.tar.gz +38b83da2781f20f004d278c077b071441f40671de2e0adf72f7e14e37b10db15 libdatadog-x86_64-apple-darwin.tar.gz +1420ba4970ff9158aec4bd8a80d139abe8c19cfd71ae31c6c518f8a2ad1416b8 libdatadog-aarch64-apple-darwin.tar.gz +1778bed8bb4ec5a63af792ed6d7b0acd2564e5c7633d9b65d7c715e7f8635743 libdatadog-x86_64-unknown-linux-gnu.tar.gz +c90bd4959026f7fddb9012036fdc5b1e49bdf57d716cb429cdde291af6108740 libdatadog-aarch64-alpine-linux-musl.tar.gz +c67ada4359cd6a806adafcb44043bc8fb0dffd463e3aa328856496e2883142ac libdatadog-aarch64-unknown-linux-gnu.tar.gz +394b13591400b36d90755bc9851be047e6d31813347ed9d0e2638355cc9617d4 libdatadog-x86_64-alpine-linux-musl.tar.gz From cdcad87ef395e49f5b60ef4c21f30c9792fbf7af Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 29 Jan 2026 14:41:53 +0100 Subject: [PATCH 2/9] String interning WIP --- include/pprof/ddprof_pprof.hpp | 3 ++- include/symbol.hpp | 1 - include/symbol_hdr.hpp | 26 ++++++++++++++++++++++++-- src/ddog_profiling_utils.cc | 1 - src/ddprof_worker.cc | 8 ++++++-- src/pprof/ddprof_pprof.cc | 16 +++++++++------- test/CMakeLists.txt | 4 ++++ test/ddprof_exporter-ut.cc | 3 ++- test/ddprof_pprof-ut.cc | 11 ++++++++--- 9 files changed, 55 insertions(+), 18 deletions(-) diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index e4e93688e..6da352556 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -36,7 +36,8 @@ struct DDProfValuePack { uint64_t timestamp; }; -DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx); +DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx, + const ddog_prof_ProfilesDictionaryHandle *dict); /** * Aggregate to the existing profile the provided unwinding output. diff --git a/include/symbol.hpp b/include/symbol.hpp index c38413869..62cd32ad6 100644 --- a/include/symbol.hpp +++ b/include/symbol.hpp @@ -17,7 +17,6 @@ class Symbol { public: Symbol() : _lineno(0) {} - // Warning : Generates some string copies (these are not rvalues) Symbol(std::string symname, std::string demangled_name, uint32_t lineno, std::string srcpath) : _symname(std::move(symname)), diff --git a/include/symbol_hdr.hpp b/include/symbol_hdr.hpp index 863343ed7..be56930e6 100644 --- a/include/symbol_hdr.hpp +++ b/include/symbol_hdr.hpp @@ -15,11 +15,30 @@ #include "runtime_symbol_lookup.hpp" #include +#include + +// Forward declarations for libdatadog types (must be at global scope) +struct ddog_prof_ProfilesDictionary; +using ddog_prof_ProfilesDictionaryHandle = ddog_prof_ProfilesDictionary *; namespace ddprof { + +struct ProfilesDictionaryDeleter { + void operator()(ddog_prof_ProfilesDictionaryHandle *handle) const; +}; + +using ProfilesDictionaryPtr = + std::unique_ptr; + struct SymbolHdr { - explicit SymbolHdr(std::string_view path_to_proc = "") - : _runtime_symbol_lookup(path_to_proc) {} + explicit SymbolHdr(std::string_view path_to_proc = ""); + ~SymbolHdr() = default; + + SymbolHdr(const SymbolHdr &) = delete; + SymbolHdr &operator=(const SymbolHdr &) = delete; + SymbolHdr(SymbolHdr &&) noexcept = default; + SymbolHdr &operator=(SymbolHdr &&) noexcept = default; void display_stats() const { _dso_symbol_lookup.stats_display(); } void cycle() { _runtime_symbol_lookup.cycle(); } @@ -44,6 +63,9 @@ struct SymbolHdr { // The mapping table MapInfoTable _mapinfo_table; + + // String interning dictionary (persists across profile exports) + ProfilesDictionaryPtr _profiles_dictionary; }; } // namespace ddprof diff --git a/src/ddog_profiling_utils.cc b/src/ddog_profiling_utils.cc index a5433b26f..1444166b2 100644 --- a/src/ddog_profiling_utils.cc +++ b/src/ddog_profiling_utils.cc @@ -27,7 +27,6 @@ std::string_view get_or_insert_demangled_sym( void write_function(const Symbol &symbol, ddog_prof_Function *ffi_func) { ffi_func->name = to_CharSlice(symbol._demangled_name); - // We can also send symbol._symname if useful ffi_func->system_name = {.ptr = nullptr, .len = 0}; ffi_func->filename = to_CharSlice(symbol._srcpath); } diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index aa1394370..1e1b907e1 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -710,8 +710,12 @@ DDRes ddprof_worker_init(DDProfContext &ctx, DDRES_CHECK_FWD( ddprof_exporter_new(ctx.worker_ctx.user_tags, ctx.worker_ctx.exp[1])); - DDRES_CHECK_FWD(pprof_create_profile(ctx.worker_ctx.pprof[0], ctx)); - DDRES_CHECK_FWD(pprof_create_profile(ctx.worker_ctx.pprof[1], ctx)); + DDRES_CHECK_FWD(pprof_create_profile( + ctx.worker_ctx.pprof[0], ctx, + ctx.worker_ctx.us->symbol_hdr._profiles_dictionary.get())); + DDRES_CHECK_FWD(pprof_create_profile( + ctx.worker_ctx.pprof[1], ctx, + ctx.worker_ctx.us->symbol_hdr._profiles_dictionary.get())); DDRES_CHECK_FWD(worker_init_stats(&ctx.worker_ctx)); } CatchExcept2DDRes(); diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 66f7c5739..2539bf345 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -388,7 +388,8 @@ DDRes process_symbolization( } // namespace -DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx) { +DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx, + const ddog_prof_ProfilesDictionaryHandle *dict) { size_t const num_watchers = ctx.watchers.size(); ActiveIdsResult active_ids = {}; @@ -463,15 +464,16 @@ DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx) { .value = default_period, }; } - auto prof_res = ddog_prof_Profile_new( - sample_types, + + ddog_prof_Status status = ddog_prof_Profile_with_dictionary( + &pprof->_profile, dict, sample_types, pprof_values.get_num_sample_type_ids() > 0 ? &period : nullptr); - if (prof_res.tag != DDOG_PROF_PROFILE_NEW_RESULT_OK) { - ddog_Error_drop(&prof_res.err); - DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to create new profile"); + if (status.err != nullptr) { + defer { ddog_prof_Status_drop(&status); }; + DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to create new profile: %s", + status.err); } - pprof->_profile = prof_res.ok; // Add relevant tags { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 01d4e6257..27cb074e4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -196,6 +196,7 @@ add_unit_test( ../src/ddog_profiling_utils.cc ../src/ddprof_cmdline_watcher.cc ../src/pprof/ddprof_pprof.cc + ../src/symbol_hdr.cc ../src/symbolizer.cc ../src/demangler/demangler.cc ../src/perf_watcher.cc @@ -209,6 +210,7 @@ add_unit_test( ../src/exporter/ddprof_exporter.cc ../src/pprof/ddprof_pprof.cc ../src/perf_watcher.cc + ../src/symbol_hdr.cc ../src/symbolizer.cc ../src/demangler/demangler.cc ../src/tags.cc @@ -266,6 +268,7 @@ add_unit_test( ../src/mapinfo_lookup.cc ../src/procutils.cc ../src/runtime_symbol_lookup.cc + ../src/symbol_hdr.cc ../src/symbol_map.cc ../src/signal_helper.cc ../src/statsd.cc @@ -313,6 +316,7 @@ set(ALLOCATION_TRACKER_UT_SRCS ../src/mapinfo_lookup.cc ../src/procutils.cc ../src/runtime_symbol_lookup.cc + ../src/symbol_hdr.cc ../src/symbol_map.cc ../src/signal_helper.cc ../src/statsd.cc diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 648b3217c..9ead0d151 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -160,7 +160,8 @@ TEST(DDProfExporter, simple) { fill_unwind_symbols(table, mapinfo_table, mock_output); DDProfContext ctx = {}; ctx.watchers.push_back(*ewatcher_from_str("sCPU")); - res = pprof_create_profile(&pprofs, ctx); + res = pprof_create_profile(&pprofs, ctx, + symbol_hdr._profiles_dictionary.get()); EXPECT_TRUE(IsDDResOK(res)); res = pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, &ctx.watchers[0], file_infos, false, kSumPos, diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index 9e7999987..3f402eabb 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -25,10 +25,13 @@ TEST(DDProfPProf, init_profiles) { DDProfPProf pprof; DDProfContext ctx = {}; ctx.watchers.push_back(*ewatcher_from_str("sCPU")); - DDRes res = pprof_create_profile(&pprof, ctx); + ddog_prof_ProfilesDictionaryHandle dict; + ddog_prof_ProfilesDictionary_new(&dict); + DDRes res = pprof_create_profile(&pprof, ctx, &dict); EXPECT_TRUE(IsDDResOK(res)); res = pprof_free_profile(&pprof); EXPECT_TRUE(IsDDResOK(res)); + ddog_prof_ProfilesDictionary_drop(&dict); } void test_pprof(DDProfPProf *pprofs) { @@ -65,7 +68,8 @@ TEST(DDProfPProf, aggregate) { bool ok = watchers_from_str("sCPU", ctx.watchers); EXPECT_TRUE(ok); - DDRes res = pprof_create_profile(&pprof, ctx); + DDRes res = pprof_create_profile(&pprof, ctx, + symbol_hdr._profiles_dictionary.get()); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_index != -1); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_count_index != -1); res = pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, @@ -101,7 +105,8 @@ TEST(DDProfPProf, just_live) { log_watcher(&(ctx.watchers[0]), 0); log_watcher(&(ctx.watchers[1]), 1); - DDRes res = pprof_create_profile(&pprof, ctx); + DDRes res = pprof_create_profile(&pprof, ctx, + symbol_hdr._profiles_dictionary.get()); EXPECT_TRUE(IsDDResOK(res)); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_index == -1); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_count_index == -1); From 9209277697e24b6a00c804791e7a031155619bc0 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Feb 2026 09:52:53 +0100 Subject: [PATCH 3/9] String interning - use libdatadog's dictionary to intern strings --- include/base_frame_symbol_lookup.hpp | 8 +- include/common_mapinfo_lookup.hpp | 5 +- include/common_symbol_lookup.hpp | 6 +- include/ddog_profiling_utils.hpp | 38 +++- include/dso_symbol_lookup.hpp | 11 +- include/mapinfo_lookup.hpp | 4 +- include/mapinfo_table.hpp | 3 + include/pprof/ddprof_pprof.hpp | 7 + include/runtime_symbol_lookup.hpp | 15 +- include/symbol.hpp | 27 +-- include/symbol_hdr.hpp | 4 + include/symbol_helper.hpp | 17 +- src/base_frame_symbol_lookup.cc | 23 +- src/common_mapinfo_lookup.cc | 5 +- src/common_symbol_lookup.cc | 18 +- src/ddog_profiling_utils.cc | 168 ++++++++++++++- src/ddprof_worker.cc | 2 +- src/dso_symbol_lookup.cc | 39 ++-- src/mapinfo_lookup.cc | 11 +- src/pprof/ddprof_pprof.cc | 309 ++++++++++++++++++++++++--- src/runtime_symbol_lookup.cc | 57 ++--- src/symbol_hdr.cc | 35 +++ src/unwind_dwfl.cc | 12 +- src/unwind_helper.cc | 20 +- test/CMakeLists.txt | 6 +- test/ddprof_exporter-ut.cc | 3 +- test/ddprof_pprof-ut.cc | 6 +- test/runtime_symbol_lookup-ut.cc | 89 ++++++-- test/unwind_output_mock.hpp | 23 +- 29 files changed, 794 insertions(+), 177 deletions(-) create mode 100644 src/symbol_hdr.cc diff --git a/include/base_frame_symbol_lookup.hpp b/include/base_frame_symbol_lookup.hpp index 2cf7f0d76..aa7880ae3 100644 --- a/include/base_frame_symbol_lookup.hpp +++ b/include/base_frame_symbol_lookup.hpp @@ -9,12 +9,15 @@ #include "dso_symbol_lookup.hpp" #include "symbol_table.hpp" +struct ddog_prof_ProfilesDictionary; + namespace ddprof { class BaseFrameSymbolLookup { public: SymbolIdx_t get_or_insert(pid_t pid, SymbolTable &symbol_table, DsoSymbolLookup &dso_symbol_lookup, - DsoHdr &dso_hdr); + DsoHdr &dso_hdr, + const ddog_prof_ProfilesDictionary *dict); // Erase symbol lookup for this pid (warning symbols still exist) void erase(pid_t pid) { @@ -27,7 +30,8 @@ class BaseFrameSymbolLookup { private: SymbolIdx_t insert_bin_symbol(pid_t pid, SymbolTable &symbol_table, DsoSymbolLookup &dso_symbol_lookup, - DsoHdr &dso_hdr); + DsoHdr &dso_hdr, + const ddog_prof_ProfilesDictionary *dict); static const int k_nb_bin_lookups = 10; struct PidSymbol { explicit PidSymbol(SymbolIdx_t symb_idx) : _symb_idx(symb_idx) {} diff --git a/include/common_mapinfo_lookup.hpp b/include/common_mapinfo_lookup.hpp index f13b6350a..06f12391c 100644 --- a/include/common_mapinfo_lookup.hpp +++ b/include/common_mapinfo_lookup.hpp @@ -12,6 +12,8 @@ #include #include +struct ddog_prof_ProfilesDictionary; + namespace ddprof { // Generates virtual frames for common unhandled cases class CommonMapInfoLookup { @@ -21,7 +23,8 @@ class CommonMapInfoLookup { }; SymbolIdx_t get_or_insert(MappingErrors lookup_case, - MapInfoTable &mapinfo_table); + MapInfoTable &mapinfo_table, + const ddog_prof_ProfilesDictionary *dict); private: std::unordered_map _map; diff --git a/include/common_symbol_lookup.hpp b/include/common_symbol_lookup.hpp index 802ab63d5..4d1111a71 100644 --- a/include/common_symbol_lookup.hpp +++ b/include/common_symbol_lookup.hpp @@ -11,12 +11,14 @@ #include +struct ddog_prof_ProfilesDictionary; + namespace ddprof { // Generates virtual frames for common unhandled cases class CommonSymbolLookup { public: - SymbolIdx_t get_or_insert(SymbolErrors lookup_case, - SymbolTable &symbol_table); + SymbolIdx_t get_or_insert(SymbolErrors lookup_case, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict); private: std::unordered_map _map; diff --git a/include/ddog_profiling_utils.hpp b/include/ddog_profiling_utils.hpp index de25b850a..0d4e79712 100644 --- a/include/ddog_profiling_utils.hpp +++ b/include/ddog_profiling_utils.hpp @@ -11,18 +11,41 @@ #include "symbol.hpp" #include "unwind_output.hpp" -#include "datadog/blazesym.h" #include "datadog/common.h" #include "datadog/profiling.h" #include +struct blaze_sym; + namespace ddprof { inline ddog_CharSlice to_CharSlice(std::string_view str) { return {.ptr = str.data(), .len = str.size()}; } -void write_function(const Symbol &symbol, ddog_prof_Function *ffi_func); +ddog_prof_StringId2 intern_string(const ddog_prof_ProfilesDictionary *dict, + std::string_view str); + +ddog_prof_FunctionId2 +intern_function_ids(const ddog_prof_ProfilesDictionary *dict, + ddog_prof_StringId2 name_id, ddog_prof_StringId2 file_id, + ddog_prof_StringId2 system_name_id); + +ddog_prof_FunctionId2 intern_function(const ddog_prof_ProfilesDictionary *dict, + std::string_view demangled_name, + std::string_view file_name, + std::string_view system_name = {}); + +ddog_prof_MappingId2 intern_mapping(const ddog_prof_ProfilesDictionary *dict, + const MapInfo &mapinfo); + +Symbol make_symbol(std::string symname, std::string demangled_name, + uint32_t lineno, std::string srcpath, + const ddog_prof_ProfilesDictionary *dict); + +void write_function(const Symbol &symbol, + const ddog_prof_ProfilesDictionary *dict, + ddog_prof_Function *ffi_func); void write_function(std::string_view demangled_name, std::string_view file_name, ddog_prof_Function *ffi_func); @@ -30,13 +53,22 @@ void write_function(std::string_view demangled_name, std::string_view file_name, void write_mapping(const MapInfo &mapinfo, ddog_prof_Mapping *ffi_mapping); void write_location(const FunLoc &loc, const MapInfo &mapinfo, - const Symbol &symbol, ddog_prof_Location *ffi_location); + const Symbol &symbol, + const ddog_prof_ProfilesDictionary *dict, + ddog_prof_Location *ffi_location); void write_location(ProcessAddress_t ip_or_elf_addr, std::string_view demangled_name, std::string_view file_name, uint32_t lineno, const MapInfo &mapinfo, ddog_prof_Location *ffi_location); +void write_location2(const FunLoc &loc, const MapInfo &mapinfo, + const Symbol &symbol, ddog_prof_Location2 *ffi_location); + +DDRes write_location2_from_location(const ddog_prof_ProfilesDictionary *dict, + const ddog_prof_Location &src, + ddog_prof_Location2 *dst); + DDRes write_location_blaze( ElfAddress_t elf_addr, ddprof::HeterogeneousLookupStringMap &demangled_names, diff --git a/include/dso_symbol_lookup.hpp b/include/dso_symbol_lookup.hpp index 2a2c45caf..4ed20793f 100644 --- a/include/dso_symbol_lookup.hpp +++ b/include/dso_symbol_lookup.hpp @@ -11,24 +11,29 @@ #include +struct ddog_prof_ProfilesDictionary; + namespace ddprof { class DsoSymbolLookup { public: SymbolIdx_t get_or_insert(FileAddress_t normalized_addr, const Dso &dso, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict, std::string_view addr_type = "elf"); // only binary info - SymbolIdx_t get_or_insert(const Dso &dso, SymbolTable &symbol_table); + SymbolIdx_t get_or_insert(const Dso &dso, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict); void stats_display() const; private: size_t get_size() const; - SymbolIdx_t get_or_insert_unhandled_type(const Dso &dso, - SymbolTable &symbol_table); + SymbolIdx_t get_or_insert_unhandled_type( + const Dso &dso, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict); // map of maps --> the aim is to monitor usage of some maps and clear them // together // TODO : find efficient clear on symbol table before we do this diff --git a/include/mapinfo_lookup.hpp b/include/mapinfo_lookup.hpp index b20dd4c32..7a2535a79 100644 --- a/include/mapinfo_lookup.hpp +++ b/include/mapinfo_lookup.hpp @@ -15,6 +15,7 @@ #include struct Dwfl_Module; +struct ddog_prof_ProfilesDictionary; namespace ddprof { @@ -22,7 +23,8 @@ class MapInfoLookup { public: MapInfoIdx_t get_or_insert(pid_t pid, MapInfoTable &mapinfo_table, const Dso &dso, - std::optional build_id); + std::optional build_id, + const ddog_prof_ProfilesDictionary *dict); void erase(pid_t pid) { // table elements are not removed (TODO to gain memory usage) _mapinfo_pidmap.erase(pid); diff --git a/include/mapinfo_table.hpp b/include/mapinfo_table.hpp index 2157ad98b..5810066ee 100644 --- a/include/mapinfo_table.hpp +++ b/include/mapinfo_table.hpp @@ -11,6 +11,8 @@ #include #include +struct ddog_prof_Mapping2; + namespace ddprof { class MapInfo { public: @@ -26,6 +28,7 @@ class MapInfo { Offset_t _offset{0}; std::string _sopath; BuildIdStr _build_id; + ddog_prof_Mapping2 *_mapping_id{nullptr}; }; using MapInfoTable = std::vector; diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index 6da352556..e9457e37b 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -53,6 +53,13 @@ DDRes pprof_aggregate(const UnwindOutput *uw_output, EventAggregationModePos value_pos, Symbolizer *symbolizer, DDProfPProf *pprof); +DDRes pprof_aggregate_interned_sample( + const UnwindOutput *uw_output, const SymbolHdr &symbol_hdr, + const DDProfValuePack &pack, const PerfWatcher *watcher, + const FileInfoVector &file_infos, bool show_samples, + EventAggregationModePos value_pos, Symbolizer *symbolizer, + DDProfPProf *pprof); + DDRes pprof_reset(DDProfPProf *pprof); DDRes pprof_write_profile(const DDProfPProf *pprof, int fd); diff --git a/include/runtime_symbol_lookup.hpp b/include/runtime_symbol_lookup.hpp index f059b365b..1ef85e7c1 100644 --- a/include/runtime_symbol_lookup.hpp +++ b/include/runtime_symbol_lookup.hpp @@ -15,6 +15,8 @@ #include #include +struct ddog_prof_ProfilesDictionary; + namespace ddprof { class RuntimeSymbolLookup { @@ -30,10 +32,12 @@ class RuntimeSymbolLookup { SymbolIdx_t get_or_insert_jitdump(pid_t pid, ProcessAddress_t pc, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict, std::string_view jitdump_path); SymbolIdx_t get_or_insert(pid_t pid, ProcessAddress_t pc, - SymbolTable &symbol_table); + SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict); void erase(pid_t pid) { _pid_map.erase(pid); } @@ -77,10 +81,12 @@ class RuntimeSymbolLookup { // Symbols are cached with the process's address. // DDRes fill_from_jitdump(std::string_view jitdump_path, pid_t pid, - SymbolMap &symbol_map, SymbolTable &symbol_table); + SymbolMap &symbol_map, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict); DDRes fill_from_perfmap(int pid, SymbolMap &symbol_map, - SymbolTable &symbol_table); + SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict); UniqueFile perfmaps_open(int pid, const char *path_to_perfmap); @@ -111,7 +117,8 @@ class RuntimeSymbolLookup { static bool insert_or_replace(std::string_view symbol, ProcessAddress_t address, Offset_t code_size, SymbolMap &symbol_map, - SymbolTable &symbol_table); + SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict); static constexpr std::array _ignored_symbols_start = {{ diff --git a/include/symbol.hpp b/include/symbol.hpp index 62cd32ad6..84245a3cf 100644 --- a/include/symbol.hpp +++ b/include/symbol.hpp @@ -7,7 +7,10 @@ #include "ddprof_defs.hpp" -#include +struct ddog_prof_Function2; +struct ddog_prof_StringHeader; + +struct ddog_prof_Function2; // Symbol // Information relating to a given location @@ -17,20 +20,18 @@ class Symbol { public: Symbol() : _lineno(0) {} - Symbol(std::string symname, std::string demangled_name, uint32_t lineno, - std::string srcpath) - : _symname(std::move(symname)), - _demangled_name(std::move(demangled_name)), _lineno(lineno), - _srcpath(std::move(srcpath)) {} - - // OUTPUT OF ADDRINFO - std::string _symname; - - // DEMANGLING CACHE - std::string _demangled_name; + Symbol(ddog_prof_StringHeader *name_id, ddog_prof_StringHeader *file_id, + uint32_t lineno, ddog_prof_Function2 *function_id) + : _name_id(name_id), + _file_id(file_id), + _lineno(lineno), + _function_id(function_id) {} // OUTPUT OF LINE INFO + ddog_prof_StringHeader *_name_id{nullptr}; + ddog_prof_StringHeader *_file_id{nullptr}; uint32_t _lineno; - std::string _srcpath; + + ddog_prof_Function2 *_function_id{nullptr}; }; } // namespace ddprof diff --git a/include/symbol_hdr.hpp b/include/symbol_hdr.hpp index be56930e6..066aa7265 100644 --- a/include/symbol_hdr.hpp +++ b/include/symbol_hdr.hpp @@ -42,6 +42,10 @@ struct SymbolHdr { void display_stats() const { _dso_symbol_lookup.stats_display(); } void cycle() { _runtime_symbol_lookup.cycle(); } + const ddog_prof_ProfilesDictionary *profiles_dictionary() const { + return _profiles_dictionary ? *_profiles_dictionary : nullptr; + } + void clear(pid_t pid) { _base_frame_symbol_lookup.erase(pid); // mappings are only relevant in the context of a given pid. diff --git a/include/symbol_helper.hpp b/include/symbol_helper.hpp index 292939c83..38e706238 100644 --- a/include/symbol_helper.hpp +++ b/include/symbol_helper.hpp @@ -6,6 +6,7 @@ #include "unwind_state.hpp" #include +#include #include #include #include @@ -16,6 +17,8 @@ std::vector collect_symbols(UnwindState &state, blaze_symbolizer *symbolizer) { std::vector symbols; auto &symbol_table = state.symbol_hdr._symbol_table; + const ddog_prof_ProfilesDictionary *dict = + state.symbol_hdr.profiles_dictionary(); for (size_t iloc = 0; iloc < state.output.locs.size(); ++iloc) { std::string demangled_name; if (state.output.locs[iloc].symbol_idx == k_symbol_idx_null) { @@ -40,7 +43,19 @@ std::vector collect_symbols(UnwindState &state, } else { // Lookup the symbol from the symbol table. auto &symbol = symbol_table[state.output.locs[iloc].symbol_idx]; - demangled_name = symbol._demangled_name; + if (!dict || !symbol._name_id) { + demangled_name = "unknown"; + } else { + ddog_CharSlice slice{nullptr, 0}; + ddog_prof_Status status = + ddog_prof_ProfilesDictionary_get_str(&slice, dict, symbol._name_id); + if (status.err != nullptr) { + ddog_prof_Status_drop(&status); + demangled_name = "unknown"; + } else { + demangled_name = std::string(slice.ptr, slice.len); + } + } } symbols.push_back(demangled_name); } diff --git a/src/base_frame_symbol_lookup.cc b/src/base_frame_symbol_lookup.cc index 4be393be2..843270b48 100644 --- a/src/base_frame_symbol_lookup.cc +++ b/src/base_frame_symbol_lookup.cc @@ -5,6 +5,7 @@ #include "base_frame_symbol_lookup.hpp" +#include "ddog_profiling_utils.hpp" #include "dso_type.hpp" #include "logger.hpp" @@ -14,23 +15,25 @@ namespace ddprof { namespace { -Symbol symbol_from_pid(pid_t pid) { - return {{}, {}, 0, absl::StrCat("pid_", pid)}; +Symbol symbol_from_pid(pid_t pid, const ddog_prof_ProfilesDictionary *dict) { + return make_symbol(std::string(), std::string(), 0, + absl::StrCat("pid_", pid), dict); } } // namespace SymbolIdx_t BaseFrameSymbolLookup::insert_bin_symbol(pid_t pid, SymbolTable &symbol_table, DsoSymbolLookup &dso_symbol_lookup, - DsoHdr &dso_hdr) { + DsoHdr &dso_hdr, + const ddog_prof_ProfilesDictionary *dict) { SymbolIdx_t symbol_idx = -1; DsoHdr::DsoFindRes const find_res = dso_hdr.dso_find_first_std_executable(pid); if (find_res.second && has_relevant_path(find_res.first->second._type)) { // todo : how to tie lifetime of DSO to this ? - symbol_idx = - dso_symbol_lookup.get_or_insert(find_res.first->second, symbol_table); + symbol_idx = dso_symbol_lookup.get_or_insert(find_res.first->second, + symbol_table, dict); _bin_map.insert({pid, symbol_idx}); const std::filesystem::path path(find_res.first->second._filename); const std::string base_name = path.filename().string(); @@ -42,7 +45,8 @@ BaseFrameSymbolLookup::insert_bin_symbol(pid_t pid, SymbolTable &symbol_table, const std::filesystem::path path(exe_name); const std::string base_name = path.filename().string(); symbol_idx = symbol_table.size(); - symbol_table.emplace_back(Symbol({}, base_name, 0, exe_name)); + symbol_table.emplace_back( + make_symbol(std::string(), base_name, 0, exe_name, dict)); _bin_map.insert({pid, symbol_idx}); _exe_name_map.insert({pid, base_name}); } @@ -53,7 +57,8 @@ BaseFrameSymbolLookup::insert_bin_symbol(pid_t pid, SymbolTable &symbol_table, SymbolIdx_t BaseFrameSymbolLookup::get_or_insert(pid_t pid, SymbolTable &symbol_table, DsoSymbolLookup &dso_symbol_lookup, - DsoHdr &dso_hdr) { + DsoHdr &dso_hdr, + const ddog_prof_ProfilesDictionary *dict) { auto const it_bin = _bin_map.find(pid); auto const it_pid = _pid_map.find(pid); @@ -65,7 +70,7 @@ BaseFrameSymbolLookup::get_or_insert(pid_t pid, SymbolTable &symbol_table, if (it_pid == _pid_map.end() || ++it_pid->second._nb_bin_lookups < k_nb_bin_lookups) { symbol_idx = - insert_bin_symbol(pid, symbol_table, dso_symbol_lookup, dso_hdr); + insert_bin_symbol(pid, symbol_table, dso_symbol_lookup, dso_hdr, dict); } } if (symbol_idx == -1 && it_pid != _pid_map.end()) { @@ -75,7 +80,7 @@ BaseFrameSymbolLookup::get_or_insert(pid_t pid, SymbolTable &symbol_table, // First time we fail on this pid : insert a pid info in symbol table if (symbol_idx == -1) { symbol_idx = symbol_table.size(); - symbol_table.push_back(symbol_from_pid(pid)); + symbol_table.push_back(symbol_from_pid(pid, dict)); _pid_map.emplace(pid, PidSymbol(symbol_idx)); } return symbol_idx; diff --git a/src/common_mapinfo_lookup.cc b/src/common_mapinfo_lookup.cc index bacaa7612..c7fde1f67 100644 --- a/src/common_mapinfo_lookup.cc +++ b/src/common_mapinfo_lookup.cc @@ -5,6 +5,8 @@ #include "common_mapinfo_lookup.hpp" +#include "ddog_profiling_utils.hpp" + namespace ddprof { namespace { MapInfo mapinfo_from_common(CommonMapInfoLookup::MappingErrors lookup_case) { @@ -20,7 +22,7 @@ MapInfo mapinfo_from_common(CommonMapInfoLookup::MappingErrors lookup_case) { MapInfoIdx_t CommonMapInfoLookup::get_or_insert( CommonMapInfoLookup::MappingErrors lookup_case, - MapInfoTable &mapinfo_table) { + MapInfoTable &mapinfo_table, const ddog_prof_ProfilesDictionary *dict) { auto const it = _map.find(lookup_case); MapInfoIdx_t res; if (it != _map.end()) { @@ -28,6 +30,7 @@ MapInfoIdx_t CommonMapInfoLookup::get_or_insert( } else { // insert things res = mapinfo_table.size(); mapinfo_table.push_back(mapinfo_from_common(lookup_case)); + mapinfo_table.back()._mapping_id = intern_mapping(dict, mapinfo_table.back()); _map.insert({lookup_case, res}); } return res; diff --git a/src/common_symbol_lookup.cc b/src/common_symbol_lookup.cc index 301b8384c..a7129c59d 100644 --- a/src/common_symbol_lookup.cc +++ b/src/common_symbol_lookup.cc @@ -5,23 +5,29 @@ #include "common_symbol_lookup.hpp" +#include "ddog_profiling_utils.hpp" + namespace ddprof { namespace { -Symbol symbol_from_common(SymbolErrors lookup_case) { - return {std::string(), std::string{k_common_frame_names[lookup_case]}, 0, - std::string()}; +Symbol symbol_from_common(SymbolErrors lookup_case, + const ddog_prof_ProfilesDictionary *dict) { + return make_symbol(std::string(), + std::string{k_common_frame_names[lookup_case]}, 0, + std::string(), dict); } } // namespace -SymbolIdx_t CommonSymbolLookup::get_or_insert(SymbolErrors lookup_case, - SymbolTable &symbol_table) { +SymbolIdx_t +CommonSymbolLookup::get_or_insert(SymbolErrors lookup_case, + SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict) { auto const it = _map.find(lookup_case); SymbolIdx_t symbol_idx; if (it != _map.end()) { symbol_idx = it->second; } else { // insert things symbol_idx = symbol_table.size(); - symbol_table.push_back(symbol_from_common(lookup_case)); + symbol_table.push_back(symbol_from_common(lookup_case, dict)); _map.insert({lookup_case, symbol_idx}); } return symbol_idx; diff --git a/src/ddog_profiling_utils.cc b/src/ddog_profiling_utils.cc index 1444166b2..df846a40e 100644 --- a/src/ddog_profiling_utils.cc +++ b/src/ddog_profiling_utils.cc @@ -7,6 +7,9 @@ #include "ddres.hpp" #include "demangler/demangler.hpp" +#include "logger.hpp" + +#include "datadog/blazesym.h" namespace ddprof { namespace { @@ -25,10 +28,113 @@ std::string_view get_or_insert_demangled_sym( } } // namespace -void write_function(const Symbol &symbol, ddog_prof_Function *ffi_func) { - ffi_func->name = to_CharSlice(symbol._demangled_name); +ddog_prof_StringId2 intern_string(const ddog_prof_ProfilesDictionary *dict, + std::string_view str) { + if (!dict || str.empty()) { + return DDOG_PROF_STRINGID2_EMPTY; + } + ddog_prof_StringId2 string_id = nullptr; + ddog_prof_Status status = ddog_prof_ProfilesDictionary_insert_str( + &string_id, dict, to_CharSlice(str), DDOG_PROF_UTF8_OPTION_ASSUME); + if (status.err != nullptr) { + LG_WRN("Failed to intern string: %s", status.err); + ddog_prof_Status_drop(&status); + return DDOG_PROF_STRINGID2_EMPTY; + } + return string_id; +} + +ddog_prof_FunctionId2 +intern_function_ids(const ddog_prof_ProfilesDictionary *dict, + ddog_prof_StringId2 name_id, ddog_prof_StringId2 file_id, + ddog_prof_StringId2 system_name_id) { + if (!dict) { + return nullptr; + } + ddog_prof_Function2 function = { + .name = name_id, + .system_name = system_name_id, + .file_name = file_id, + }; + ddog_prof_FunctionId2 function_id = nullptr; + ddog_prof_Status status = + ddog_prof_ProfilesDictionary_insert_function(&function_id, dict, + &function); + if (status.err != nullptr) { + LG_WRN("Failed to intern function: %s", status.err); + ddog_prof_Status_drop(&status); + return nullptr; + } + return function_id; +} + +ddog_prof_FunctionId2 intern_function(const ddog_prof_ProfilesDictionary *dict, + std::string_view demangled_name, + std::string_view file_name, + std::string_view system_name) { + ddog_prof_StringId2 name_id = intern_string(dict, demangled_name); + ddog_prof_StringId2 file_id = intern_string(dict, file_name); + ddog_prof_StringId2 system_name_id = intern_string(dict, system_name); + return intern_function_ids(dict, name_id, file_id, system_name_id); +} + +ddog_prof_MappingId2 intern_mapping(const ddog_prof_ProfilesDictionary *dict, + const MapInfo &mapinfo) { + if (!dict) { + return nullptr; + } + ddog_prof_Mapping2 mapping = { + .memory_start = mapinfo._low_addr, + .memory_limit = mapinfo._high_addr, + .file_offset = mapinfo._offset, + .filename = intern_string(dict, mapinfo._sopath), + .build_id = intern_string(dict, mapinfo._build_id), + }; + ddog_prof_MappingId2 mapping_id = nullptr; + ddog_prof_Status status = + ddog_prof_ProfilesDictionary_insert_mapping(&mapping_id, dict, &mapping); + if (status.err != nullptr) { + LG_WRN("Failed to intern mapping: %s", status.err); + ddog_prof_Status_drop(&status); + return nullptr; + } + return mapping_id; +} + +Symbol make_symbol(std::string symname, std::string demangled_name, + uint32_t lineno, std::string srcpath, + const ddog_prof_ProfilesDictionary *dict) { + [[maybe_unused]] std::string ignored_symname = std::move(symname); + ddog_prof_StringId2 name_id = intern_string(dict, demangled_name); + ddog_prof_StringId2 file_id = intern_string(dict, srcpath); + ddog_prof_FunctionId2 function_id = + intern_function_ids(dict, name_id, file_id, DDOG_PROF_STRINGID2_EMPTY); + return Symbol(name_id, file_id, lineno, function_id); +} + +namespace { +ddog_CharSlice get_dict_string(const ddog_prof_ProfilesDictionary *dict, + ddog_prof_StringId2 string_id) { + if (!dict || !string_id) { + return {.ptr = nullptr, .len = 0}; + } + ddog_CharSlice result{nullptr, 0}; + ddog_prof_Status status = + ddog_prof_ProfilesDictionary_get_str(&result, dict, string_id); + if (status.err != nullptr) { + ddog_prof_Status_drop(&status); + return {.ptr = nullptr, .len = 0}; + } + return result; +} +} // namespace + +void write_function(const Symbol &symbol, + const ddog_prof_ProfilesDictionary *dict, + ddog_prof_Function *ffi_func) { + ffi_func->name = get_dict_string(dict, symbol._name_id); ffi_func->system_name = {.ptr = nullptr, .len = 0}; - ffi_func->filename = to_CharSlice(symbol._srcpath); + ffi_func->filename = get_dict_string(dict, symbol._file_id); } void write_function(std::string_view demangled_name, std::string_view file_name, @@ -47,9 +153,11 @@ void write_mapping(const MapInfo &mapinfo, ddog_prof_Mapping *ffi_mapping) { } void write_location(const FunLoc &loc, const MapInfo &mapinfo, - const Symbol &symbol, ddog_prof_Location *ffi_location) { + const Symbol &symbol, + const ddog_prof_ProfilesDictionary *dict, + ddog_prof_Location *ffi_location) { write_mapping(mapinfo, &ffi_location->mapping); - write_function(symbol, &ffi_location->function); + write_function(symbol, dict, &ffi_location->function); ffi_location->address = loc.elf_addr; ffi_location->line = symbol._lineno; } @@ -64,6 +172,56 @@ void write_location(ProcessAddress_t ip_or_elf_addr, ffi_location->line = lineno; } +void write_location2(const FunLoc &loc, const MapInfo &mapinfo, + const Symbol &symbol, ddog_prof_Location2 *ffi_location) { + ffi_location->mapping = mapinfo._mapping_id; + ffi_location->function = symbol._function_id; + ffi_location->address = loc.elf_addr; + ffi_location->line = symbol._lineno; +} + +DDRes write_location2_from_location(const ddog_prof_ProfilesDictionary *dict, + const ddog_prof_Location &src, + ddog_prof_Location2 *dst) { + if (!dict) { + return ddres_error(DD_WHAT_PPROF); + } + auto to_sv = [](ddog_CharSlice slice) -> std::string_view { + if (!slice.ptr || slice.len == 0) { + return {}; + } + return {slice.ptr, slice.len}; + }; + const std::string_view fn_name = to_sv(src.function.name); + const std::string_view fn_file = to_sv(src.function.filename); + const std::string_view fn_system = to_sv(src.function.system_name); + ddog_prof_FunctionId2 function_id = + intern_function(dict, fn_name, fn_file, fn_system); + if (function_id == nullptr) { + return ddres_error(DD_WHAT_PPROF); + } + ddog_prof_Mapping2 mapping = { + .memory_start = src.mapping.memory_start, + .memory_limit = src.mapping.memory_limit, + .file_offset = src.mapping.file_offset, + .filename = intern_string(dict, to_sv(src.mapping.filename)), + .build_id = intern_string(dict, to_sv(src.mapping.build_id)), + }; + ddog_prof_MappingId2 mapping_id = nullptr; + ddog_prof_Status status = + ddog_prof_ProfilesDictionary_insert_mapping(&mapping_id, dict, &mapping); + if (status.err != nullptr) { + LG_WRN("Failed to intern mapping: %s", status.err); + ddog_prof_Status_drop(&status); + return ddres_error(DD_WHAT_PPROF); + } + dst->mapping = mapping_id; + dst->function = function_id; + dst->address = src.address; + dst->line = src.line; + return {}; +} + DDRes write_location_blaze( ElfAddress_t elf_addr, ddprof::HeterogeneousLookupStringMap &demangled_names, diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index 1e1b907e1..6906a14e9 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -252,7 +252,7 @@ DDRes aggregate_livealloc_stack( alloc_info.second._value, static_cast(std::max(0, alloc_info.second._count)), 0}; - DDRES_CHECK_FWD(pprof_aggregate( + DDRES_CHECK_FWD(pprof_aggregate_interned_sample( &alloc_info.first, symbol_hdr, pack, watcher, ctx.worker_ctx.us->dso_hdr.get_file_info_vector(), ctx.params.show_samples, kLiveSumPos, ctx.worker_ctx.symbolizer, pprof)); diff --git a/src/dso_symbol_lookup.cc b/src/dso_symbol_lookup.cc index 9a957c913..9c0f4e953 100644 --- a/src/dso_symbol_lookup.cc +++ b/src/dso_symbol_lookup.cc @@ -5,6 +5,7 @@ #include "dso_symbol_lookup.hpp" +#include "ddog_profiling_utils.hpp" #include "ddprof_file_info-i.hpp" #include "dso_type.hpp" #include "logger.hpp" @@ -17,23 +18,26 @@ namespace ddprof { namespace { -Symbol symbol_from_unhandled_dso(const Dso &dso) { - return {std::string(), std::string(), 0, dso_type_str(dso._type)}; +Symbol symbol_from_unhandled_dso(const Dso &dso, + const ddog_prof_ProfilesDictionary *dict) { + return make_symbol(std::string(), std::string(), 0, + dso_type_str(dso._type), dict); } Symbol symbol_from_dso(ElfAddress_t normalized_addr, const Dso &dso, - std::string_view addr_type) { + std::string_view addr_type, + const ddog_prof_ProfilesDictionary *dict) { // address that means something for our user (addr) std::string const dso_dbg_str = normalized_addr ? absl::StrFormat("[%#x:%s]", normalized_addr, addr_type) : std::filesystem::path(dso.format_filename()).filename().string(); - return {dso_dbg_str, dso_dbg_str, 0, dso.format_filename()}; + return make_symbol(dso_dbg_str, dso_dbg_str, 0, dso.format_filename(), dict); } } // namespace -SymbolIdx_t -DsoSymbolLookup::get_or_insert_unhandled_type(const Dso &dso, - SymbolTable &symbol_table) { +SymbolIdx_t DsoSymbolLookup::get_or_insert_unhandled_type( + const Dso &dso, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict) { auto const it = _map_unhandled_dso.find(dso._type); SymbolIdx_t symbol_idx; @@ -41,20 +45,19 @@ DsoSymbolLookup::get_or_insert_unhandled_type(const Dso &dso, symbol_idx = it->second; } else { symbol_idx = symbol_table.size(); - symbol_table.push_back(symbol_from_unhandled_dso(dso)); + symbol_table.push_back(symbol_from_unhandled_dso(dso, dict)); _map_unhandled_dso.insert({dso._type, symbol_idx}); } return symbol_idx; } -SymbolIdx_t DsoSymbolLookup::get_or_insert(FileAddress_t normalized_addr, - const Dso &dso, - SymbolTable &symbol_table, - std::string_view addr_type) { +SymbolIdx_t DsoSymbolLookup::get_or_insert( + FileAddress_t normalized_addr, const Dso &dso, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict, std::string_view addr_type) { // Only add address information for relevant dso types if (!has_relevant_path(dso._type) && dso._type != DsoType::kVdso && dso._type != DsoType::kVsysCall) { - return get_or_insert_unhandled_type(dso, symbol_table); + return get_or_insert_unhandled_type(dso, symbol_table, dict); } // Note: using file ID could be more generic AddressMap &addr_lookup = _map_dso_path[dso._filename]; @@ -64,15 +67,17 @@ SymbolIdx_t DsoSymbolLookup::get_or_insert(FileAddress_t normalized_addr, symbol_idx = it->second; } else { // insert things symbol_idx = symbol_table.size(); - symbol_table.push_back(symbol_from_dso(normalized_addr, dso, addr_type)); + symbol_table.push_back( + symbol_from_dso(normalized_addr, dso, addr_type, dict)); addr_lookup.insert({normalized_addr, symbol_idx}); } return symbol_idx; } -SymbolIdx_t DsoSymbolLookup::get_or_insert(const Dso &dso, - SymbolTable &symbol_table) { - return get_or_insert(0, dso, symbol_table); +SymbolIdx_t +DsoSymbolLookup::get_or_insert(const Dso &dso, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict) { + return get_or_insert(0, dso, symbol_table, dict); } void DsoSymbolLookup::stats_display() const { diff --git a/src/mapinfo_lookup.cc b/src/mapinfo_lookup.cc index 1e7dd3e92..ee7093c2f 100644 --- a/src/mapinfo_lookup.cc +++ b/src/mapinfo_lookup.cc @@ -5,14 +5,16 @@ #include "mapinfo_lookup.hpp" +#include "ddog_profiling_utils.hpp" #include "ddres.hpp" namespace ddprof { -MapInfoIdx_t MapInfoLookup::get_or_insert(pid_t pid, - MapInfoTable &mapinfo_table, - const Dso &dso, - std::optional build_id) { +MapInfoIdx_t +MapInfoLookup::get_or_insert(pid_t pid, MapInfoTable &mapinfo_table, + const Dso &dso, + std::optional build_id, + const ddog_prof_ProfilesDictionary *dict) { MapInfoAddrMap &addr_map = _mapinfo_pidmap[pid]; auto it = addr_map.find(dso._start); @@ -25,6 +27,7 @@ MapInfoIdx_t MapInfoLookup::get_or_insert(pid_t pid, mapinfo_table.emplace_back(dso._start, dso._end, dso._offset, std::move(sname_str), build_id ? *build_id : BuildIdStr{}); + mapinfo_table.back()._mapping_id = intern_mapping(dict, mapinfo_table.back()); addr_map.emplace(dso._start, map_info_idx); return map_info_idx; } diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 2539bf345..dd4b88f95 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -33,6 +33,13 @@ namespace ddprof { namespace { constexpr size_t k_max_pprof_labels{8}; +constexpr std::string_view k_container_id_label = "container_id"sv; +constexpr std::string_view k_process_id_label = "process_id"sv; +constexpr std::string_view k_process_name_label = "process_name"sv; +constexpr std::string_view k_thread_id_label = "thread id"sv; +constexpr std::string_view k_thread_name_label = "thread_name"sv; +constexpr std::string_view k_tracepoint_label = "tracepoint_type"sv; + constexpr int k_max_value_types = DDPROF_PWT_LENGTH * static_cast(kNbEventAggregationModes); @@ -41,6 +48,163 @@ struct ActiveIdsResult { PerfWatcher *default_watcher = nullptr; }; +struct LabelKeyIds { + const ddog_prof_ProfilesDictionary *dict = nullptr; + ddog_prof_StringId2 container_id = DDOG_PROF_STRINGID2_EMPTY; + ddog_prof_StringId2 process_id = DDOG_PROF_STRINGID2_EMPTY; + ddog_prof_StringId2 process_name = DDOG_PROF_STRINGID2_EMPTY; + ddog_prof_StringId2 thread_id = DDOG_PROF_STRINGID2_EMPTY; + ddog_prof_StringId2 thread_name = DDOG_PROF_STRINGID2_EMPTY; + ddog_prof_StringId2 tracepoint_type = DDOG_PROF_STRINGID2_EMPTY; +}; + +const LabelKeyIds & +get_label_key_ids(const ddog_prof_ProfilesDictionary *dict) { + static LabelKeyIds ids{}; + if (ids.dict != dict) { + ids = {}; + ids.dict = dict; + ids.container_id = intern_string(dict, k_container_id_label); + ids.process_id = intern_string(dict, k_process_id_label); + ids.process_name = intern_string(dict, k_process_name_label); + ids.thread_id = intern_string(dict, k_thread_id_label); + ids.thread_name = intern_string(dict, k_thread_name_label); + ids.tracepoint_type = intern_string(dict, k_tracepoint_label); + } + return ids; +} + +std::string_view pid_str(pid_t pid, + std::unordered_map &pid_strs); + +std::string_view to_string_view(ddog_CharSlice slice) { + if (!slice.ptr || slice.len == 0) { + return {}; + } + return {slice.ptr, slice.len}; +} + +ddog_prof_StringId intern_profile_string(ddog_prof_Profile *profile, + std::string_view value) { + if (value.empty()) { + return ddog_prof_Profile_interned_empty_string(); + } + ddog_prof_StringId_Result res = + ddog_prof_Profile_intern_string(profile, to_CharSlice(value)); + if (res.tag != DDOG_PROF_STRING_ID_RESULT_OK_GENERATIONAL_ID_STRING_ID) { + ddog_Error_drop(&res.err); + return ddog_prof_Profile_interned_empty_string(); + } + return res.ok; +} + +DDRes intern_profile_labelset( + ddog_prof_Profile *profile, const UnwindOutput &uw_output, + const PerfWatcher &watcher, + std::unordered_map &pid_strs, + ddog_prof_LabelSetId *labelset_id) { + std::array labels{}; + size_t labels_num = 0; + + auto push_label = [&](std::string_view key, std::string_view value) -> DDRes { + ddog_prof_StringId key_id = intern_profile_string(profile, key); + ddog_prof_StringId value_id = intern_profile_string(profile, value); + ddog_prof_LabelId_Result label_res = + ddog_prof_Profile_intern_label_str(profile, key_id, value_id); + if (label_res.tag != DDOG_PROF_LABEL_ID_RESULT_OK_GENERATIONAL_ID_LABEL_ID) { + ddog_Error_drop(&label_res.err); + return ddres_error(DD_WHAT_PPROF); + } + labels[labels_num++] = label_res.ok; + return {}; + }; + + if (!uw_output.container_id.empty()) { + DDRES_CHECK_FWD( + push_label(k_container_id_label, uw_output.container_id)); + } + + if (!watcher.suppress_pid || !watcher.suppress_tid) { + DDRES_CHECK_FWD(push_label(k_process_id_label, + pid_str(uw_output.pid, pid_strs))); + } + if (!watcher.suppress_tid) { + DDRES_CHECK_FWD(push_label(k_thread_id_label, + pid_str(uw_output.tid, pid_strs))); + } + if (watcher_has_tracepoint(&watcher)) { + if (!watcher.tracepoint_label.empty()) { + DDRES_CHECK_FWD( + push_label(k_tracepoint_label, watcher.tracepoint_label)); + } else { + DDRES_CHECK_FWD( + push_label(k_tracepoint_label, watcher.tracepoint_event)); + } + } + if (!uw_output.exe_name.empty()) { + DDRES_CHECK_FWD(push_label(k_process_name_label, uw_output.exe_name)); + } + if (!uw_output.thread_name.empty()) { + DDRES_CHECK_FWD(push_label(k_thread_name_label, uw_output.thread_name)); + } + + DDPROF_DCHECK_FATAL(labels_num <= labels.size(), + "pprof_aggregate - label buffer exceeded"); + ddog_prof_LabelSetId_Result labelset_res = ddog_prof_Profile_intern_labelset( + profile, + {.ptr = labels.data(), .len = static_cast(labels_num)}); + if (labelset_res.tag != + DDOG_PROF_LABEL_SET_ID_RESULT_OK_GENERATIONAL_ID_LABEL_SET_ID) { + ddog_Error_drop(&labelset_res.err); + return ddres_error(DD_WHAT_PPROF); + } + *labelset_id = labelset_res.ok; + return {}; +} + +DDRes intern_profile_location_id(ddog_prof_Profile *profile, + const ddog_prof_Location &location, + ddog_prof_LocationId *location_id) { + ddog_prof_StringId fn_name = + intern_profile_string(profile, to_string_view(location.function.name)); + ddog_prof_StringId fn_system = intern_profile_string( + profile, to_string_view(location.function.system_name)); + ddog_prof_StringId fn_file = + intern_profile_string(profile, to_string_view(location.function.filename)); + ddog_prof_FunctionId_Result function_res = + ddog_prof_Profile_intern_function(profile, fn_name, fn_system, fn_file); + if (function_res.tag != + DDOG_PROF_FUNCTION_ID_RESULT_OK_GENERATIONAL_ID_FUNCTION_ID) { + ddog_Error_drop(&function_res.err); + return ddres_error(DD_WHAT_PPROF); + } + + ddog_prof_StringId map_filename = + intern_profile_string(profile, to_string_view(location.mapping.filename)); + ddog_prof_StringId map_build_id = + intern_profile_string(profile, to_string_view(location.mapping.build_id)); + ddog_prof_MappingId_Result mapping_res = ddog_prof_Profile_intern_mapping( + profile, location.mapping.memory_start, location.mapping.memory_limit, + location.mapping.file_offset, map_filename, map_build_id); + if (mapping_res.tag != + DDOG_PROF_MAPPING_ID_RESULT_OK_GENERATIONAL_ID_MAPPING_ID) { + ddog_Error_drop(&mapping_res.err); + return ddres_error(DD_WHAT_PPROF); + } + + ddog_prof_LocationId_Result loc_res = + ddog_prof_Profile_intern_location_with_mapping_id( + profile, mapping_res.ok, function_res.ok, location.address, + location.line); + if (loc_res.tag != + DDOG_PROF_LOCATION_ID_RESULT_OK_GENERATIONAL_ID_LOCATION_ID) { + ddog_Error_drop(&loc_res.err); + return ddres_error(DD_WHAT_PPROF); + } + *location_id = loc_res.ok; + return {}; +} + std::string_view pid_str(pid_t pid, std::unordered_map &pid_strs) { auto it = pid_strs.find(pid); @@ -236,19 +400,13 @@ ProfValueTypes compute_pprof_values(const ActiveIdsResult &active_ids) { } size_t prepare_labels(const UnwindOutput &uw_output, const PerfWatcher &watcher, + const ddog_prof_ProfilesDictionary *dict, std::unordered_map &pid_strs, - std::span labels) { - constexpr std::string_view k_container_id_label = "container_id"sv; - constexpr std::string_view k_process_id_label = "process_id"sv; - constexpr std::string_view k_process_name_label = "process_name"sv; - // This naming has an impact on backend side (hence the inconsistency with - // process_id) - constexpr std::string_view k_thread_id_label = "thread id"sv; - constexpr std::string_view k_thread_name_label = "thread_name"sv; - constexpr std::string_view k_tracepoint_label = "tracepoint_type"sv; + std::span labels) { + const LabelKeyIds &label_ids = get_label_key_ids(dict); size_t labels_num = 0; if (!uw_output.container_id.empty()) { - labels[labels_num].key = to_CharSlice(k_container_id_label); + labels[labels_num].key = label_ids.container_id; labels[labels_num].str = to_CharSlice(uw_output.container_id); ++labels_num; } @@ -257,17 +415,17 @@ size_t prepare_labels(const UnwindOutput &uw_output, const PerfWatcher &watcher, // (TID;PID) tuples, so except for symbol table overhead it doesn't matter // much if TID implies PID for clarity. if (!watcher.suppress_pid || !watcher.suppress_tid) { - labels[labels_num].key = to_CharSlice(k_process_id_label); + labels[labels_num].key = label_ids.process_id; labels[labels_num].str = to_CharSlice(pid_str(uw_output.pid, pid_strs)); ++labels_num; } if (!watcher.suppress_tid) { - labels[labels_num].key = to_CharSlice(k_thread_id_label); + labels[labels_num].key = label_ids.thread_id; labels[labels_num].str = to_CharSlice(pid_str(uw_output.tid, pid_strs)); ++labels_num; } if (watcher_has_tracepoint(&watcher)) { - labels[labels_num].key = to_CharSlice(k_tracepoint_label); + labels[labels_num].key = label_ids.tracepoint_type; // If the label is given, use that as the tracepoint type. Otherwise, // default to the event name if (!watcher.tracepoint_label.empty()) { @@ -278,12 +436,12 @@ size_t prepare_labels(const UnwindOutput &uw_output, const PerfWatcher &watcher, ++labels_num; } if (!uw_output.exe_name.empty()) { - labels[labels_num].key = to_CharSlice(k_process_name_label); + labels[labels_num].key = label_ids.process_name; labels[labels_num].str = to_CharSlice(uw_output.exe_name); ++labels_num; } if (!uw_output.thread_name.empty()) { - labels[labels_num].key = to_CharSlice(k_thread_name_label); + labels[labels_num].key = label_ids.thread_name; labels[labels_num].str = to_CharSlice(uw_output.thread_name); ++labels_num; } @@ -308,7 +466,9 @@ std::span adjust_locations(const PerfWatcher *watcher, DDRes process_symbolization( std::span locs, const SymbolHdr &symbol_hdr, const FileInfoVector &file_infos, Symbolizer *symbolizer, + const ddog_prof_ProfilesDictionary *dict, std::array &locations_buff, + std::array &locations2_buff, Symbolizer::BlazeResultsWrapper &session_results, unsigned &write_index) { unsigned index = 0; @@ -325,7 +485,12 @@ DDRes process_symbolization( const FunLoc &loc = locs[index]; write_location(loc, mapinfo_table[loc.map_info_idx], symbol_table[loc.symbol_idx], - &locations_buff[write_index++]); + symbol_hdr.profiles_dictionary(), + &locations_buff[write_index]); + write_location2(loc, mapinfo_table[loc.map_info_idx], + symbol_table[loc.symbol_idx], + &locations2_buff[write_index]); + ++write_index; ++index; continue; } @@ -352,11 +517,17 @@ DDRes process_symbolization( } } // Perform symbolization for all collected addresses + const unsigned start_write_index = write_index; const DDRes res = symbolizer->symbolize_pprof( elf_addresses, file_id, current_file_path, mapinfo_table[locs[start_index].map_info_idx], std::span{locations_buff}, write_index, session_results); + for (unsigned i = start_write_index; i < write_index; ++i) { + DDRES_CHECK_FWD( + write_location2_from_location(dict, locations_buff[i], + &locations2_buff[i])); + } if (IsDDResNotOK(res)) { if (IsDDResFatal(res)) { DDRES_RETURN_ERROR_LOG(DD_WHAT_SYMBOLIZER, "Failed to symbolize pprof"); @@ -372,7 +543,11 @@ DDRes process_symbolization( std::span{locations_buff.data(), write_index})) { // Write a common frame to indicate an incomplete stack write_location(0, k_common_frame_names[incomplete_stack], {}, 0, {}, - &locations_buff[write_index++]); + &locations_buff[write_index]); + DDRES_CHECK_FWD( + write_location2_from_location(dict, locations_buff[write_index], + &locations2_buff[write_index])); + ++write_index; } // Write the binary frame if it exists and is valid @@ -381,7 +556,12 @@ DDRes process_symbolization( const FunLoc &loc = locs.back(); write_location(loc, mapinfo_table[loc.map_info_idx], symbol_table[loc.symbol_idx], - &locations_buff[write_index++]); + symbol_hdr.profiles_dictionary(), + &locations_buff[write_index]); + write_location2(loc, mapinfo_table[loc.map_info_idx], + symbol_table[loc.symbol_idx], + &locations2_buff[write_index]); + ++write_index; } return {}; } @@ -530,24 +710,27 @@ DDRes pprof_aggregate(const UnwindOutput *uw_output, } std::array locations_buff; + std::array locations2_buff; std::span locs{uw_output->locs}; locs = adjust_locations(watcher, locs); // Blaze results should remain alive until we aggregate the pprof data Symbolizer::BlazeResultsWrapper session_results; unsigned write_index = 0; - DDRES_CHECK_FWD(process_symbolization(locs, symbol_hdr, file_infos, - symbolizer, locations_buff, - session_results, write_index)); - std::array labels{}; + DDRES_CHECK_FWD(process_symbolization( + locs, symbol_hdr, file_infos, symbolizer, + symbol_hdr.profiles_dictionary(), locations_buff, locations2_buff, + session_results, write_index)); + std::array labels{}; // Create the labels for the sample. Two samples are the same only when // their locations _and_ all labels are identical, so we admit a very limited // number of labels at present - const size_t labels_num = - prepare_labels(*uw_output, *watcher, pprof->_pid_str, std::span{labels}); + const size_t labels_num = prepare_labels( + *uw_output, *watcher, symbol_hdr.profiles_dictionary(), pprof->_pid_str, + std::span{labels}); - ddog_prof_Sample const sample = { - .locations = {.ptr = locations_buff.data(), .len = write_index}, + ddog_prof_Sample2 const sample = { + .locations = {.ptr = locations2_buff.data(), .len = write_index}, .values = {.ptr = values, .len = pprof->_nb_values}, .labels = {.ptr = labels.data(), .len = labels_num}, }; @@ -557,12 +740,11 @@ DDRes pprof_aggregate(const UnwindOutput *uw_output, pack.value, uw_output->pid, uw_output->tid, value_pos, *watcher); } - auto res = ddog_prof_Profile_add(profile, sample, pack.timestamp); - if (res.tag != DDOG_PROF_PROFILE_RESULT_OK) { - defer { ddog_Error_drop(&res.err); }; - auto msg = ddog_Error_message(&res.err); - DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to add profile: %*s", - static_cast(msg.len), msg.ptr); + auto res = ddog_prof_Profile_add2(profile, sample, pack.timestamp); + if (res.err != nullptr) { + defer { ddog_prof_Status_drop(&res); }; + DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to add profile: %s", + res.err); } return {}; } @@ -578,4 +760,67 @@ DDRes pprof_reset(DDProfPProf *pprof) { pprof->_pid_str.clear(); return {}; } + +DDRes pprof_aggregate_interned_sample( + const UnwindOutput *uw_output, const SymbolHdr &symbol_hdr, + const DDProfValuePack &pack, const PerfWatcher *watcher, + const FileInfoVector &file_infos, bool show_samples, + EventAggregationModePos value_pos, Symbolizer *symbolizer, + DDProfPProf *pprof) { + const PProfIndices &pprof_indices = watcher->pprof_indices[value_pos]; + ddog_prof_Profile *profile = &pprof->_profile; + int64_t values[k_max_value_types] = {}; + assert(pprof_indices.pprof_index != -1); + values[pprof_indices.pprof_index] = pack.value; + if (watcher_has_countable_sample_type(watcher)) { + assert(pprof_indices.pprof_count_index != -1); + values[pprof_indices.pprof_count_index] = pack.count; + } + + std::array locations_buff; + std::array locations2_buff; + std::span locs{uw_output->locs}; + locs = adjust_locations(watcher, locs); + + Symbolizer::BlazeResultsWrapper session_results; + unsigned write_index = 0; + DDRES_CHECK_FWD(process_symbolization( + locs, symbol_hdr, file_infos, symbolizer, + symbol_hdr.profiles_dictionary(), locations_buff, locations2_buff, + session_results, write_index)); + + ddog_prof_LabelSetId labelset_id{}; + DDRES_CHECK_FWD(intern_profile_labelset(profile, *uw_output, *watcher, + pprof->_pid_str, &labelset_id)); + + std::array location_ids{}; + for (unsigned i = 0; i < write_index; ++i) { + DDRES_CHECK_FWD( + intern_profile_location_id(profile, locations_buff[i], + &location_ids[i])); + } + ddog_prof_StackTraceId_Result stacktrace_res = + ddog_prof_Profile_intern_stacktrace( + profile, {.ptr = location_ids.data(), .len = write_index}); + if (stacktrace_res.tag != + DDOG_PROF_STACK_TRACE_ID_RESULT_OK_GENERATIONAL_ID_STACK_TRACE_ID) { + ddog_Error_drop(&stacktrace_res.err); + DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to intern stacktrace"); + } + + if (show_samples) { + ddprof_print_sample(std::span{locations_buff.data(), write_index}, + pack.value, uw_output->pid, uw_output->tid, value_pos, + *watcher); + } + + ddog_VoidResult res = ddog_prof_Profile_intern_sample( + profile, stacktrace_res.ok, {.ptr = values, .len = pprof->_nb_values}, + labelset_id, pack.timestamp); + if (res.tag != DDOG_VOID_RESULT_OK) { + ddog_Error_drop(&res.err); + DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to add profile"); + } + return {}; +} } // namespace ddprof diff --git a/src/runtime_symbol_lookup.cc b/src/runtime_symbol_lookup.cc index 193e269d8..b58301ebe 100644 --- a/src/runtime_symbol_lookup.cc +++ b/src/runtime_symbol_lookup.cc @@ -5,6 +5,7 @@ #include "runtime_symbol_lookup.hpp" +#include "ddog_profiling_utils.hpp" #include "ddres.hpp" #include "defer.hpp" #include "jit/jitdump.hpp" @@ -45,11 +46,10 @@ RuntimeSymbolLookup::perfmaps_open(int pid, const char *path_to_perfmap = "") { return UniqueFile{fopen(buf, "r")}; } -bool RuntimeSymbolLookup::insert_or_replace(std::string_view symbol, - ProcessAddress_t address, - Offset_t code_size, - SymbolMap &symbol_map, - SymbolTable &symbol_table) { +bool RuntimeSymbolLookup::insert_or_replace( + std::string_view symbol, ProcessAddress_t address, Offset_t code_size, + SymbolMap &symbol_map, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict) { if (should_skip_symbol(symbol)) { return false; } @@ -70,27 +70,26 @@ bool RuntimeSymbolLookup::insert_or_replace(std::string_view symbol, symbol_map.emplace_hint( find_res.first, address, SymbolSpan(address + code_size - 1, symbol_table.size())); - symbol_table.emplace_back(std::string(symbol), std::string(symbol), 0, - "jit"); + symbol_table.emplace_back(make_symbol(std::string(symbol), + std::string(symbol), 0, "jit", + dict)); } else { // todo managing range erase (we can overall with other syms) SymbolIdx_t const existing = find_res.first->second.get_symbol_idx(); -#ifdef DEBUG - LG_DBG("Existyng sym -- %s (%lx-%lx)", - symbol_table[existing]._demangled_name.c_str(), - find_res.first->first, find_res.first->second.get_end()); - LG_DBG("New sym -- %s (%lx-%lx)", code_load.func_name.c_str(), - code_load.code_addr, code_load.code_size + code_load.code_addr); -#endif - if (symbol_table[existing]._demangled_name == symbol) { + ddog_prof_StringId2 name_id = intern_string(dict, symbol); + if (symbol_table[existing]._name_id == name_id) { find_res.first->second.set_end(address + code_size - 1); } else { // remove current element (as start can be different) symbol_map.erase(find_res.first); symbol_map.emplace(address, SymbolSpan(address + code_size - 1, existing)); - symbol_table[existing]._demangled_name = symbol; - symbol_table[existing]._symname = symbol; + Symbol &existing_symbol = symbol_table[existing]; + existing_symbol._name_id = name_id; + existing_symbol._file_id = intern_string(dict, "jit"); + existing_symbol._function_id = intern_function_ids( + dict, existing_symbol._name_id, existing_symbol._file_id, + DDOG_PROF_STRINGID2_EMPTY); } } @@ -100,9 +99,9 @@ namespace { bool is_absolute_path(std::string_view path) { return path.front() == '/'; } } // namespace -DDRes RuntimeSymbolLookup::fill_from_jitdump(std::string_view jitdump_path, - pid_t pid, SymbolMap &symbol_map, - SymbolTable &symbol_table) { +DDRes RuntimeSymbolLookup::fill_from_jitdump( + std::string_view jitdump_path, pid_t pid, SymbolMap &symbol_map, + SymbolTable &symbol_table, const ddog_prof_ProfilesDictionary *dict) { const std::string path = is_absolute_path(jitdump_path) ? absl::Substitute("$0/proc/$1/root$2", _path_to_proc, pid, jitdump_path) @@ -126,7 +125,7 @@ DDRes RuntimeSymbolLookup::fill_from_jitdump(std::string_view jitdump_path, for (const JITRecordCodeLoad &code_load : jitdump.code_load) { insert_or_replace(code_load.func_name, code_load.code_addr, - code_load.code_size, symbol_map, symbol_table); + code_load.code_size, symbol_map, symbol_table, dict); } // todo we can add file and inlined functions with debug info return {}; @@ -139,8 +138,9 @@ bool RuntimeSymbolLookup::should_skip_symbol(std::string_view symbol) { }); } -DDRes RuntimeSymbolLookup::fill_from_perfmap(int pid, SymbolMap &symbol_map, - SymbolTable &symbol_table) { +DDRes RuntimeSymbolLookup::fill_from_perfmap( + int pid, SymbolMap &symbol_map, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict) { auto pmf{perfmaps_open(pid, "/tmp")}; if (!pmf) { LG_DBG("Unable to read perfmap file (PID%d)", pid); @@ -166,7 +166,8 @@ DDRes RuntimeSymbolLookup::fill_from_perfmap(int pid, SymbolMap &symbol_map, std::strtoul(address_buff, nullptr, hexadecimal_base); Offset_t const code_size = std::strtoul(size_buff, nullptr, hexadecimal_base); - insert_or_replace(buffer, address, code_size, symbol_map, symbol_table); + insert_or_replace(buffer, address, code_size, symbol_map, symbol_table, + dict); } free(line); return {}; @@ -175,6 +176,7 @@ DDRes RuntimeSymbolLookup::fill_from_perfmap(int pid, SymbolMap &symbol_map, SymbolIdx_t RuntimeSymbolLookup::get_or_insert_jitdump(pid_t pid, ProcessAddress_t pc, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict, std::string_view jitdump_path) { SymbolInfo &symbol_info = _pid_map[pid]; SymbolMap::FindRes find_res = symbol_info._map.find_closest(pc); @@ -182,7 +184,7 @@ RuntimeSymbolLookup::get_or_insert_jitdump(pid_t pid, ProcessAddress_t pc, // refresh as we expect there to be new symbols ++_stats._nb_jit_reads; if (IsDDResFatal(fill_from_jitdump(jitdump_path, pid, symbol_info._map, - symbol_table))) { + symbol_table, dict))) { // Some warnings can be expected with incomplete files flag_lookup_failure(symbol_info, jitdump_path); return -1; @@ -198,14 +200,15 @@ RuntimeSymbolLookup::get_or_insert_jitdump(pid_t pid, ProcessAddress_t pc, } SymbolIdx_t RuntimeSymbolLookup::get_or_insert(pid_t pid, ProcessAddress_t pc, - SymbolTable &symbol_table) { + SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict) { SymbolInfo &symbol_info = _pid_map[pid]; SymbolMap::FindRes find_res = symbol_info._map.find_closest(pc); // Only check the file if we did not get failures in this cycle (for this pid) if (!find_res.second && !has_lookup_failure(symbol_info, "perfmap")) { ++_stats._nb_jit_reads; - fill_from_perfmap(pid, symbol_info._map, symbol_table); + fill_from_perfmap(pid, symbol_info._map, symbol_table, dict); find_res = symbol_info._map.find_closest(pc); } if (!find_res.second) { diff --git a/src/symbol_hdr.cc b/src/symbol_hdr.cc new file mode 100644 index 000000000..a182c00e1 --- /dev/null +++ b/src/symbol_hdr.cc @@ -0,0 +1,35 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. This product includes software +// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present +// Datadog, Inc. + +#include "symbol_hdr.hpp" + +#include + +namespace ddprof { + +void ProfilesDictionaryDeleter::operator()( + ::ddog_prof_ProfilesDictionaryHandle *handle) const { + if (handle) { + ::ddog_prof_ProfilesDictionary_drop(handle); + delete handle; + } +} + +SymbolHdr::SymbolHdr(std::string_view path_to_proc) + : _runtime_symbol_lookup(path_to_proc) { + auto *handle = new ::ddog_prof_ProfilesDictionaryHandle(); + ::ddog_prof_Status status = ::ddog_prof_ProfilesDictionary_new(handle); + if (status.err != nullptr) { + LG_WRN("Failed to create ProfilesDictionary for string interning: %s", + status.err); + ::ddog_prof_Status_drop(&status); + delete handle; + } else { + _profiles_dictionary.reset(handle); + } +} + +} // namespace ddprof + diff --git a/src/unwind_dwfl.cc b/src/unwind_dwfl.cc index 87bd3c455..73bc75f0f 100644 --- a/src/unwind_dwfl.cc +++ b/src/unwind_dwfl.cc @@ -213,8 +213,10 @@ int frame_cb(Dwfl_Frame *dwfl_frame, void *arg) { DDRes add_unsymbolized_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, const DDProfMod &ddprof_mod, FileInfoId_t file_info_id) { + const ddog_prof_ProfilesDictionary *dict = + us->symbol_hdr.profiles_dictionary(); MapInfoIdx_t const map_idx = us->symbol_hdr._mapinfo_lookup.get_or_insert( - us->pid, us->symbol_hdr._mapinfo_table, dso, ddprof_mod._build_id); + us->pid, us->symbol_hdr._mapinfo_table, dso, ddprof_mod._build_id, dict); return add_frame(k_symbol_idx_null, file_info_id, map_idx, pc, pc - ddprof_mod._sym_bias, us); } @@ -226,13 +228,15 @@ DDRes add_runtime_symbol_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, SymbolTable &symbol_table = unwind_symbol_hdr._symbol_table; RuntimeSymbolLookup &runtime_symbol_lookup = unwind_symbol_hdr._runtime_symbol_lookup; + const ddog_prof_ProfilesDictionary *dict = + unwind_symbol_hdr.profiles_dictionary(); SymbolIdx_t symbol_idx = k_symbol_idx_null; if (jitdump_path.empty()) { symbol_idx = - runtime_symbol_lookup.get_or_insert(dso._pid, pc, symbol_table); + runtime_symbol_lookup.get_or_insert(dso._pid, pc, symbol_table, dict); } else { symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - dso._pid, pc, symbol_table, jitdump_path); + dso._pid, pc, symbol_table, dict, jitdump_path); } if (symbol_idx == k_symbol_idx_null) { add_dso_frame(us, dso, pc, "pc"); @@ -240,7 +244,7 @@ DDRes add_runtime_symbol_frame(UnwindState *us, const Dso &dso, ElfAddress_t pc, } MapInfoIdx_t const map_idx = us->symbol_hdr._mapinfo_lookup.get_or_insert( - us->pid, us->symbol_hdr._mapinfo_table, dso, {}); + us->pid, us->symbol_hdr._mapinfo_table, dso, {}, dict); return add_frame(symbol_idx, k_file_info_undef, map_idx, pc, pc - dso.start() + dso.offset(), us); diff --git a/src/unwind_helper.cc b/src/unwind_helper.cc index e2e0b89cd..eb3ddcd50 100644 --- a/src/unwind_helper.cc +++ b/src/unwind_helper.cc @@ -35,9 +35,11 @@ DDRes add_frame(SymbolIdx_t symbol_idx, FileInfoId_t file_info_id, } if (map_idx == -1) { // just add an empty element for mapping info + const ddog_prof_ProfilesDictionary *dict = + us->symbol_hdr.profiles_dictionary(); map_idx = us->symbol_hdr._common_mapinfo_lookup.get_or_insert( CommonMapInfoLookup::MappingErrors::empty, - us->symbol_hdr._mapinfo_table); + us->symbol_hdr._mapinfo_table, dict); } output->locs.emplace_back(FunLoc{.ip = pc, .elf_addr = elf_addr, @@ -48,25 +50,31 @@ DDRes add_frame(SymbolIdx_t symbol_idx, FileInfoId_t file_info_id, } void add_common_frame(UnwindState *us, SymbolErrors lookup_case) { - add_frame_without_mapping(us, - us->symbol_hdr._common_symbol_lookup.get_or_insert( - lookup_case, us->symbol_hdr._symbol_table)); + const ddog_prof_ProfilesDictionary *dict = + us->symbol_hdr.profiles_dictionary(); + add_frame_without_mapping( + us, us->symbol_hdr._common_symbol_lookup.get_or_insert( + lookup_case, us->symbol_hdr._symbol_table, dict)); } void add_dso_frame(UnwindState *us, const Dso &dso, ElfAddress_t normalized_addr, std::string_view addr_type) { + const ddog_prof_ProfilesDictionary *dict = + us->symbol_hdr.profiles_dictionary(); add_frame_without_mapping( us, us->symbol_hdr._dso_symbol_lookup.get_or_insert( - normalized_addr, dso, us->symbol_hdr._symbol_table, addr_type)); + normalized_addr, dso, us->symbol_hdr._symbol_table, dict, addr_type)); } void add_virtual_base_frame(UnwindState *us) { + const ddog_prof_ProfilesDictionary *dict = + us->symbol_hdr.profiles_dictionary(); add_frame_without_mapping( us, us->symbol_hdr._base_frame_symbol_lookup.get_or_insert( us->pid, us->symbol_hdr._symbol_table, - us->symbol_hdr._dso_symbol_lookup, us->dso_hdr)); + us->symbol_hdr._dso_symbol_lookup, us->dso_hdr, dict)); } void add_error_frame(const Dso *dso, UnwindState *us, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 27cb074e4..99514d4f0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -294,6 +294,7 @@ set(ALLOCATION_TRACKER_UT_SRCS ../src/common_mapinfo_lookup.cc ../src/common_symbol_lookup.cc ../src/create_elf.cc + ../src/ddog_profiling_utils.cc ../src/ddprof_process.cc ../src/ddprof_stats.cc ../src/dso_symbol_lookup.cc @@ -359,7 +360,10 @@ add_unit_test(timer-ut timer-ut.cc ../src/tsc_clock.cc ../src/perf.cc) add_unit_test(ddprof_file_info-ut ddprof_file_info-ut.cc) add_unit_test(runtime_symbol_lookup-ut runtime_symbol_lookup-ut.cc ../src/runtime_symbol_lookup.cc - ../src/symbol_map.cc ../src/jit/jitdump.cc) + ../src/ddog_profiling_utils.cc ../src/symbol_hdr.cc + ../src/demangler/demangler.cc ../src/symbol_map.cc + ../src/jit/jitdump.cc + LIBRARIES Datadog::Profiling llvm-demangle) add_unit_test(ddprof_cpumask-ut ddprof_cpumask-ut.cc ../src/ddprof_cpumask.cc) diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 9ead0d151..577a4df5f 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -157,7 +157,8 @@ TEST(DDProfExporter, simple) { FileInfoVector file_infos; SymbolTable &table = symbol_hdr._symbol_table; MapInfoTable &mapinfo_table = symbol_hdr._mapinfo_table; - fill_unwind_symbols(table, mapinfo_table, mock_output); + fill_unwind_symbols(table, mapinfo_table, mock_output, + symbol_hdr.profiles_dictionary()); DDProfContext ctx = {}; ctx.watchers.push_back(*ewatcher_from_str("sCPU")); res = pprof_create_profile(&pprofs, ctx, diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index 3f402eabb..1a992f64c 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -62,7 +62,8 @@ TEST(DDProfPProf, aggregate) { SymbolTable &table = symbol_hdr._symbol_table; MapInfoTable &mapinfo_table = symbol_hdr._mapinfo_table; FileInfoVector file_infos; - fill_unwind_symbols(table, mapinfo_table, mock_output); + fill_unwind_symbols(table, mapinfo_table, mock_output, + symbol_hdr.profiles_dictionary()); DDProfPProf pprof; DDProfContext ctx = {}; @@ -91,7 +92,8 @@ TEST(DDProfPProf, just_live) { SymbolTable &table = symbol_hdr._symbol_table; MapInfoTable &mapinfo_table = symbol_hdr._mapinfo_table; - fill_unwind_symbols(table, mapinfo_table, mock_output); + fill_unwind_symbols(table, mapinfo_table, mock_output, + symbol_hdr.profiles_dictionary()); DDProfPProf pprof; DDProfContext ctx = {}; { diff --git a/test/runtime_symbol_lookup-ut.cc b/test/runtime_symbol_lookup-ut.cc index cb36bedda..094e5e17c 100644 --- a/test/runtime_symbol_lookup-ut.cc +++ b/test/runtime_symbol_lookup-ut.cc @@ -7,57 +7,87 @@ #include "loghandle.hpp" #include "runtime_symbol_lookup.hpp" +#include "symbol_hdr.hpp" #include "symbol_table.hpp" +#include #include namespace ddprof { +namespace { +std::string dict_string(const ddog_prof_ProfilesDictionary *dict, + ddog_prof_StringId2 string_id) { + if (!dict || !string_id) { + return {}; + } + ddog_CharSlice slice{nullptr, 0}; + ddog_prof_Status status = + ddog_prof_ProfilesDictionary_get_str(&slice, dict, string_id); + if (status.err != nullptr) { + ddog_prof_Status_drop(&status); + return {}; + } + return std::string(slice.ptr, slice.len); +} +} // namespace + TEST(runtime_symbol_lookup, no_map) { + SymbolHdr symbol_hdr; SymbolTable symbol_table; RuntimeSymbolLookup runtime_symbol_lookup(UNIT_TEST_DATA); ProcessAddress_t pc = 0x7FB0614BB980; // no pid 43 SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(43, pc, symbol_table); + runtime_symbol_lookup.get_or_insert(43, pc, symbol_table, + symbol_hdr.profiles_dictionary()); // We expect no symbols to be found for this pid ASSERT_EQ(symbol_idx, -1); } TEST(runtime_symbol_lookup, parse_map) { + SymbolHdr symbol_hdr; SymbolTable symbol_table; RuntimeSymbolLookup runtime_symbol_lookup(UNIT_TEST_DATA); // reads a file with symbols generated from .NET ProcessAddress_t pc = 0x7FB0614BB980; SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(42, pc, symbol_table); + runtime_symbol_lookup.get_or_insert(42, pc, symbol_table, + symbol_hdr.profiles_dictionary()); ASSERT_NE(symbol_idx, -1); - ASSERT_TRUE(symbol_table[symbol_idx]._symname.find( - "RuntimeEnvironmentInfo::get_OsPlatform") != + const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._name_id); + ASSERT_TRUE(sym_name.find("RuntimeEnvironmentInfo::get_OsPlatform") != std::string::npos); } TEST(runtime_symbol_lookup, overflow) { + SymbolHdr symbol_hdr; SymbolTable symbol_table; RuntimeSymbolLookup runtime_symbol_lookup(UNIT_TEST_DATA); // reads a file with symbols generated from .NET { ProcessAddress_t pc = 0x00007FB06149E6A0; SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(1, pc, symbol_table); + runtime_symbol_lookup.get_or_insert(1, pc, symbol_table, + symbol_hdr.profiles_dictionary()); ASSERT_NE(symbol_idx, -1); - LG_NFO("%s", symbol_table[symbol_idx]._symname.c_str()); - ASSERT_TRUE(symbol_table[symbol_idx]._symname.size() <= 300); + const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._name_id); + LG_NFO("%s", sym_name.c_str()); + ASSERT_TRUE(sym_name.size() <= 300); } { ProcessAddress_t pc = 0xFFFFFFFFFFFFFFFE; SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(1, pc, symbol_table); + runtime_symbol_lookup.get_or_insert(1, pc, symbol_table, + symbol_hdr.profiles_dictionary()); ASSERT_EQ(symbol_idx, -1); } } TEST(runtime_symbol_lookup, jitdump_simple) { + SymbolHdr symbol_hdr; pid_t mypid = getpid(); SymbolTable symbol_table; RuntimeSymbolLookup runtime_symbol_lookup(""); @@ -65,13 +95,15 @@ TEST(runtime_symbol_lookup, jitdump_simple) { std::string jit_path = std::string(UNIT_TEST_DATA) + "/" + "jit-simple-julia.dump"; SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - mypid, pc, symbol_table, jit_path); + mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_NE(symbol_idx, -1); - ASSERT_EQ(std::string("julia_b_11"), - symbol_table[symbol_idx]._demangled_name); + const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._name_id); + ASSERT_EQ(std::string("julia_b_11"), sym_name); } TEST(runtime_symbol_lookup, double_load) { + SymbolHdr symbol_hdr; // ensure we don't increase number of symbols when we load several times pid_t mypid = getpid(); SymbolTable symbol_table; @@ -80,17 +112,18 @@ TEST(runtime_symbol_lookup, double_load) { std::string jit_path = std::string(UNIT_TEST_DATA) + "/" + "jit-simple-julia.dump"; SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - mypid, pc, symbol_table, jit_path); + mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_EQ(symbol_idx, -1); auto current_table_size = symbol_table.size(); symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - mypid, pc, symbol_table, jit_path); + mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); auto new_table_size = symbol_table.size(); // Check that we did not grow in number of symbols (as they are the same) ASSERT_EQ(current_table_size, new_table_size); } TEST(runtime_symbol_lookup, jitdump_partial) { + SymbolHdr symbol_hdr; // Test what happens when the file is altered pid_t mypid = getpid(); SymbolTable symbol_table; @@ -100,36 +133,38 @@ TEST(runtime_symbol_lookup, jitdump_partial) { std::string jit_path = std::string(UNIT_TEST_DATA) + "/" + "jit-julia-partial.dump"; SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - mypid, pc, symbol_table, jit_path); + mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_EQ(symbol_idx, -1); } { std::string jit_path = std::string(UNIT_TEST_DATA) + "/" + "jit-dotnet-partial.dump"; SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - mypid, pc, symbol_table, jit_path); + mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_EQ(symbol_idx, -1); ASSERT_NE(symbol_table.size(), 0); } } TEST(runtime_symbol_lookup, jitdump_bad_file) { + SymbolHdr symbol_hdr; pid_t mypid = getpid(); SymbolTable symbol_table; RuntimeSymbolLookup runtime_symbol_lookup(""); ProcessAddress_t pc = 0xbadbeef; std::string jit_path = std::string(UNIT_TEST_DATA) + "/" + "bad_file.dump"; SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - mypid, pc, symbol_table, jit_path); + mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_EQ(symbol_idx, -1); // this should not trigger another read symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - mypid, pc, symbol_table, jit_path); + mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_EQ(symbol_idx, -1); } TEST(runtime_symbol_lookup, relative_path) { + SymbolHdr symbol_hdr; std::string jit_path = std::string(".debug/jit/llvm-something/jit-1560413.dump"); // specify a fake /proc directory @@ -137,9 +172,11 @@ TEST(runtime_symbol_lookup, relative_path) { ProcessAddress_t pc = 0x7e1304b00a30; SymbolTable symbol_table; SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - 42, pc, symbol_table, jit_path); + 42, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_NE(symbol_idx, -1); - EXPECT_EQ(symbol_table[symbol_idx]._symname, "julia_b_11"); + const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._name_id); + EXPECT_EQ(sym_name, "julia_b_11"); { RuntimeSymbolLookup::Stats stats = runtime_symbol_lookup.get_stats(); EXPECT_EQ(stats._symbol_count, 20); @@ -147,6 +184,7 @@ TEST(runtime_symbol_lookup, relative_path) { } TEST(runtime_symbol_lookup, jitdump_vs_perfmap) { + SymbolHdr symbol_hdr; pid_t mypid = 8; // check that we are loading the same symbol on both sides std::string expected_sym = @@ -163,9 +201,11 @@ TEST(runtime_symbol_lookup, jitdump_vs_perfmap) { std::string jit_path = std::string(UNIT_TEST_DATA) + "/" + "jit-dotnet-8.dump"; SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( - mypid, pc, symbol_table, jit_path); + mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_NE(symbol_idx, -1); - EXPECT_EQ(symbol_table[symbol_idx]._symname, expected_sym); + const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._name_id); + EXPECT_EQ(sym_name, expected_sym); { RuntimeSymbolLookup::Stats stats = runtime_symbol_lookup.get_stats(); EXPECT_EQ(stats._symbol_count, 20809); @@ -175,9 +215,12 @@ TEST(runtime_symbol_lookup, jitdump_vs_perfmap) { RuntimeSymbolLookup runtime_symbol_lookup_perfmap(UNIT_TEST_DATA); SymbolTable symbol_table_perfmap; symbol_idx = runtime_symbol_lookup_perfmap.get_or_insert( - mypid, pc, symbol_table_perfmap); + mypid, pc, symbol_table_perfmap, symbol_hdr.profiles_dictionary()); ASSERT_NE(symbol_idx, -1); - EXPECT_EQ(symbol_table_perfmap[symbol_idx]._symname, expected_sym); + const std::string sym_name_perfmap = + dict_string(symbol_hdr.profiles_dictionary(), + symbol_table_perfmap[symbol_idx]._name_id); + EXPECT_EQ(sym_name_perfmap, expected_sym); { RuntimeSymbolLookup::Stats stats = runtime_symbol_lookup_perfmap.get_stats(); diff --git a/test/unwind_output_mock.hpp b/test/unwind_output_mock.hpp index 8d667735a..23eac86a3 100644 --- a/test/unwind_output_mock.hpp +++ b/test/unwind_output_mock.hpp @@ -5,6 +5,7 @@ #pragma once +#include "ddog_profiling_utils.hpp" #include "mapinfo_table.hpp" #include "symbol_table.hpp" #include "unwind_output.hpp" @@ -25,18 +26,23 @@ static const char *s_src_paths[K_MOCK_LOC_SIZE] = { static const char *s_so_paths[] = {"bar.0.so"}; -static inline void fill_symbol_table_1(SymbolTable &symbol_table) { +static inline void +fill_symbol_table_1(SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict) { for (unsigned i = 0; i < K_MOCK_LOC_SIZE; ++i) { - symbol_table.emplace_back(std::string(s_syn_names[i]), - std::string(s_func_names[i]), 10 * i, - std::string(s_src_paths[i])); + symbol_table.emplace_back(make_symbol( + std::string(s_syn_names[i]), std::string(s_func_names[i]), 10 * i, + std::string(s_src_paths[i]), dict)); } } -static inline void fill_mapinfo_table_1(MapInfoTable &mapinfo_table) { +static inline void +fill_mapinfo_table_1(MapInfoTable &mapinfo_table, + const ddog_prof_ProfilesDictionary *dict) { for (unsigned i = 0; i < K_MOCK_LOC_SIZE; ++i) { mapinfo_table.emplace_back(100 + i, 200 + i, 10 + i, std::string{s_so_paths[0]}, BuildIdStr{}); + mapinfo_table.back()._mapping_id = intern_mapping(dict, mapinfo_table.back()); } } @@ -54,9 +60,10 @@ static inline void fill_unwind_output_1(UnwindOutput &uw_output) { static inline void fill_unwind_symbols(SymbolTable &symbol_table, MapInfoTable &mapinfo_table, - UnwindOutput &uw_output) { - fill_symbol_table_1(symbol_table); - fill_mapinfo_table_1(mapinfo_table); + UnwindOutput &uw_output, + const ddog_prof_ProfilesDictionary *dict) { + fill_symbol_table_1(symbol_table, dict); + fill_mapinfo_table_1(mapinfo_table, dict); fill_unwind_output_1(uw_output); } From 65238ee31f5efaf8089b98f06ec0d5bc8b63197d Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Feb 2026 10:24:48 +0100 Subject: [PATCH 4/9] String interning - intern labels --- include/pprof/ddprof_pprof.hpp | 39 ++++- src/ddprof_worker.cc | 4 +- src/pprof/ddprof_pprof.cc | 282 +++++++++++++++------------------ test/ddprof_exporter-ut.cc | 6 +- test/ddprof_pprof-ut.cc | 12 +- 5 files changed, 167 insertions(+), 176 deletions(-) diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index e9457e37b..7c570e987 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -22,14 +22,46 @@ class Symbolizer; struct SymbolHdr; struct DDProfPProf { + struct LabelKeyIds { + ddog_prof_StringId container_id{}; + ddog_prof_StringId process_id{}; + ddog_prof_StringId process_name{}; + ddog_prof_StringId thread_id{}; + ddog_prof_StringId thread_name{}; + ddog_prof_StringId tracepoint_type{}; + bool initialized{false}; + }; + + struct LabelKeyPair { + ddog_prof_StringId key{}; + ddog_prof_StringId value{}; + }; + + struct LabelKeyPairHash { + size_t operator()(const LabelKeyPair &pair) const noexcept; + }; + /* single profile gathering several value types */ ddog_prof_Profile _profile{}; unsigned _nb_values = 0; Tags _tags; // avoid re-creating strings for all pid numbers std::unordered_map _pid_str; + std::unordered_map _pid_string_ids; + HeterogeneousLookupStringMap _label_value_ids; + std::unordered_map + _label_id_cache; + LabelKeyIds _label_key_ids; }; +inline bool operator==(const DDProfPProf::LabelKeyPair &lhs, + const DDProfPProf::LabelKeyPair &rhs) { + return lhs.key.generation.id == rhs.key.generation.id && + lhs.key.id.offset == rhs.key.id.offset && + lhs.value.generation.id == rhs.value.generation.id && + lhs.value.id.offset == rhs.value.id.offset; +} + struct DDProfValuePack { int64_t value; uint64_t count; @@ -46,13 +78,6 @@ DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx, * @param watcher_idx matches the registered order at profile creation * @param pprof */ -DDRes pprof_aggregate(const UnwindOutput *uw_output, - const SymbolHdr &symbol_hdr, const DDProfValuePack &pack, - const PerfWatcher *watcher, - const FileInfoVector &file_infos, bool show_samples, - EventAggregationModePos value_pos, Symbolizer *symbolizer, - DDProfPProf *pprof); - DDRes pprof_aggregate_interned_sample( const UnwindOutput *uw_output, const SymbolHdr &symbol_hdr, const DDProfValuePack &pack, const PerfWatcher *watcher, diff --git a/src/ddprof_worker.cc b/src/ddprof_worker.cc index 6906a14e9..5ad769d7c 100644 --- a/src/ddprof_worker.cc +++ b/src/ddprof_worker.cc @@ -72,7 +72,7 @@ DDRes report_lost_events(DDProfContext &ctx) { LG_NTC("Reporting %lu lost samples (cumulated lost value: %lu) for " "watcher #%d", nb_lost, value, watcher_idx); - DDRES_CHECK_FWD(pprof_aggregate( + DDRES_CHECK_FWD(pprof_aggregate_interned_sample( &us->output, us->symbol_hdr, {value, nb_lost, 0}, watcher, ctx.worker_ctx.us->dso_hdr.get_file_info_vector(), false, kSumPos, ctx.worker_ctx.symbolizer, @@ -453,7 +453,7 @@ DDRes ddprof_pr_sample(DDProfContext &ctx, perf_event_sample *sample, const DDProfValuePack pack{static_cast(sample_val), 1, timestamp}; - DDRES_CHECK_FWD(pprof_aggregate( + DDRES_CHECK_FWD(pprof_aggregate_interned_sample( &us->output, us->symbol_hdr, pack, watcher, us->dso_hdr.get_file_info_vector(), ctx.params.show_samples, kSumPos, ctx.worker_ctx.symbolizer, pprof)); diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index dd4b88f95..79a8a647a 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -11,6 +11,7 @@ #include "ddprof_defs.hpp" #include "ddres.hpp" #include "defer.hpp" +#include "hash_helper.hpp" #include "pevent_lib.hpp" #include "symbol_hdr.hpp" #include "symbolizer.hpp" @@ -30,6 +31,16 @@ using namespace std::string_view_literals; namespace ddprof { +size_t DDProfPProf::LabelKeyPairHash::operator()( + const DDProfPProf::LabelKeyPair &pair) const noexcept { + size_t seed = 0; + hash_combine(seed, pair.key.generation.id); + hash_combine(seed, pair.key.id.offset); + hash_combine(seed, pair.value.generation.id); + hash_combine(seed, pair.value.id.offset); + return seed; +} + namespace { constexpr size_t k_max_pprof_labels{8}; @@ -48,32 +59,6 @@ struct ActiveIdsResult { PerfWatcher *default_watcher = nullptr; }; -struct LabelKeyIds { - const ddog_prof_ProfilesDictionary *dict = nullptr; - ddog_prof_StringId2 container_id = DDOG_PROF_STRINGID2_EMPTY; - ddog_prof_StringId2 process_id = DDOG_PROF_STRINGID2_EMPTY; - ddog_prof_StringId2 process_name = DDOG_PROF_STRINGID2_EMPTY; - ddog_prof_StringId2 thread_id = DDOG_PROF_STRINGID2_EMPTY; - ddog_prof_StringId2 thread_name = DDOG_PROF_STRINGID2_EMPTY; - ddog_prof_StringId2 tracepoint_type = DDOG_PROF_STRINGID2_EMPTY; -}; - -const LabelKeyIds & -get_label_key_ids(const ddog_prof_ProfilesDictionary *dict) { - static LabelKeyIds ids{}; - if (ids.dict != dict) { - ids = {}; - ids.dict = dict; - ids.container_id = intern_string(dict, k_container_id_label); - ids.process_id = intern_string(dict, k_process_id_label); - ids.process_name = intern_string(dict, k_process_name_label); - ids.thread_id = intern_string(dict, k_thread_id_label); - ids.thread_name = intern_string(dict, k_thread_name_label); - ids.tracepoint_type = intern_string(dict, k_tracepoint_label); - } - return ids; -} - std::string_view pid_str(pid_t pid, std::unordered_map &pid_strs); @@ -98,54 +83,136 @@ ddog_prof_StringId intern_profile_string(ddog_prof_Profile *profile, return res.ok; } +void reset_label_caches(DDProfPProf *pprof) { + pprof->_pid_string_ids.clear(); + pprof->_label_value_ids.clear(); + pprof->_label_id_cache.clear(); + pprof->_label_key_ids = {}; +} + +void init_label_key_ids(DDProfPProf *pprof) { + if (pprof->_label_key_ids.initialized) { + return; + } + ddog_prof_Profile *profile = &pprof->_profile; + pprof->_label_key_ids.container_id = + intern_profile_string(profile, k_container_id_label); + pprof->_label_key_ids.process_id = + intern_profile_string(profile, k_process_id_label); + pprof->_label_key_ids.process_name = + intern_profile_string(profile, k_process_name_label); + pprof->_label_key_ids.thread_id = + intern_profile_string(profile, k_thread_id_label); + pprof->_label_key_ids.thread_name = + intern_profile_string(profile, k_thread_name_label); + pprof->_label_key_ids.tracepoint_type = + intern_profile_string(profile, k_tracepoint_label); + pprof->_label_key_ids.initialized = true; +} + +ddog_prof_StringId cached_label_value_id(DDProfPProf *pprof, + ddog_prof_Profile *profile, + std::string_view value) { + if (value.empty()) { + return ddog_prof_Profile_interned_empty_string(); + } + auto it = pprof->_label_value_ids.find(value); + if (it != pprof->_label_value_ids.end()) { + return it->second; + } + ddog_prof_StringId id = intern_profile_string(profile, value); + pprof->_label_value_ids.emplace(std::string(value), id); + return id; +} + +ddog_prof_StringId cached_pid_string_id(DDProfPProf *pprof, + ddog_prof_Profile *profile, pid_t pid) { + auto it = pprof->_pid_string_ids.find(pid); + if (it != pprof->_pid_string_ids.end()) { + return it->second; + } + const std::string_view pid_value = pid_str(pid, pprof->_pid_str); + ddog_prof_StringId id = intern_profile_string(profile, pid_value); + pprof->_pid_string_ids.emplace(pid, id); + return id; +} + +DDRes get_label_id(DDProfPProf *pprof, ddog_prof_Profile *profile, + ddog_prof_StringId key_id, ddog_prof_StringId value_id, + ddog_prof_LabelId *label_id) { + const DDProfPProf::LabelKeyPair pair{key_id, value_id}; + auto it = pprof->_label_id_cache.find(pair); + if (it != pprof->_label_id_cache.end()) { + *label_id = it->second; + return {}; + } + ddog_prof_LabelId_Result label_res = + ddog_prof_Profile_intern_label_str(profile, key_id, value_id); + if (label_res.tag != DDOG_PROF_LABEL_ID_RESULT_OK_GENERATIONAL_ID_LABEL_ID) { + ddog_Error_drop(&label_res.err); + return ddres_error(DD_WHAT_PPROF); + } + *label_id = label_res.ok; + pprof->_label_id_cache.emplace(pair, label_res.ok); + return {}; +} + DDRes intern_profile_labelset( - ddog_prof_Profile *profile, const UnwindOutput &uw_output, - const PerfWatcher &watcher, - std::unordered_map &pid_strs, + ddog_prof_Profile *profile, DDProfPProf *pprof, + const UnwindOutput &uw_output, const PerfWatcher &watcher, ddog_prof_LabelSetId *labelset_id) { std::array labels{}; size_t labels_num = 0; - auto push_label = [&](std::string_view key, std::string_view value) -> DDRes { - ddog_prof_StringId key_id = intern_profile_string(profile, key); - ddog_prof_StringId value_id = intern_profile_string(profile, value); - ddog_prof_LabelId_Result label_res = - ddog_prof_Profile_intern_label_str(profile, key_id, value_id); - if (label_res.tag != DDOG_PROF_LABEL_ID_RESULT_OK_GENERATIONAL_ID_LABEL_ID) { - ddog_Error_drop(&label_res.err); - return ddres_error(DD_WHAT_PPROF); - } - labels[labels_num++] = label_res.ok; + auto push_label_ids = [&](ddog_prof_StringId key_id, + ddog_prof_StringId value_id) -> DDRes { + ddog_prof_LabelId label_id{}; + DDRES_CHECK_FWD(get_label_id(pprof, profile, key_id, value_id, &label_id)); + labels[labels_num++] = label_id; return {}; }; if (!uw_output.container_id.empty()) { DDRES_CHECK_FWD( - push_label(k_container_id_label, uw_output.container_id)); + push_label_ids(pprof->_label_key_ids.container_id, + cached_label_value_id(pprof, profile, + uw_output.container_id))); } if (!watcher.suppress_pid || !watcher.suppress_tid) { - DDRES_CHECK_FWD(push_label(k_process_id_label, - pid_str(uw_output.pid, pid_strs))); + DDRES_CHECK_FWD( + push_label_ids(pprof->_label_key_ids.process_id, + cached_pid_string_id(pprof, profile, uw_output.pid))); } if (!watcher.suppress_tid) { - DDRES_CHECK_FWD(push_label(k_thread_id_label, - pid_str(uw_output.tid, pid_strs))); + DDRES_CHECK_FWD( + push_label_ids(pprof->_label_key_ids.thread_id, + cached_pid_string_id(pprof, profile, uw_output.tid))); } if (watcher_has_tracepoint(&watcher)) { if (!watcher.tracepoint_label.empty()) { DDRES_CHECK_FWD( - push_label(k_tracepoint_label, watcher.tracepoint_label)); + push_label_ids(pprof->_label_key_ids.tracepoint_type, + cached_label_value_id(pprof, profile, + watcher.tracepoint_label))); } else { DDRES_CHECK_FWD( - push_label(k_tracepoint_label, watcher.tracepoint_event)); + push_label_ids(pprof->_label_key_ids.tracepoint_type, + cached_label_value_id(pprof, profile, + watcher.tracepoint_event))); } } if (!uw_output.exe_name.empty()) { - DDRES_CHECK_FWD(push_label(k_process_name_label, uw_output.exe_name)); + DDRES_CHECK_FWD( + push_label_ids(pprof->_label_key_ids.process_name, + cached_label_value_id(pprof, profile, + uw_output.exe_name))); } if (!uw_output.thread_name.empty()) { - DDRES_CHECK_FWD(push_label(k_thread_name_label, uw_output.thread_name)); + DDRES_CHECK_FWD( + push_label_ids(pprof->_label_key_ids.thread_name, + cached_label_value_id(pprof, profile, + uw_output.thread_name))); } DDPROF_DCHECK_FATAL(labels_num <= labels.size(), @@ -399,57 +466,6 @@ ProfValueTypes compute_pprof_values(const ActiveIdsResult &active_ids) { return result; } -size_t prepare_labels(const UnwindOutput &uw_output, const PerfWatcher &watcher, - const ddog_prof_ProfilesDictionary *dict, - std::unordered_map &pid_strs, - std::span labels) { - const LabelKeyIds &label_ids = get_label_key_ids(dict); - size_t labels_num = 0; - if (!uw_output.container_id.empty()) { - labels[labels_num].key = label_ids.container_id; - labels[labels_num].str = to_CharSlice(uw_output.container_id); - ++labels_num; - } - - // Add any configured labels. Note that TID alone has the same cardinality as - // (TID;PID) tuples, so except for symbol table overhead it doesn't matter - // much if TID implies PID for clarity. - if (!watcher.suppress_pid || !watcher.suppress_tid) { - labels[labels_num].key = label_ids.process_id; - labels[labels_num].str = to_CharSlice(pid_str(uw_output.pid, pid_strs)); - ++labels_num; - } - if (!watcher.suppress_tid) { - labels[labels_num].key = label_ids.thread_id; - labels[labels_num].str = to_CharSlice(pid_str(uw_output.tid, pid_strs)); - ++labels_num; - } - if (watcher_has_tracepoint(&watcher)) { - labels[labels_num].key = label_ids.tracepoint_type; - // If the label is given, use that as the tracepoint type. Otherwise, - // default to the event name - if (!watcher.tracepoint_label.empty()) { - labels[labels_num].str = to_CharSlice(watcher.tracepoint_label); - } else { - labels[labels_num].str = to_CharSlice(watcher.tracepoint_event); - } - ++labels_num; - } - if (!uw_output.exe_name.empty()) { - labels[labels_num].key = label_ids.process_name; - labels[labels_num].str = to_CharSlice(uw_output.exe_name); - ++labels_num; - } - if (!uw_output.thread_name.empty()) { - labels[labels_num].key = label_ids.thread_name; - labels[labels_num].str = to_CharSlice(uw_output.thread_name); - ++labels_num; - } - DDPROF_DCHECK_FATAL(labels_num <= labels.size(), - "pprof_aggregate - label buffer exceeded"); - return labels_num; -} - std::span adjust_locations(const PerfWatcher *watcher, std::span locs) { if (watcher->options.nb_frames_to_skip < locs.size()) { @@ -655,6 +671,9 @@ DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx, status.err); } + reset_label_caches(pprof); + init_label_key_ids(pprof); + // Add relevant tags { bool const include_kernel = @@ -688,67 +707,12 @@ DDRes pprof_free_profile(DDProfPProf *pprof) { ddog_prof_Profile_drop(&pprof->_profile); pprof->_profile = {}; pprof->_nb_values = 0; + pprof->_pid_str.clear(); + reset_label_caches(pprof); return {}; } // Assumption of API is that sample is valid in a single type -DDRes pprof_aggregate(const UnwindOutput *uw_output, - const SymbolHdr &symbol_hdr, const DDProfValuePack &pack, - const PerfWatcher *watcher, - const FileInfoVector &file_infos, bool show_samples, - EventAggregationModePos value_pos, Symbolizer *symbolizer, - DDProfPProf *pprof) { - - const PProfIndices &pprof_indices = watcher->pprof_indices[value_pos]; - ddog_prof_Profile *profile = &pprof->_profile; - int64_t values[k_max_value_types] = {}; - assert(pprof_indices.pprof_index != -1); - values[pprof_indices.pprof_index] = pack.value; - if (watcher_has_countable_sample_type(watcher)) { - assert(pprof_indices.pprof_count_index != -1); - values[pprof_indices.pprof_count_index] = pack.count; - } - - std::array locations_buff; - std::array locations2_buff; - std::span locs{uw_output->locs}; - locs = adjust_locations(watcher, locs); - - // Blaze results should remain alive until we aggregate the pprof data - Symbolizer::BlazeResultsWrapper session_results; - unsigned write_index = 0; - DDRES_CHECK_FWD(process_symbolization( - locs, symbol_hdr, file_infos, symbolizer, - symbol_hdr.profiles_dictionary(), locations_buff, locations2_buff, - session_results, write_index)); - std::array labels{}; - // Create the labels for the sample. Two samples are the same only when - // their locations _and_ all labels are identical, so we admit a very limited - // number of labels at present - const size_t labels_num = prepare_labels( - *uw_output, *watcher, symbol_hdr.profiles_dictionary(), pprof->_pid_str, - std::span{labels}); - - ddog_prof_Sample2 const sample = { - .locations = {.ptr = locations2_buff.data(), .len = write_index}, - .values = {.ptr = values, .len = pprof->_nb_values}, - .labels = {.ptr = labels.data(), .len = labels_num}, - }; - - if (show_samples) { - ddprof_print_sample(std::span{locations_buff.data(), write_index}, - pack.value, uw_output->pid, uw_output->tid, value_pos, - *watcher); - } - auto res = ddog_prof_Profile_add2(profile, sample, pack.timestamp); - if (res.err != nullptr) { - defer { ddog_prof_Status_drop(&res); }; - DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to add profile: %s", - res.err); - } - return {}; -} - DDRes pprof_reset(DDProfPProf *pprof) { auto res = ddog_prof_Profile_reset(&pprof->_profile); if (res.tag != DDOG_PROF_PROFILE_RESULT_OK) { @@ -758,6 +722,8 @@ DDRes pprof_reset(DDProfPProf *pprof) { static_cast(msg.len), msg.ptr); } pprof->_pid_str.clear(); + reset_label_caches(pprof); + init_label_key_ids(pprof); return {}; } @@ -790,8 +756,8 @@ DDRes pprof_aggregate_interned_sample( session_results, write_index)); ddog_prof_LabelSetId labelset_id{}; - DDRES_CHECK_FWD(intern_profile_labelset(profile, *uw_output, *watcher, - pprof->_pid_str, &labelset_id)); + DDRES_CHECK_FWD(intern_profile_labelset(profile, pprof, *uw_output, *watcher, + &labelset_id)); std::array location_ids{}; for (unsigned i = 0; i < write_index; ++i) { diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 577a4df5f..603da41a6 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -164,9 +164,9 @@ TEST(DDProfExporter, simple) { res = pprof_create_profile(&pprofs, ctx, symbol_hdr._profiles_dictionary.get()); EXPECT_TRUE(IsDDResOK(res)); - res = pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, - &ctx.watchers[0], file_infos, false, kSumPos, - ctx.worker_ctx.symbolizer, &pprofs); + res = pprof_aggregate_interned_sample( + &mock_output, symbol_hdr, {1000, 1, 0}, &ctx.watchers[0], file_infos, + false, kSumPos, ctx.worker_ctx.symbolizer, &pprofs); EXPECT_TRUE(IsDDResOK(res)); } { diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index 1a992f64c..4a727e998 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -73,9 +73,9 @@ TEST(DDProfPProf, aggregate) { symbol_hdr._profiles_dictionary.get()); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_index != -1); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_count_index != -1); - res = pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, - &ctx.watchers[0], file_infos, false, kSumPos, - ctx.worker_ctx.symbolizer, &pprof); + res = pprof_aggregate_interned_sample( + &mock_output, symbol_hdr, {1000, 1, 0}, &ctx.watchers[0], file_infos, + false, kSumPos, ctx.worker_ctx.symbolizer, &pprof); EXPECT_TRUE(IsDDResOK(res)); @@ -117,9 +117,9 @@ TEST(DDProfPProf, just_live) { EXPECT_TRUE(ctx.watchers[1].pprof_indices[kLiveSumPos].pprof_count_index != -1); FileInfoVector file_infos; - res = pprof_aggregate(&mock_output, symbol_hdr, {1000, 1, 0}, - &ctx.watchers[1], file_infos, false, kLiveSumPos, - ctx.worker_ctx.symbolizer, &pprof); + res = pprof_aggregate_interned_sample( + &mock_output, symbol_hdr, {1000, 1, 0}, &ctx.watchers[1], file_infos, + false, kLiveSumPos, ctx.worker_ctx.symbolizer, &pprof); EXPECT_TRUE(IsDDResOK(res)); test_pprof(&pprof); res = pprof_free_profile(&pprof); From 08d929daaf69aaa1a8c932929840a9ff2ddf81a7 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Feb 2026 10:38:34 +0100 Subject: [PATCH 5/9] String interning - clean up double interning on labels --- include/pprof/ddprof_pprof.hpp | 41 ++--- include/symbol_hdr.hpp | 8 +- src/pprof/ddprof_pprof.cc | 266 ++++++--------------------------- 3 files changed, 58 insertions(+), 257 deletions(-) diff --git a/include/pprof/ddprof_pprof.hpp b/include/pprof/ddprof_pprof.hpp index 7c570e987..ca1836a61 100644 --- a/include/pprof/ddprof_pprof.hpp +++ b/include/pprof/ddprof_pprof.hpp @@ -22,23 +22,15 @@ class Symbolizer; struct SymbolHdr; struct DDProfPProf { - struct LabelKeyIds { - ddog_prof_StringId container_id{}; - ddog_prof_StringId process_id{}; - ddog_prof_StringId process_name{}; - ddog_prof_StringId thread_id{}; - ddog_prof_StringId thread_name{}; - ddog_prof_StringId tracepoint_type{}; - bool initialized{false}; - }; - - struct LabelKeyPair { - ddog_prof_StringId key{}; - ddog_prof_StringId value{}; - }; - - struct LabelKeyPairHash { - size_t operator()(const LabelKeyPair &pair) const noexcept; + // Label key IDs are stored in the dictionary (process-scoped) + // They are initialized once when SymbolHdr is created + struct DictLabelKeyIds { + ddog_prof_StringId2 container_id{}; + ddog_prof_StringId2 process_id{}; + ddog_prof_StringId2 process_name{}; + ddog_prof_StringId2 thread_id{}; + ddog_prof_StringId2 thread_name{}; + ddog_prof_StringId2 tracepoint_type{}; }; /* single profile gathering several value types */ @@ -47,21 +39,10 @@ struct DDProfPProf { Tags _tags; // avoid re-creating strings for all pid numbers std::unordered_map _pid_str; - std::unordered_map _pid_string_ids; - HeterogeneousLookupStringMap _label_value_ids; - std::unordered_map - _label_id_cache; - LabelKeyIds _label_key_ids; + // Dictionary label key IDs (set from SymbolHdr) + DictLabelKeyIds _dict_label_key_ids; }; -inline bool operator==(const DDProfPProf::LabelKeyPair &lhs, - const DDProfPProf::LabelKeyPair &rhs) { - return lhs.key.generation.id == rhs.key.generation.id && - lhs.key.id.offset == rhs.key.id.offset && - lhs.value.generation.id == rhs.value.generation.id && - lhs.value.id.offset == rhs.value.id.offset; -} - struct DDProfValuePack { int64_t value; uint64_t count; diff --git a/include/symbol_hdr.hpp b/include/symbol_hdr.hpp index 066aa7265..cb60002fc 100644 --- a/include/symbol_hdr.hpp +++ b/include/symbol_hdr.hpp @@ -53,6 +53,11 @@ struct SymbolHdr { _runtime_symbol_lookup.erase(pid); } + // String interning dictionary (persists across profile exports) + // MUST be declared first so it is destroyed last - Symbol and MapInfo + // objects store pointers into this dictionary. + ProfilesDictionaryPtr _profiles_dictionary; + // Cache symbol associations BaseFrameSymbolLookup _base_frame_symbol_lookup; CommonSymbolLookup _common_symbol_lookup; @@ -67,9 +72,6 @@ struct SymbolHdr { // The mapping table MapInfoTable _mapinfo_table; - - // String interning dictionary (persists across profile exports) - ProfilesDictionaryPtr _profiles_dictionary; }; } // namespace ddprof diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 79a8a647a..8e4b27531 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -11,7 +11,6 @@ #include "ddprof_defs.hpp" #include "ddres.hpp" #include "defer.hpp" -#include "hash_helper.hpp" #include "pevent_lib.hpp" #include "symbol_hdr.hpp" #include "symbolizer.hpp" @@ -31,16 +30,6 @@ using namespace std::string_view_literals; namespace ddprof { -size_t DDProfPProf::LabelKeyPairHash::operator()( - const DDProfPProf::LabelKeyPair &pair) const noexcept { - size_t seed = 0; - hash_combine(seed, pair.key.generation.id); - hash_combine(seed, pair.key.id.offset); - hash_combine(seed, pair.value.generation.id); - hash_combine(seed, pair.value.id.offset); - return seed; -} - namespace { constexpr size_t k_max_pprof_labels{8}; @@ -62,214 +51,58 @@ struct ActiveIdsResult { std::string_view pid_str(pid_t pid, std::unordered_map &pid_strs); -std::string_view to_string_view(ddog_CharSlice slice) { - if (!slice.ptr || slice.len == 0) { - return {}; - } - return {slice.ptr, slice.len}; -} - -ddog_prof_StringId intern_profile_string(ddog_prof_Profile *profile, - std::string_view value) { - if (value.empty()) { - return ddog_prof_Profile_interned_empty_string(); - } - ddog_prof_StringId_Result res = - ddog_prof_Profile_intern_string(profile, to_CharSlice(value)); - if (res.tag != DDOG_PROF_STRING_ID_RESULT_OK_GENERATIONAL_ID_STRING_ID) { - ddog_Error_drop(&res.err); - return ddog_prof_Profile_interned_empty_string(); - } - return res.ok; -} - -void reset_label_caches(DDProfPProf *pprof) { - pprof->_pid_string_ids.clear(); - pprof->_label_value_ids.clear(); - pprof->_label_id_cache.clear(); - pprof->_label_key_ids = {}; -} - -void init_label_key_ids(DDProfPProf *pprof) { - if (pprof->_label_key_ids.initialized) { - return; - } - ddog_prof_Profile *profile = &pprof->_profile; - pprof->_label_key_ids.container_id = - intern_profile_string(profile, k_container_id_label); - pprof->_label_key_ids.process_id = - intern_profile_string(profile, k_process_id_label); - pprof->_label_key_ids.process_name = - intern_profile_string(profile, k_process_name_label); - pprof->_label_key_ids.thread_id = - intern_profile_string(profile, k_thread_id_label); - pprof->_label_key_ids.thread_name = - intern_profile_string(profile, k_thread_name_label); - pprof->_label_key_ids.tracepoint_type = - intern_profile_string(profile, k_tracepoint_label); - pprof->_label_key_ids.initialized = true; -} - -ddog_prof_StringId cached_label_value_id(DDProfPProf *pprof, - ddog_prof_Profile *profile, - std::string_view value) { - if (value.empty()) { - return ddog_prof_Profile_interned_empty_string(); - } - auto it = pprof->_label_value_ids.find(value); - if (it != pprof->_label_value_ids.end()) { - return it->second; - } - ddog_prof_StringId id = intern_profile_string(profile, value); - pprof->_label_value_ids.emplace(std::string(value), id); - return id; -} - -ddog_prof_StringId cached_pid_string_id(DDProfPProf *pprof, - ddog_prof_Profile *profile, pid_t pid) { - auto it = pprof->_pid_string_ids.find(pid); - if (it != pprof->_pid_string_ids.end()) { - return it->second; - } - const std::string_view pid_value = pid_str(pid, pprof->_pid_str); - ddog_prof_StringId id = intern_profile_string(profile, pid_value); - pprof->_pid_string_ids.emplace(pid, id); - return id; +void init_dict_label_key_ids(DDProfPProf::DictLabelKeyIds &label_keys, + const ddog_prof_ProfilesDictionary *dict) { + label_keys.container_id = intern_string(dict, k_container_id_label); + label_keys.process_id = intern_string(dict, k_process_id_label); + label_keys.process_name = intern_string(dict, k_process_name_label); + label_keys.thread_id = intern_string(dict, k_thread_id_label); + label_keys.thread_name = intern_string(dict, k_thread_name_label); + label_keys.tracepoint_type = intern_string(dict, k_tracepoint_label); } -DDRes get_label_id(DDProfPProf *pprof, ddog_prof_Profile *profile, - ddog_prof_StringId key_id, ddog_prof_StringId value_id, - ddog_prof_LabelId *label_id) { - const DDProfPProf::LabelKeyPair pair{key_id, value_id}; - auto it = pprof->_label_id_cache.find(pair); - if (it != pprof->_label_id_cache.end()) { - *label_id = it->second; - return {}; - } - ddog_prof_LabelId_Result label_res = - ddog_prof_Profile_intern_label_str(profile, key_id, value_id); - if (label_res.tag != DDOG_PROF_LABEL_ID_RESULT_OK_GENERATIONAL_ID_LABEL_ID) { - ddog_Error_drop(&label_res.err); - return ddres_error(DD_WHAT_PPROF); - } - *label_id = label_res.ok; - pprof->_label_id_cache.emplace(pair, label_res.ok); - return {}; -} - -DDRes intern_profile_labelset( - ddog_prof_Profile *profile, DDProfPProf *pprof, - const UnwindOutput &uw_output, const PerfWatcher &watcher, - ddog_prof_LabelSetId *labelset_id) { - std::array labels{}; +size_t prepare_labels2(const UnwindOutput &uw_output, const PerfWatcher &watcher, + std::unordered_map &pid_strs, + const DDProfPProf::DictLabelKeyIds &label_keys, + std::span labels) { size_t labels_num = 0; - auto push_label_ids = [&](ddog_prof_StringId key_id, - ddog_prof_StringId value_id) -> DDRes { - ddog_prof_LabelId label_id{}; - DDRES_CHECK_FWD(get_label_id(pprof, profile, key_id, value_id, &label_id)); - labels[labels_num++] = label_id; - return {}; + auto push_label = [&](ddog_prof_StringId2 key_id, std::string_view value) { + labels[labels_num++] = { + .key = key_id, + .str = to_CharSlice(value), + .num = 0, + .num_unit = {.ptr = nullptr, .len = 0}, + }; }; if (!uw_output.container_id.empty()) { - DDRES_CHECK_FWD( - push_label_ids(pprof->_label_key_ids.container_id, - cached_label_value_id(pprof, profile, - uw_output.container_id))); + push_label(label_keys.container_id, uw_output.container_id); } if (!watcher.suppress_pid || !watcher.suppress_tid) { - DDRES_CHECK_FWD( - push_label_ids(pprof->_label_key_ids.process_id, - cached_pid_string_id(pprof, profile, uw_output.pid))); + push_label(label_keys.process_id, pid_str(uw_output.pid, pid_strs)); } if (!watcher.suppress_tid) { - DDRES_CHECK_FWD( - push_label_ids(pprof->_label_key_ids.thread_id, - cached_pid_string_id(pprof, profile, uw_output.tid))); + push_label(label_keys.thread_id, pid_str(uw_output.tid, pid_strs)); } if (watcher_has_tracepoint(&watcher)) { if (!watcher.tracepoint_label.empty()) { - DDRES_CHECK_FWD( - push_label_ids(pprof->_label_key_ids.tracepoint_type, - cached_label_value_id(pprof, profile, - watcher.tracepoint_label))); + push_label(label_keys.tracepoint_type, watcher.tracepoint_label); } else { - DDRES_CHECK_FWD( - push_label_ids(pprof->_label_key_ids.tracepoint_type, - cached_label_value_id(pprof, profile, - watcher.tracepoint_event))); + push_label(label_keys.tracepoint_type, watcher.tracepoint_event); } } if (!uw_output.exe_name.empty()) { - DDRES_CHECK_FWD( - push_label_ids(pprof->_label_key_ids.process_name, - cached_label_value_id(pprof, profile, - uw_output.exe_name))); + push_label(label_keys.process_name, uw_output.exe_name); } if (!uw_output.thread_name.empty()) { - DDRES_CHECK_FWD( - push_label_ids(pprof->_label_key_ids.thread_name, - cached_label_value_id(pprof, profile, - uw_output.thread_name))); + push_label(label_keys.thread_name, uw_output.thread_name); } DDPROF_DCHECK_FATAL(labels_num <= labels.size(), "pprof_aggregate - label buffer exceeded"); - ddog_prof_LabelSetId_Result labelset_res = ddog_prof_Profile_intern_labelset( - profile, - {.ptr = labels.data(), .len = static_cast(labels_num)}); - if (labelset_res.tag != - DDOG_PROF_LABEL_SET_ID_RESULT_OK_GENERATIONAL_ID_LABEL_SET_ID) { - ddog_Error_drop(&labelset_res.err); - return ddres_error(DD_WHAT_PPROF); - } - *labelset_id = labelset_res.ok; - return {}; -} - -DDRes intern_profile_location_id(ddog_prof_Profile *profile, - const ddog_prof_Location &location, - ddog_prof_LocationId *location_id) { - ddog_prof_StringId fn_name = - intern_profile_string(profile, to_string_view(location.function.name)); - ddog_prof_StringId fn_system = intern_profile_string( - profile, to_string_view(location.function.system_name)); - ddog_prof_StringId fn_file = - intern_profile_string(profile, to_string_view(location.function.filename)); - ddog_prof_FunctionId_Result function_res = - ddog_prof_Profile_intern_function(profile, fn_name, fn_system, fn_file); - if (function_res.tag != - DDOG_PROF_FUNCTION_ID_RESULT_OK_GENERATIONAL_ID_FUNCTION_ID) { - ddog_Error_drop(&function_res.err); - return ddres_error(DD_WHAT_PPROF); - } - - ddog_prof_StringId map_filename = - intern_profile_string(profile, to_string_view(location.mapping.filename)); - ddog_prof_StringId map_build_id = - intern_profile_string(profile, to_string_view(location.mapping.build_id)); - ddog_prof_MappingId_Result mapping_res = ddog_prof_Profile_intern_mapping( - profile, location.mapping.memory_start, location.mapping.memory_limit, - location.mapping.file_offset, map_filename, map_build_id); - if (mapping_res.tag != - DDOG_PROF_MAPPING_ID_RESULT_OK_GENERATIONAL_ID_MAPPING_ID) { - ddog_Error_drop(&mapping_res.err); - return ddres_error(DD_WHAT_PPROF); - } - - ddog_prof_LocationId_Result loc_res = - ddog_prof_Profile_intern_location_with_mapping_id( - profile, mapping_res.ok, function_res.ok, location.address, - location.line); - if (loc_res.tag != - DDOG_PROF_LOCATION_ID_RESULT_OK_GENERATIONAL_ID_LOCATION_ID) { - ddog_Error_drop(&loc_res.err); - return ddres_error(DD_WHAT_PPROF); - } - *location_id = loc_res.ok; - return {}; + return labels_num; } std::string_view pid_str(pid_t pid, @@ -671,8 +504,7 @@ DDRes pprof_create_profile(DDProfPProf *pprof, DDProfContext &ctx, status.err); } - reset_label_caches(pprof); - init_label_key_ids(pprof); + init_dict_label_key_ids(pprof->_dict_label_key_ids, *dict); // Add relevant tags { @@ -708,11 +540,9 @@ DDRes pprof_free_profile(DDProfPProf *pprof) { pprof->_profile = {}; pprof->_nb_values = 0; pprof->_pid_str.clear(); - reset_label_caches(pprof); return {}; } -// Assumption of API is that sample is valid in a single type DDRes pprof_reset(DDProfPProf *pprof) { auto res = ddog_prof_Profile_reset(&pprof->_profile); if (res.tag != DDOG_PROF_PROFILE_RESULT_OK) { @@ -722,8 +552,6 @@ DDRes pprof_reset(DDProfPProf *pprof) { static_cast(msg.len), msg.ptr); } pprof->_pid_str.clear(); - reset_label_caches(pprof); - init_label_key_ids(pprof); return {}; } @@ -755,24 +583,10 @@ DDRes pprof_aggregate_interned_sample( symbol_hdr.profiles_dictionary(), locations_buff, locations2_buff, session_results, write_index)); - ddog_prof_LabelSetId labelset_id{}; - DDRES_CHECK_FWD(intern_profile_labelset(profile, pprof, *uw_output, *watcher, - &labelset_id)); - - std::array location_ids{}; - for (unsigned i = 0; i < write_index; ++i) { - DDRES_CHECK_FWD( - intern_profile_location_id(profile, locations_buff[i], - &location_ids[i])); - } - ddog_prof_StackTraceId_Result stacktrace_res = - ddog_prof_Profile_intern_stacktrace( - profile, {.ptr = location_ids.data(), .len = write_index}); - if (stacktrace_res.tag != - DDOG_PROF_STACK_TRACE_ID_RESULT_OK_GENERATIONAL_ID_STACK_TRACE_ID) { - ddog_Error_drop(&stacktrace_res.err); - DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to intern stacktrace"); - } + std::array labels{}; + const size_t labels_num = + prepare_labels2(*uw_output, *watcher, pprof->_pid_str, + pprof->_dict_label_key_ids, std::span{labels}); if (show_samples) { ddprof_print_sample(std::span{locations_buff.data(), write_index}, @@ -780,12 +594,16 @@ DDRes pprof_aggregate_interned_sample( *watcher); } - ddog_VoidResult res = ddog_prof_Profile_intern_sample( - profile, stacktrace_res.ok, {.ptr = values, .len = pprof->_nb_values}, - labelset_id, pack.timestamp); - if (res.tag != DDOG_VOID_RESULT_OK) { - ddog_Error_drop(&res.err); - DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to add profile"); + ddog_prof_Sample2 const sample = { + .locations = {.ptr = locations2_buff.data(), .len = write_index}, + .values = {.ptr = values, .len = pprof->_nb_values}, + .labels = {.ptr = labels.data(), .len = labels_num}, + }; + + ddog_prof_Status res = ddog_prof_Profile_add2(profile, sample, pack.timestamp); + if (res.err != nullptr) { + defer { ddog_prof_Status_drop(&res); }; + DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to add profile: %s", res.err); } return {}; } From cba8cdd633abc82b97efa62f3c8835193e5cbec4 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Feb 2026 10:43:50 +0100 Subject: [PATCH 6/9] String interning - remove usage of strings in favour of libdatadog storage --- include/ddog_profiling_utils.hpp | 9 +++++++++ src/ddog_profiling_utils.cc | 34 ++++++++++++++++++++++++++++++++ src/pprof/ddprof_pprof.cc | 30 ++++++++++++++++------------ 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/include/ddog_profiling_utils.hpp b/include/ddog_profiling_utils.hpp index 0d4e79712..a6ff49529 100644 --- a/include/ddog_profiling_utils.hpp +++ b/include/ddog_profiling_utils.hpp @@ -26,6 +26,15 @@ inline ddog_CharSlice to_CharSlice(std::string_view str) { ddog_prof_StringId2 intern_string(const ddog_prof_ProfilesDictionary *dict, std::string_view str); +std::string_view get_string(const ddog_prof_ProfilesDictionary *dict, + ddog_prof_StringId2 string_id); + +std::string_view get_location2_function_name( + const ddog_prof_ProfilesDictionary *dict, const ddog_prof_Location2 &loc); + +std::string_view get_location2_mapping_filename( + const ddog_prof_ProfilesDictionary *dict, const ddog_prof_Location2 &loc); + ddog_prof_FunctionId2 intern_function_ids(const ddog_prof_ProfilesDictionary *dict, ddog_prof_StringId2 name_id, ddog_prof_StringId2 file_id, diff --git a/src/ddog_profiling_utils.cc b/src/ddog_profiling_utils.cc index df846a40e..360dc50e0 100644 --- a/src/ddog_profiling_utils.cc +++ b/src/ddog_profiling_utils.cc @@ -44,6 +44,40 @@ ddog_prof_StringId2 intern_string(const ddog_prof_ProfilesDictionary *dict, return string_id; } +std::string_view get_string(const ddog_prof_ProfilesDictionary *dict, + ddog_prof_StringId2 string_id) { + if (!dict || !string_id) { + return {}; + } + ddog_CharSlice result{nullptr, 0}; + ddog_prof_Status status = + ddog_prof_ProfilesDictionary_get_str(&result, dict, string_id); + if (status.err != nullptr) { + ddog_prof_Status_drop(&status); + return {}; + } + if (!result.ptr || result.len == 0) { + return {}; + } + return {result.ptr, result.len}; +} + +std::string_view get_location2_function_name( + const ddog_prof_ProfilesDictionary *dict, const ddog_prof_Location2 &loc) { + if (!loc.function) { + return {}; + } + return get_string(dict, loc.function->name); +} + +std::string_view get_location2_mapping_filename( + const ddog_prof_ProfilesDictionary *dict, const ddog_prof_Location2 &loc) { + if (!loc.mapping) { + return {}; + } + return get_string(dict, loc.mapping->filename); +} + ddog_prof_FunctionId2 intern_function_ids(const ddog_prof_ProfilesDictionary *dict, ddog_prof_StringId2 name_id, ddog_prof_StringId2 file_id, diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 8e4b27531..198c85b0a 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -122,7 +122,8 @@ bool is_ld(const std::string_view path) { return path.starts_with("ld-"); } -bool is_stack_complete(std::span locations) { +bool is_stack_complete(std::span locations, + const ddog_prof_ProfilesDictionary *dict) { static constexpr std::array s_expected_root_frames{ // Consider empty as OK (to avoid false incomplete frames) // If we have no symbols, we could still retrieve them in the backend. @@ -144,21 +145,21 @@ bool is_stack_complete(std::span locations) { } const auto &root_loc = locations.back(); - const std::string_view root_mapping{root_loc.mapping.filename.ptr, - root_loc.mapping.filename.len}; + const std::string_view root_mapping = + get_location2_mapping_filename(dict, root_loc); // If we are in ld.so (eg. during lib init before main) consider the stack as // complete if (is_ld(root_mapping)) { return true; } - const std::string_view root_func = - std::string_view(root_loc.function.name.ptr, root_loc.function.name.len); + const std::string_view root_func = get_location2_function_name(dict, root_loc); return std::find(s_expected_root_frames.begin(), s_expected_root_frames.end(), root_func) != s_expected_root_frames.end(); } -void ddprof_print_sample(std::span locations, +void ddprof_print_sample(std::span locations, + const ddog_prof_ProfilesDictionary *dict, uint64_t value, pid_t pid, pid_t tid, EventAggregationModePos value_mode_pos, const PerfWatcher &watcher) { @@ -172,11 +173,12 @@ void ddprof_print_sample(std::span locations, if (loc_it != locations.rbegin()) { buf += ";"; } - // Access function name and source file from location - const std::string_view function_name{loc_it->function.name.ptr, - loc_it->function.name.len}; - const std::string_view source_file{loc_it->function.filename.ptr, - loc_it->function.filename.len}; + const std::string_view function_name = + get_location2_function_name(dict, *loc_it); + std::string_view source_file; + if (loc_it->function) { + source_file = get_string(dict, loc_it->function->file_name); + } if (!function_name.empty()) { // Append the function name, trimming at the first '(' if present. buf += function_name.substr(0, function_name.find('(')); @@ -389,7 +391,8 @@ DDRes process_symbolization( // check if unwinding stops on a frame that makes sense if (write_index < (kMaxStackDepth - 1) && write_index >= 1 && !is_stack_complete( - std::span{locations_buff.data(), write_index})) { + std::span{locations2_buff.data(), write_index}, + dict)) { // Write a common frame to indicate an incomplete stack write_location(0, k_common_frame_names[incomplete_stack], {}, 0, {}, &locations_buff[write_index]); @@ -589,7 +592,8 @@ DDRes pprof_aggregate_interned_sample( pprof->_dict_label_key_ids, std::span{labels}); if (show_samples) { - ddprof_print_sample(std::span{locations_buff.data(), write_index}, + ddprof_print_sample(std::span{locations2_buff.data(), write_index}, + symbol_hdr.profiles_dictionary(), pack.value, uw_output->pid, uw_output->tid, value_pos, *watcher); } From 9e927fb4e057789a44631ffab05e7d6491b2762f Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Feb 2026 10:54:55 +0100 Subject: [PATCH 7/9] String interning - plug libdatadog dict in the symbolizer APIs --- include/ddog_profiling_utils.hpp | 32 ++----- include/symbolizer.hpp | 10 ++- src/ddog_profiling_utils.cc | 140 ++++++------------------------- src/pprof/ddprof_pprof.cc | 37 +++----- src/symbolizer.cc | 24 ++---- 5 files changed, 57 insertions(+), 186 deletions(-) diff --git a/include/ddog_profiling_utils.hpp b/include/ddog_profiling_utils.hpp index a6ff49529..4a959cd72 100644 --- a/include/ddog_profiling_utils.hpp +++ b/include/ddog_profiling_utils.hpp @@ -52,35 +52,17 @@ Symbol make_symbol(std::string symname, std::string demangled_name, uint32_t lineno, std::string srcpath, const ddog_prof_ProfilesDictionary *dict); -void write_function(const Symbol &symbol, - const ddog_prof_ProfilesDictionary *dict, - ddog_prof_Function *ffi_func); - -void write_function(std::string_view demangled_name, std::string_view file_name, - ddog_prof_Function *ffi_func); - -void write_mapping(const MapInfo &mapinfo, ddog_prof_Mapping *ffi_mapping); - -void write_location(const FunLoc &loc, const MapInfo &mapinfo, - const Symbol &symbol, - const ddog_prof_ProfilesDictionary *dict, - ddog_prof_Location *ffi_location); - -void write_location(ProcessAddress_t ip_or_elf_addr, - std::string_view demangled_name, std::string_view file_name, - uint32_t lineno, const MapInfo &mapinfo, - ddog_prof_Location *ffi_location); - void write_location2(const FunLoc &loc, const MapInfo &mapinfo, const Symbol &symbol, ddog_prof_Location2 *ffi_location); -DDRes write_location2_from_location(const ddog_prof_ProfilesDictionary *dict, - const ddog_prof_Location &src, - ddog_prof_Location2 *dst); - -DDRes write_location_blaze( +DDRes write_location2_blaze( ElfAddress_t elf_addr, ddprof::HeterogeneousLookupStringMap &demangled_names, const MapInfo &mapinfo, const blaze_sym &blaze_sym, unsigned &cur_loc, - std::span locations_buff); + const ddog_prof_ProfilesDictionary *dict, + std::span locations_buff); + +void write_location2_no_sym(ElfAddress_t ip, const MapInfo &mapinfo, + const ddog_prof_ProfilesDictionary *dict, + ddog_prof_Location2 *ffi_location); } // namespace ddprof diff --git a/include/symbolizer.hpp b/include/symbolizer.hpp index 61fec58ec..72edf03cb 100644 --- a/include/symbolizer.hpp +++ b/include/symbolizer.hpp @@ -16,7 +16,8 @@ #include #include -struct ddog_prof_Location; +struct ddog_prof_Location2; +struct ddog_prof_ProfilesDictionary; namespace ddprof { class Symbolizer { @@ -62,17 +63,18 @@ class Symbolizer { /// assumption is that all addresses are from this source file /// Parameters /// addrs - Elf address - /// process_addrs - Process address (only used for pprof reporting) /// file_id - a way to identify this file in a unique way /// elf_src - a path to the source file (idealy stable) /// map_info - the mapping information to write to the pprof - /// locations - the output pprof strucure + /// dict - the profiling dictionary for string interning + /// locations - the output pprof structure (Location2 with interned IDs) /// write_index - input / output parameter updated based on what is written /// results - A handle object for lifetime of strings. /// Should be kept until interned strings are no longer needed. DDRes symbolize_pprof(std::span addrs, FileInfoId_t file_id, const std::string &elf_src, const MapInfo &map_info, - std::span locations, + const ddog_prof_ProfilesDictionary *dict, + std::span locations, unsigned &write_index, BlazeResultsWrapper &results); int remove_unvisited(); void reset_unvisited_flag(); diff --git a/src/ddog_profiling_utils.cc b/src/ddog_profiling_utils.cc index 360dc50e0..d84c5ddeb 100644 --- a/src/ddog_profiling_utils.cc +++ b/src/ddog_profiling_utils.cc @@ -146,66 +146,6 @@ Symbol make_symbol(std::string symname, std::string demangled_name, return Symbol(name_id, file_id, lineno, function_id); } -namespace { -ddog_CharSlice get_dict_string(const ddog_prof_ProfilesDictionary *dict, - ddog_prof_StringId2 string_id) { - if (!dict || !string_id) { - return {.ptr = nullptr, .len = 0}; - } - ddog_CharSlice result{nullptr, 0}; - ddog_prof_Status status = - ddog_prof_ProfilesDictionary_get_str(&result, dict, string_id); - if (status.err != nullptr) { - ddog_prof_Status_drop(&status); - return {.ptr = nullptr, .len = 0}; - } - return result; -} -} // namespace - -void write_function(const Symbol &symbol, - const ddog_prof_ProfilesDictionary *dict, - ddog_prof_Function *ffi_func) { - ffi_func->name = get_dict_string(dict, symbol._name_id); - ffi_func->system_name = {.ptr = nullptr, .len = 0}; - ffi_func->filename = get_dict_string(dict, symbol._file_id); -} - -void write_function(std::string_view demangled_name, std::string_view file_name, - ddog_prof_Function *ffi_func) { - ffi_func->name = to_CharSlice(demangled_name); - ffi_func->system_name = {.ptr = nullptr, .len = 0}; - ffi_func->filename = to_CharSlice(file_name); -} - -void write_mapping(const MapInfo &mapinfo, ddog_prof_Mapping *ffi_mapping) { - ffi_mapping->memory_start = mapinfo._low_addr; - ffi_mapping->memory_limit = mapinfo._high_addr; - ffi_mapping->file_offset = mapinfo._offset; - ffi_mapping->filename = to_CharSlice(mapinfo._sopath); - ffi_mapping->build_id = to_CharSlice(mapinfo._build_id); -} - -void write_location(const FunLoc &loc, const MapInfo &mapinfo, - const Symbol &symbol, - const ddog_prof_ProfilesDictionary *dict, - ddog_prof_Location *ffi_location) { - write_mapping(mapinfo, &ffi_location->mapping); - write_function(symbol, dict, &ffi_location->function); - ffi_location->address = loc.elf_addr; - ffi_location->line = symbol._lineno; -} - -void write_location(ProcessAddress_t ip_or_elf_addr, - std::string_view demangled_name, std::string_view file_name, - uint32_t lineno, const MapInfo &mapinfo, - ddog_prof_Location *ffi_location) { - write_mapping(mapinfo, &ffi_location->mapping); - write_function(demangled_name, file_name, &ffi_location->function); - ffi_location->address = ip_or_elf_addr; - ffi_location->line = lineno; -} - void write_location2(const FunLoc &loc, const MapInfo &mapinfo, const Symbol &symbol, ddog_prof_Location2 *ffi_location) { ffi_location->mapping = mapinfo._mapping_id; @@ -214,53 +154,21 @@ void write_location2(const FunLoc &loc, const MapInfo &mapinfo, ffi_location->line = symbol._lineno; } -DDRes write_location2_from_location(const ddog_prof_ProfilesDictionary *dict, - const ddog_prof_Location &src, - ddog_prof_Location2 *dst) { - if (!dict) { - return ddres_error(DD_WHAT_PPROF); - } - auto to_sv = [](ddog_CharSlice slice) -> std::string_view { - if (!slice.ptr || slice.len == 0) { - return {}; - } - return {slice.ptr, slice.len}; - }; - const std::string_view fn_name = to_sv(src.function.name); - const std::string_view fn_file = to_sv(src.function.filename); - const std::string_view fn_system = to_sv(src.function.system_name); - ddog_prof_FunctionId2 function_id = - intern_function(dict, fn_name, fn_file, fn_system); - if (function_id == nullptr) { - return ddres_error(DD_WHAT_PPROF); - } - ddog_prof_Mapping2 mapping = { - .memory_start = src.mapping.memory_start, - .memory_limit = src.mapping.memory_limit, - .file_offset = src.mapping.file_offset, - .filename = intern_string(dict, to_sv(src.mapping.filename)), - .build_id = intern_string(dict, to_sv(src.mapping.build_id)), - }; - ddog_prof_MappingId2 mapping_id = nullptr; - ddog_prof_Status status = - ddog_prof_ProfilesDictionary_insert_mapping(&mapping_id, dict, &mapping); - if (status.err != nullptr) { - LG_WRN("Failed to intern mapping: %s", status.err); - ddog_prof_Status_drop(&status); - return ddres_error(DD_WHAT_PPROF); - } - dst->mapping = mapping_id; - dst->function = function_id; - dst->address = src.address; - dst->line = src.line; - return {}; +void write_location2_no_sym(ElfAddress_t ip, const MapInfo &mapinfo, + const ddog_prof_ProfilesDictionary *dict, + ddog_prof_Location2 *ffi_location) { + ffi_location->mapping = mapinfo._mapping_id; + ffi_location->function = intern_function(dict, {}, mapinfo._sopath); + ffi_location->address = ip; + ffi_location->line = 0; } -DDRes write_location_blaze( +DDRes write_location2_blaze( ElfAddress_t elf_addr, ddprof::HeterogeneousLookupStringMap &demangled_names, const MapInfo &mapinfo, const blaze_sym &blaze_sym, unsigned &cur_loc, - std::span locations_buff) { + const ddog_prof_ProfilesDictionary *dict, + std::span locations_buff) { if (cur_loc >= locations_buff.size()) { return ddres_warn(DD_WHAT_UW_MAX_DEPTH); } @@ -269,31 +177,35 @@ DDRes write_location_blaze( for (int i = blaze_sym.inlined_cnt - 1; i >= 0 && cur_loc < kMaxStackDepth; --i) { const blaze_symbolize_inlined_fn *inlined_fn = blaze_sym.inlined + i; - ddog_prof_Location &ffi_location = locations_buff[cur_loc]; + ddog_prof_Location2 &ffi_location = locations_buff[cur_loc]; const std::string_view demangled_name = inlined_fn->name ? get_or_insert_demangled_sym(inlined_fn->name, demangled_names) : undef_inlined; - write_location(elf_addr, demangled_name, - inlined_fn->code_info.file - ? std::string_view(inlined_fn->code_info.file) - : mapinfo._sopath, - inlined_fn->code_info.line, mapinfo, &ffi_location); + const std::string_view file_name = inlined_fn->code_info.file + ? std::string_view(inlined_fn->code_info.file) + : mapinfo._sopath; + ffi_location.mapping = mapinfo._mapping_id; + ffi_location.function = intern_function(dict, demangled_name, file_name); + ffi_location.address = elf_addr; + ffi_location.line = inlined_fn->code_info.line; ++cur_loc; } if (cur_loc >= locations_buff.size()) { return ddres_warn(DD_WHAT_UW_MAX_DEPTH); } - ddog_prof_Location &ffi_location = locations_buff[cur_loc]; + ddog_prof_Location2 &ffi_location = locations_buff[cur_loc]; const std::string_view demangled_name = blaze_sym.name ? get_or_insert_demangled_sym(blaze_sym.name, demangled_names) : undef; - write_location(elf_addr, demangled_name, - blaze_sym.code_info.file - ? std::string_view{blaze_sym.code_info.file} - : std::string_view{mapinfo._sopath}, - blaze_sym.code_info.line, mapinfo, &ffi_location); + const std::string_view file_name = blaze_sym.code_info.file + ? std::string_view{blaze_sym.code_info.file} + : std::string_view{mapinfo._sopath}; + ffi_location.mapping = mapinfo._mapping_id; + ffi_location.function = intern_function(dict, demangled_name, file_name); + ffi_location.address = elf_addr; + ffi_location.line = blaze_sym.code_info.line; ++cur_loc; return {}; } diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 198c85b0a..8f88b3c31 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -318,7 +318,6 @@ DDRes process_symbolization( std::span locs, const SymbolHdr &symbol_hdr, const FileInfoVector &file_infos, Symbolizer *symbolizer, const ddog_prof_ProfilesDictionary *dict, - std::array &locations_buff, std::array &locations2_buff, Symbolizer::BlazeResultsWrapper &session_results, unsigned &write_index) { unsigned index = 0; @@ -330,14 +329,10 @@ DDRes process_symbolization( // We wait for the incomplete frame to be added if needed. // By removing the incomplete frame and pushing logic to BE // we can simplify this loop - while (index < locs.size() - 1 && write_index < locations_buff.size()) { + while (index < locs.size() - 1 && write_index < locations2_buff.size()) { if (locs[index].symbol_idx != k_symbol_idx_null) { // Location already symbolized const FunLoc &loc = locs[index]; - write_location(loc, mapinfo_table[loc.map_info_idx], - symbol_table[loc.symbol_idx], - symbol_hdr.profiles_dictionary(), - &locations_buff[write_index]); write_location2(loc, mapinfo_table[loc.map_info_idx], symbol_table[loc.symbol_idx], &locations2_buff[write_index]); @@ -368,17 +363,11 @@ DDRes process_symbolization( } } // Perform symbolization for all collected addresses - const unsigned start_write_index = write_index; const DDRes res = symbolizer->symbolize_pprof( elf_addresses, file_id, current_file_path, - mapinfo_table[locs[start_index].map_info_idx], - std::span{locations_buff}, write_index, + mapinfo_table[locs[start_index].map_info_idx], dict, + std::span{locations2_buff}, write_index, session_results); - for (unsigned i = start_write_index; i < write_index; ++i) { - DDRES_CHECK_FWD( - write_location2_from_location(dict, locations_buff[i], - &locations2_buff[i])); - } if (IsDDResNotOK(res)) { if (IsDDResFatal(res)) { DDRES_RETURN_ERROR_LOG(DD_WHAT_SYMBOLIZER, "Failed to symbolize pprof"); @@ -394,11 +383,12 @@ DDRes process_symbolization( std::span{locations2_buff.data(), write_index}, dict)) { // Write a common frame to indicate an incomplete stack - write_location(0, k_common_frame_names[incomplete_stack], {}, 0, {}, - &locations_buff[write_index]); - DDRES_CHECK_FWD( - write_location2_from_location(dict, locations_buff[write_index], - &locations2_buff[write_index])); + ddog_prof_Location2 &incomplete_loc = locations2_buff[write_index]; + incomplete_loc.mapping = nullptr; + incomplete_loc.function = + intern_function(dict, k_common_frame_names[incomplete_stack], {}); + incomplete_loc.address = 0; + incomplete_loc.line = 0; ++write_index; } @@ -406,10 +396,6 @@ DDRes process_symbolization( if (write_index < kMaxStackDepth && locs.back().symbol_idx != k_symbol_idx_null) { const FunLoc &loc = locs.back(); - write_location(loc, mapinfo_table[loc.map_info_idx], - symbol_table[loc.symbol_idx], - symbol_hdr.profiles_dictionary(), - &locations_buff[write_index]); write_location2(loc, mapinfo_table[loc.map_info_idx], symbol_table[loc.symbol_idx], &locations2_buff[write_index]); @@ -574,7 +560,6 @@ DDRes pprof_aggregate_interned_sample( values[pprof_indices.pprof_count_index] = pack.count; } - std::array locations_buff; std::array locations2_buff; std::span locs{uw_output->locs}; locs = adjust_locations(watcher, locs); @@ -583,8 +568,8 @@ DDRes pprof_aggregate_interned_sample( unsigned write_index = 0; DDRES_CHECK_FWD(process_symbolization( locs, symbol_hdr, file_infos, symbolizer, - symbol_hdr.profiles_dictionary(), locations_buff, locations2_buff, - session_results, write_index)); + symbol_hdr.profiles_dictionary(), locations2_buff, session_results, + write_index)); std::array labels{}; const size_t labels_num = diff --git a/src/symbolizer.cc b/src/symbolizer.cc index 35492658f..c41008728 100644 --- a/src/symbolizer.cc +++ b/src/symbolizer.cc @@ -13,15 +13,6 @@ #include namespace ddprof { -namespace { -inline void write_location_no_sym(ElfAddress_t ip, const MapInfo &mapinfo, - ddog_prof_Location *ffi_location) { - write_mapping(mapinfo, &ffi_location->mapping); - // write empty with empty function name, to enable remote symbolization - write_function({}, mapinfo._sopath, &ffi_location->function); - ffi_location->address = ip; -} -} // namespace int Symbolizer::remove_unvisited() { // Remove all unvisited blaze_symbolizer instances from the map @@ -56,7 +47,8 @@ DDRes Symbolizer::symbolize_pprof(std::span elf_addrs, FileInfoId_t file_id, const std::string &elf_src, const MapInfo &map_info, - std::span locations, + const ddog_prof_ProfilesDictionary *dict, + std::span locations, unsigned &write_index, BlazeResultsWrapper &results) { if (elf_addrs.empty() || elf_src.empty()) { @@ -95,21 +87,19 @@ DDRes Symbolizer::symbolize_pprof(std::span elf_addrs, "Symbolizer: Mismatch between size of returned " "symbols and size of given elf addresses"); results.blaze_results.push_back(blaze_res); - // Demangling cache based on stability of unordered map - // This will be moved to the backend for (size_t i = 0; i < blaze_res->cnt && i < elf_addrs.size(); ++i) { const blaze_sym *cur_sym = blaze_res->syms + i; if (cur_sym->addr == 0) { // Some binaries expose a single symbol at address 0 (ex: // DD_AGENT_V1). Avoid emitting it so the backend still attempts // symbolication. - write_location_no_sym(elf_addrs[i], map_info, - &locations[write_index++]); + write_location2_no_sym(elf_addrs[i], map_info, dict, + &locations[write_index++]); continue; } - DDRES_CHECK_FWD(write_location_blaze( + DDRES_CHECK_FWD(write_location2_blaze( elf_addrs[i], symbolizer_wrapper.demangled_names, map_info, - *cur_sym, write_index, locations)); + *cur_sym, write_index, dict, locations)); } return {}; } @@ -119,7 +109,7 @@ DDRes Symbolizer::symbolize_pprof(std::span elf_addrs, // This can happen when file descriptors are exhausted // OR symbolization is disabled for (auto el : elf_addrs) { - write_location_no_sym(el, map_info, &locations[write_index++]); + write_location2_no_sym(el, map_info, dict, &locations[write_index++]); } return {}; From a2d730734b2b40af67e1c9aba0f8bda24ccb7cb1 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Feb 2026 11:17:00 +0100 Subject: [PATCH 8/9] String interning - Simplify the symbol object --- include/base_frame_symbol_lookup.hpp | 3 +- include/ddog_profiling_utils.hpp | 10 +++--- include/dso_symbol_lookup.hpp | 6 ++-- include/mapinfo_lookup.hpp | 3 +- include/symbol.hpp | 15 ++------- include/symbol_helper.hpp | 8 ++--- src/base_frame_symbol_lookup.cc | 24 ++++++-------- src/common_mapinfo_lookup.cc | 7 +++-- src/ddog_profiling_utils.cc | 21 ++++++------- src/dso_symbol_lookup.cc | 4 +-- src/mapinfo_lookup.cc | 6 ++-- src/pprof/ddprof_pprof.cc | 22 +++++++------ src/runtime_symbol_lookup.cc | 29 +++++++---------- src/symbol_hdr.cc | 1 - src/unwind_helper.cc | 5 +-- test/CMakeLists.txt | 15 ++++++--- test/ddprof_exporter-ut.cc | 2 +- test/ddprof_pprof-ut.cc | 8 ++--- test/runtime_symbol_lookup-ut.cc | 47 ++++++++++++++-------------- test/unwind_output_mock.hpp | 17 +++++----- 20 files changed, 121 insertions(+), 132 deletions(-) diff --git a/include/base_frame_symbol_lookup.hpp b/include/base_frame_symbol_lookup.hpp index aa7880ae3..52dd5af0c 100644 --- a/include/base_frame_symbol_lookup.hpp +++ b/include/base_frame_symbol_lookup.hpp @@ -15,8 +15,7 @@ namespace ddprof { class BaseFrameSymbolLookup { public: SymbolIdx_t get_or_insert(pid_t pid, SymbolTable &symbol_table, - DsoSymbolLookup &dso_symbol_lookup, - DsoHdr &dso_hdr, + DsoSymbolLookup &dso_symbol_lookup, DsoHdr &dso_hdr, const ddog_prof_ProfilesDictionary *dict); // Erase symbol lookup for this pid (warning symbols still exist) diff --git a/include/ddog_profiling_utils.hpp b/include/ddog_profiling_utils.hpp index 4a959cd72..b4e5f53dc 100644 --- a/include/ddog_profiling_utils.hpp +++ b/include/ddog_profiling_utils.hpp @@ -29,11 +29,13 @@ ddog_prof_StringId2 intern_string(const ddog_prof_ProfilesDictionary *dict, std::string_view get_string(const ddog_prof_ProfilesDictionary *dict, ddog_prof_StringId2 string_id); -std::string_view get_location2_function_name( - const ddog_prof_ProfilesDictionary *dict, const ddog_prof_Location2 &loc); +std::string_view +get_location2_function_name(const ddog_prof_ProfilesDictionary *dict, + const ddog_prof_Location2 &loc); -std::string_view get_location2_mapping_filename( - const ddog_prof_ProfilesDictionary *dict, const ddog_prof_Location2 &loc); +std::string_view +get_location2_mapping_filename(const ddog_prof_ProfilesDictionary *dict, + const ddog_prof_Location2 &loc); ddog_prof_FunctionId2 intern_function_ids(const ddog_prof_ProfilesDictionary *dict, diff --git a/include/dso_symbol_lookup.hpp b/include/dso_symbol_lookup.hpp index 4ed20793f..01932aff0 100644 --- a/include/dso_symbol_lookup.hpp +++ b/include/dso_symbol_lookup.hpp @@ -31,9 +31,9 @@ class DsoSymbolLookup { private: size_t get_size() const; - SymbolIdx_t get_or_insert_unhandled_type( - const Dso &dso, SymbolTable &symbol_table, - const ddog_prof_ProfilesDictionary *dict); + SymbolIdx_t + get_or_insert_unhandled_type(const Dso &dso, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict); // map of maps --> the aim is to monitor usage of some maps and clear them // together // TODO : find efficient clear on symbol table before we do this diff --git a/include/mapinfo_lookup.hpp b/include/mapinfo_lookup.hpp index 7a2535a79..082d67745 100644 --- a/include/mapinfo_lookup.hpp +++ b/include/mapinfo_lookup.hpp @@ -22,8 +22,7 @@ namespace ddprof { class MapInfoLookup { public: MapInfoIdx_t get_or_insert(pid_t pid, MapInfoTable &mapinfo_table, - const Dso &dso, - std::optional build_id, + const Dso &dso, std::optional build_id, const ddog_prof_ProfilesDictionary *dict); void erase(pid_t pid) { // table elements are not removed (TODO to gain memory usage) diff --git a/include/symbol.hpp b/include/symbol.hpp index 84245a3cf..762d453aa 100644 --- a/include/symbol.hpp +++ b/include/symbol.hpp @@ -7,9 +7,6 @@ #include "ddprof_defs.hpp" -struct ddog_prof_Function2; -struct ddog_prof_StringHeader; - struct ddog_prof_Function2; // Symbol @@ -20,18 +17,10 @@ class Symbol { public: Symbol() : _lineno(0) {} - Symbol(ddog_prof_StringHeader *name_id, ddog_prof_StringHeader *file_id, - uint32_t lineno, ddog_prof_Function2 *function_id) - : _name_id(name_id), - _file_id(file_id), - _lineno(lineno), - _function_id(function_id) {} + Symbol(uint32_t lineno, ddog_prof_Function2 *function_id) + : _lineno(lineno), _function_id(function_id) {} - // OUTPUT OF LINE INFO - ddog_prof_StringHeader *_name_id{nullptr}; - ddog_prof_StringHeader *_file_id{nullptr}; uint32_t _lineno; - ddog_prof_Function2 *_function_id{nullptr}; }; } // namespace ddprof diff --git a/include/symbol_helper.hpp b/include/symbol_helper.hpp index 38e706238..8662954c5 100644 --- a/include/symbol_helper.hpp +++ b/include/symbol_helper.hpp @@ -6,8 +6,8 @@ #include "unwind_state.hpp" #include -#include #include +#include #include #include @@ -43,12 +43,12 @@ std::vector collect_symbols(UnwindState &state, } else { // Lookup the symbol from the symbol table. auto &symbol = symbol_table[state.output.locs[iloc].symbol_idx]; - if (!dict || !symbol._name_id) { + if (!dict || !symbol._function_id) { demangled_name = "unknown"; } else { ddog_CharSlice slice{nullptr, 0}; - ddog_prof_Status status = - ddog_prof_ProfilesDictionary_get_str(&slice, dict, symbol._name_id); + ddog_prof_Status status = ddog_prof_ProfilesDictionary_get_str( + &slice, dict, symbol._function_id->name); if (status.err != nullptr) { ddog_prof_Status_drop(&status); demangled_name = "unknown"; diff --git a/src/base_frame_symbol_lookup.cc b/src/base_frame_symbol_lookup.cc index 843270b48..c30b26f1f 100644 --- a/src/base_frame_symbol_lookup.cc +++ b/src/base_frame_symbol_lookup.cc @@ -16,16 +16,14 @@ namespace ddprof { namespace { Symbol symbol_from_pid(pid_t pid, const ddog_prof_ProfilesDictionary *dict) { - return make_symbol(std::string(), std::string(), 0, - absl::StrCat("pid_", pid), dict); + return make_symbol(std::string(), std::string(), 0, absl::StrCat("pid_", pid), + dict); } } // namespace -SymbolIdx_t -BaseFrameSymbolLookup::insert_bin_symbol(pid_t pid, SymbolTable &symbol_table, - DsoSymbolLookup &dso_symbol_lookup, - DsoHdr &dso_hdr, - const ddog_prof_ProfilesDictionary *dict) { +SymbolIdx_t BaseFrameSymbolLookup::insert_bin_symbol( + pid_t pid, SymbolTable &symbol_table, DsoSymbolLookup &dso_symbol_lookup, + DsoHdr &dso_hdr, const ddog_prof_ProfilesDictionary *dict) { SymbolIdx_t symbol_idx = -1; DsoHdr::DsoFindRes const find_res = @@ -54,11 +52,9 @@ BaseFrameSymbolLookup::insert_bin_symbol(pid_t pid, SymbolTable &symbol_table, return symbol_idx; } -SymbolIdx_t -BaseFrameSymbolLookup::get_or_insert(pid_t pid, SymbolTable &symbol_table, - DsoSymbolLookup &dso_symbol_lookup, - DsoHdr &dso_hdr, - const ddog_prof_ProfilesDictionary *dict) { +SymbolIdx_t BaseFrameSymbolLookup::get_or_insert( + pid_t pid, SymbolTable &symbol_table, DsoSymbolLookup &dso_symbol_lookup, + DsoHdr &dso_hdr, const ddog_prof_ProfilesDictionary *dict) { auto const it_bin = _bin_map.find(pid); auto const it_pid = _pid_map.find(pid); @@ -69,8 +65,8 @@ BaseFrameSymbolLookup::get_or_insert(pid_t pid, SymbolTable &symbol_table, // attempt k nb times to look for binary info if (it_pid == _pid_map.end() || ++it_pid->second._nb_bin_lookups < k_nb_bin_lookups) { - symbol_idx = - insert_bin_symbol(pid, symbol_table, dso_symbol_lookup, dso_hdr, dict); + symbol_idx = insert_bin_symbol(pid, symbol_table, dso_symbol_lookup, + dso_hdr, dict); } } if (symbol_idx == -1 && it_pid != _pid_map.end()) { diff --git a/src/common_mapinfo_lookup.cc b/src/common_mapinfo_lookup.cc index c7fde1f67..f9798f976 100644 --- a/src/common_mapinfo_lookup.cc +++ b/src/common_mapinfo_lookup.cc @@ -21,8 +21,8 @@ MapInfo mapinfo_from_common(CommonMapInfoLookup::MappingErrors lookup_case) { } // namespace MapInfoIdx_t CommonMapInfoLookup::get_or_insert( - CommonMapInfoLookup::MappingErrors lookup_case, - MapInfoTable &mapinfo_table, const ddog_prof_ProfilesDictionary *dict) { + CommonMapInfoLookup::MappingErrors lookup_case, MapInfoTable &mapinfo_table, + const ddog_prof_ProfilesDictionary *dict) { auto const it = _map.find(lookup_case); MapInfoIdx_t res; if (it != _map.end()) { @@ -30,7 +30,8 @@ MapInfoIdx_t CommonMapInfoLookup::get_or_insert( } else { // insert things res = mapinfo_table.size(); mapinfo_table.push_back(mapinfo_from_common(lookup_case)); - mapinfo_table.back()._mapping_id = intern_mapping(dict, mapinfo_table.back()); + mapinfo_table.back()._mapping_id = + intern_mapping(dict, mapinfo_table.back()); _map.insert({lookup_case, res}); } return res; diff --git a/src/ddog_profiling_utils.cc b/src/ddog_profiling_utils.cc index d84c5ddeb..ddbdc1074 100644 --- a/src/ddog_profiling_utils.cc +++ b/src/ddog_profiling_utils.cc @@ -62,16 +62,18 @@ std::string_view get_string(const ddog_prof_ProfilesDictionary *dict, return {result.ptr, result.len}; } -std::string_view get_location2_function_name( - const ddog_prof_ProfilesDictionary *dict, const ddog_prof_Location2 &loc) { +std::string_view +get_location2_function_name(const ddog_prof_ProfilesDictionary *dict, + const ddog_prof_Location2 &loc) { if (!loc.function) { return {}; } return get_string(dict, loc.function->name); } -std::string_view get_location2_mapping_filename( - const ddog_prof_ProfilesDictionary *dict, const ddog_prof_Location2 &loc) { +std::string_view +get_location2_mapping_filename(const ddog_prof_ProfilesDictionary *dict, + const ddog_prof_Location2 &loc) { if (!loc.mapping) { return {}; } @@ -91,9 +93,8 @@ intern_function_ids(const ddog_prof_ProfilesDictionary *dict, .file_name = file_id, }; ddog_prof_FunctionId2 function_id = nullptr; - ddog_prof_Status status = - ddog_prof_ProfilesDictionary_insert_function(&function_id, dict, - &function); + ddog_prof_Status status = ddog_prof_ProfilesDictionary_insert_function( + &function_id, dict, &function); if (status.err != nullptr) { LG_WRN("Failed to intern function: %s", status.err); ddog_prof_Status_drop(&status); @@ -139,11 +140,9 @@ Symbol make_symbol(std::string symname, std::string demangled_name, uint32_t lineno, std::string srcpath, const ddog_prof_ProfilesDictionary *dict) { [[maybe_unused]] std::string ignored_symname = std::move(symname); - ddog_prof_StringId2 name_id = intern_string(dict, demangled_name); - ddog_prof_StringId2 file_id = intern_string(dict, srcpath); ddog_prof_FunctionId2 function_id = - intern_function_ids(dict, name_id, file_id, DDOG_PROF_STRINGID2_EMPTY); - return Symbol(name_id, file_id, lineno, function_id); + intern_function(dict, demangled_name, srcpath); + return Symbol(lineno, function_id); } void write_location2(const FunLoc &loc, const MapInfo &mapinfo, diff --git a/src/dso_symbol_lookup.cc b/src/dso_symbol_lookup.cc index 9c0f4e953..788d8225c 100644 --- a/src/dso_symbol_lookup.cc +++ b/src/dso_symbol_lookup.cc @@ -20,8 +20,8 @@ namespace { Symbol symbol_from_unhandled_dso(const Dso &dso, const ddog_prof_ProfilesDictionary *dict) { - return make_symbol(std::string(), std::string(), 0, - dso_type_str(dso._type), dict); + return make_symbol(std::string(), std::string(), 0, dso_type_str(dso._type), + dict); } Symbol symbol_from_dso(ElfAddress_t normalized_addr, const Dso &dso, diff --git a/src/mapinfo_lookup.cc b/src/mapinfo_lookup.cc index ee7093c2f..65aafa9ba 100644 --- a/src/mapinfo_lookup.cc +++ b/src/mapinfo_lookup.cc @@ -12,8 +12,7 @@ namespace ddprof { MapInfoIdx_t MapInfoLookup::get_or_insert(pid_t pid, MapInfoTable &mapinfo_table, - const Dso &dso, - std::optional build_id, + const Dso &dso, std::optional build_id, const ddog_prof_ProfilesDictionary *dict) { MapInfoAddrMap &addr_map = _mapinfo_pidmap[pid]; auto it = addr_map.find(dso._start); @@ -27,7 +26,8 @@ MapInfoLookup::get_or_insert(pid_t pid, MapInfoTable &mapinfo_table, mapinfo_table.emplace_back(dso._start, dso._end, dso._offset, std::move(sname_str), build_id ? *build_id : BuildIdStr{}); - mapinfo_table.back()._mapping_id = intern_mapping(dict, mapinfo_table.back()); + mapinfo_table.back()._mapping_id = + intern_mapping(dict, mapinfo_table.back()); addr_map.emplace(dso._start, map_info_idx); return map_info_idx; } diff --git a/src/pprof/ddprof_pprof.cc b/src/pprof/ddprof_pprof.cc index 8f88b3c31..e40118b71 100644 --- a/src/pprof/ddprof_pprof.cc +++ b/src/pprof/ddprof_pprof.cc @@ -61,7 +61,8 @@ void init_dict_label_key_ids(DDProfPProf::DictLabelKeyIds &label_keys, label_keys.tracepoint_type = intern_string(dict, k_tracepoint_label); } -size_t prepare_labels2(const UnwindOutput &uw_output, const PerfWatcher &watcher, +size_t prepare_labels2(const UnwindOutput &uw_output, + const PerfWatcher &watcher, std::unordered_map &pid_strs, const DDProfPProf::DictLabelKeyIds &label_keys, std::span labels) { @@ -153,7 +154,8 @@ bool is_stack_complete(std::span locations, return true; } - const std::string_view root_func = get_location2_function_name(dict, root_loc); + const std::string_view root_func = + get_location2_function_name(dict, root_loc); return std::find(s_expected_root_frames.begin(), s_expected_root_frames.end(), root_func) != s_expected_root_frames.end(); } @@ -566,10 +568,10 @@ DDRes pprof_aggregate_interned_sample( Symbolizer::BlazeResultsWrapper session_results; unsigned write_index = 0; - DDRES_CHECK_FWD(process_symbolization( - locs, symbol_hdr, file_infos, symbolizer, - symbol_hdr.profiles_dictionary(), locations2_buff, session_results, - write_index)); + DDRES_CHECK_FWD( + process_symbolization(locs, symbol_hdr, file_infos, symbolizer, + symbol_hdr.profiles_dictionary(), locations2_buff, + session_results, write_index)); std::array labels{}; const size_t labels_num = @@ -578,9 +580,8 @@ DDRes pprof_aggregate_interned_sample( if (show_samples) { ddprof_print_sample(std::span{locations2_buff.data(), write_index}, - symbol_hdr.profiles_dictionary(), - pack.value, uw_output->pid, uw_output->tid, value_pos, - *watcher); + symbol_hdr.profiles_dictionary(), pack.value, + uw_output->pid, uw_output->tid, value_pos, *watcher); } ddog_prof_Sample2 const sample = { @@ -589,7 +590,8 @@ DDRes pprof_aggregate_interned_sample( .labels = {.ptr = labels.data(), .len = labels_num}, }; - ddog_prof_Status res = ddog_prof_Profile_add2(profile, sample, pack.timestamp); + ddog_prof_Status res = + ddog_prof_Profile_add2(profile, sample, pack.timestamp); if (res.err != nullptr) { defer { ddog_prof_Status_drop(&res); }; DDRES_RETURN_ERROR_LOG(DD_WHAT_PPROF, "Unable to add profile: %s", res.err); diff --git a/src/runtime_symbol_lookup.cc b/src/runtime_symbol_lookup.cc index b58301ebe..670fec988 100644 --- a/src/runtime_symbol_lookup.cc +++ b/src/runtime_symbol_lookup.cc @@ -70,14 +70,14 @@ bool RuntimeSymbolLookup::insert_or_replace( symbol_map.emplace_hint( find_res.first, address, SymbolSpan(address + code_size - 1, symbol_table.size())); - symbol_table.emplace_back(make_symbol(std::string(symbol), - std::string(symbol), 0, "jit", - dict)); + symbol_table.emplace_back( + make_symbol(std::string(symbol), std::string(symbol), 0, "jit", dict)); } else { // todo managing range erase (we can overall with other syms) SymbolIdx_t const existing = find_res.first->second.get_symbol_idx(); ddog_prof_StringId2 name_id = intern_string(dict, symbol); - if (symbol_table[existing]._name_id == name_id) { + if (symbol_table[existing]._function_id && + symbol_table[existing]._function_id->name == name_id) { find_res.first->second.set_end(address + code_size - 1); } else { // remove current element (as start can be different) @@ -85,11 +85,7 @@ bool RuntimeSymbolLookup::insert_or_replace( symbol_map.emplace(address, SymbolSpan(address + code_size - 1, existing)); Symbol &existing_symbol = symbol_table[existing]; - existing_symbol._name_id = name_id; - existing_symbol._file_id = intern_string(dict, "jit"); - existing_symbol._function_id = intern_function_ids( - dict, existing_symbol._name_id, existing_symbol._file_id, - DDOG_PROF_STRINGID2_EMPTY); + existing_symbol._function_id = intern_function(dict, symbol, "jit"); } } @@ -173,11 +169,9 @@ DDRes RuntimeSymbolLookup::fill_from_perfmap( return {}; } -SymbolIdx_t -RuntimeSymbolLookup::get_or_insert_jitdump(pid_t pid, ProcessAddress_t pc, - SymbolTable &symbol_table, - const ddog_prof_ProfilesDictionary *dict, - std::string_view jitdump_path) { +SymbolIdx_t RuntimeSymbolLookup::get_or_insert_jitdump( + pid_t pid, ProcessAddress_t pc, SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict, std::string_view jitdump_path) { SymbolInfo &symbol_info = _pid_map[pid]; SymbolMap::FindRes find_res = symbol_info._map.find_closest(pc); if (!find_res.second && !has_lookup_failure(symbol_info, jitdump_path)) { @@ -199,9 +193,10 @@ RuntimeSymbolLookup::get_or_insert_jitdump(pid_t pid, ProcessAddress_t pc, return find_res.second ? find_res.first->second.get_symbol_idx() : -1; } -SymbolIdx_t RuntimeSymbolLookup::get_or_insert(pid_t pid, ProcessAddress_t pc, - SymbolTable &symbol_table, - const ddog_prof_ProfilesDictionary *dict) { +SymbolIdx_t +RuntimeSymbolLookup::get_or_insert(pid_t pid, ProcessAddress_t pc, + SymbolTable &symbol_table, + const ddog_prof_ProfilesDictionary *dict) { SymbolInfo &symbol_info = _pid_map[pid]; SymbolMap::FindRes find_res = symbol_info._map.find_closest(pc); diff --git a/src/symbol_hdr.cc b/src/symbol_hdr.cc index a182c00e1..4552b5bbe 100644 --- a/src/symbol_hdr.cc +++ b/src/symbol_hdr.cc @@ -32,4 +32,3 @@ SymbolHdr::SymbolHdr(std::string_view path_to_proc) } } // namespace ddprof - diff --git a/src/unwind_helper.cc b/src/unwind_helper.cc index eb3ddcd50..ce88b149f 100644 --- a/src/unwind_helper.cc +++ b/src/unwind_helper.cc @@ -53,8 +53,9 @@ void add_common_frame(UnwindState *us, SymbolErrors lookup_case) { const ddog_prof_ProfilesDictionary *dict = us->symbol_hdr.profiles_dictionary(); add_frame_without_mapping( - us, us->symbol_hdr._common_symbol_lookup.get_or_insert( - lookup_case, us->symbol_hdr._symbol_table, dict)); + us, + us->symbol_hdr._common_symbol_lookup.get_or_insert( + lookup_case, us->symbol_hdr._symbol_table, dict)); } void add_dso_frame(UnwindState *us, const Dso &dso, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 99514d4f0..8d70bd677 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -359,11 +359,16 @@ add_unit_test(timer-ut timer-ut.cc ../src/tsc_clock.cc ../src/perf.cc) add_unit_test(ddprof_file_info-ut ddprof_file_info-ut.cc) -add_unit_test(runtime_symbol_lookup-ut runtime_symbol_lookup-ut.cc ../src/runtime_symbol_lookup.cc - ../src/ddog_profiling_utils.cc ../src/symbol_hdr.cc - ../src/demangler/demangler.cc ../src/symbol_map.cc - ../src/jit/jitdump.cc - LIBRARIES Datadog::Profiling llvm-demangle) +add_unit_test( + runtime_symbol_lookup-ut + runtime_symbol_lookup-ut.cc + ../src/runtime_symbol_lookup.cc + ../src/ddog_profiling_utils.cc + ../src/symbol_hdr.cc + ../src/demangler/demangler.cc + ../src/symbol_map.cc + ../src/jit/jitdump.cc + LIBRARIES Datadog::Profiling llvm-demangle) add_unit_test(ddprof_cpumask-ut ddprof_cpumask-ut.cc ../src/ddprof_cpumask.cc) diff --git a/test/ddprof_exporter-ut.cc b/test/ddprof_exporter-ut.cc index 603da41a6..9a8e17999 100644 --- a/test/ddprof_exporter-ut.cc +++ b/test/ddprof_exporter-ut.cc @@ -162,7 +162,7 @@ TEST(DDProfExporter, simple) { DDProfContext ctx = {}; ctx.watchers.push_back(*ewatcher_from_str("sCPU")); res = pprof_create_profile(&pprofs, ctx, - symbol_hdr._profiles_dictionary.get()); + symbol_hdr._profiles_dictionary.get()); EXPECT_TRUE(IsDDResOK(res)); res = pprof_aggregate_interned_sample( &mock_output, symbol_hdr, {1000, 1, 0}, &ctx.watchers[0], file_infos, diff --git a/test/ddprof_pprof-ut.cc b/test/ddprof_pprof-ut.cc index 4a727e998..c0a1a676f 100644 --- a/test/ddprof_pprof-ut.cc +++ b/test/ddprof_pprof-ut.cc @@ -69,8 +69,8 @@ TEST(DDProfPProf, aggregate) { bool ok = watchers_from_str("sCPU", ctx.watchers); EXPECT_TRUE(ok); - DDRes res = pprof_create_profile(&pprof, ctx, - symbol_hdr._profiles_dictionary.get()); + DDRes res = + pprof_create_profile(&pprof, ctx, symbol_hdr._profiles_dictionary.get()); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_index != -1); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_count_index != -1); res = pprof_aggregate_interned_sample( @@ -107,8 +107,8 @@ TEST(DDProfPProf, just_live) { log_watcher(&(ctx.watchers[0]), 0); log_watcher(&(ctx.watchers[1]), 1); - DDRes res = pprof_create_profile(&pprof, ctx, - symbol_hdr._profiles_dictionary.get()); + DDRes res = + pprof_create_profile(&pprof, ctx, symbol_hdr._profiles_dictionary.get()); EXPECT_TRUE(IsDDResOK(res)); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_index == -1); EXPECT_TRUE(ctx.watchers[0].pprof_indices[kSumPos].pprof_count_index == -1); diff --git a/test/runtime_symbol_lookup-ut.cc b/test/runtime_symbol_lookup-ut.cc index 094e5e17c..50338fa9a 100644 --- a/test/runtime_symbol_lookup-ut.cc +++ b/test/runtime_symbol_lookup-ut.cc @@ -38,9 +38,8 @@ TEST(runtime_symbol_lookup, no_map) { RuntimeSymbolLookup runtime_symbol_lookup(UNIT_TEST_DATA); ProcessAddress_t pc = 0x7FB0614BB980; // no pid 43 - SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(43, pc, symbol_table, - symbol_hdr.profiles_dictionary()); + SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert( + 43, pc, symbol_table, symbol_hdr.profiles_dictionary()); // We expect no symbols to be found for this pid ASSERT_EQ(symbol_idx, -1); } @@ -51,12 +50,12 @@ TEST(runtime_symbol_lookup, parse_map) { RuntimeSymbolLookup runtime_symbol_lookup(UNIT_TEST_DATA); // reads a file with symbols generated from .NET ProcessAddress_t pc = 0x7FB0614BB980; - SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(42, pc, symbol_table, - symbol_hdr.profiles_dictionary()); + SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert( + 42, pc, symbol_table, symbol_hdr.profiles_dictionary()); ASSERT_NE(symbol_idx, -1); - const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), - symbol_table[symbol_idx]._name_id); + const std::string sym_name = + dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._function_id->name); ASSERT_TRUE(sym_name.find("RuntimeEnvironmentInfo::get_OsPlatform") != std::string::npos); } @@ -68,20 +67,19 @@ TEST(runtime_symbol_lookup, overflow) { // reads a file with symbols generated from .NET { ProcessAddress_t pc = 0x00007FB06149E6A0; - SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(1, pc, symbol_table, - symbol_hdr.profiles_dictionary()); + SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert( + 1, pc, symbol_table, symbol_hdr.profiles_dictionary()); ASSERT_NE(symbol_idx, -1); - const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), - symbol_table[symbol_idx]._name_id); + const std::string sym_name = + dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._function_id->name); LG_NFO("%s", sym_name.c_str()); ASSERT_TRUE(sym_name.size() <= 300); } { ProcessAddress_t pc = 0xFFFFFFFFFFFFFFFE; - SymbolIdx_t symbol_idx = - runtime_symbol_lookup.get_or_insert(1, pc, symbol_table, - symbol_hdr.profiles_dictionary()); + SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert( + 1, pc, symbol_table, symbol_hdr.profiles_dictionary()); ASSERT_EQ(symbol_idx, -1); } } @@ -97,8 +95,9 @@ TEST(runtime_symbol_lookup, jitdump_simple) { SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_NE(symbol_idx, -1); - const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), - symbol_table[symbol_idx]._name_id); + const std::string sym_name = + dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._function_id->name); ASSERT_EQ(std::string("julia_b_11"), sym_name); } @@ -174,8 +173,9 @@ TEST(runtime_symbol_lookup, relative_path) { SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( 42, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_NE(symbol_idx, -1); - const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), - symbol_table[symbol_idx]._name_id); + const std::string sym_name = + dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._function_id->name); EXPECT_EQ(sym_name, "julia_b_11"); { RuntimeSymbolLookup::Stats stats = runtime_symbol_lookup.get_stats(); @@ -203,8 +203,9 @@ TEST(runtime_symbol_lookup, jitdump_vs_perfmap) { SymbolIdx_t symbol_idx = runtime_symbol_lookup.get_or_insert_jitdump( mypid, pc, symbol_table, symbol_hdr.profiles_dictionary(), jit_path); ASSERT_NE(symbol_idx, -1); - const std::string sym_name = dict_string(symbol_hdr.profiles_dictionary(), - symbol_table[symbol_idx]._name_id); + const std::string sym_name = + dict_string(symbol_hdr.profiles_dictionary(), + symbol_table[symbol_idx]._function_id->name); EXPECT_EQ(sym_name, expected_sym); { RuntimeSymbolLookup::Stats stats = runtime_symbol_lookup.get_stats(); @@ -219,7 +220,7 @@ TEST(runtime_symbol_lookup, jitdump_vs_perfmap) { ASSERT_NE(symbol_idx, -1); const std::string sym_name_perfmap = dict_string(symbol_hdr.profiles_dictionary(), - symbol_table_perfmap[symbol_idx]._name_id); + symbol_table_perfmap[symbol_idx]._function_id->name); EXPECT_EQ(sym_name_perfmap, expected_sym); { RuntimeSymbolLookup::Stats stats = diff --git a/test/unwind_output_mock.hpp b/test/unwind_output_mock.hpp index 23eac86a3..baa48491c 100644 --- a/test/unwind_output_mock.hpp +++ b/test/unwind_output_mock.hpp @@ -30,9 +30,9 @@ static inline void fill_symbol_table_1(SymbolTable &symbol_table, const ddog_prof_ProfilesDictionary *dict) { for (unsigned i = 0; i < K_MOCK_LOC_SIZE; ++i) { - symbol_table.emplace_back(make_symbol( - std::string(s_syn_names[i]), std::string(s_func_names[i]), 10 * i, - std::string(s_src_paths[i]), dict)); + symbol_table.emplace_back(make_symbol(std::string(s_syn_names[i]), + std::string(s_func_names[i]), 10 * i, + std::string(s_src_paths[i]), dict)); } } @@ -42,7 +42,8 @@ fill_mapinfo_table_1(MapInfoTable &mapinfo_table, for (unsigned i = 0; i < K_MOCK_LOC_SIZE; ++i) { mapinfo_table.emplace_back(100 + i, 200 + i, 10 + i, std::string{s_so_paths[0]}, BuildIdStr{}); - mapinfo_table.back()._mapping_id = intern_mapping(dict, mapinfo_table.back()); + mapinfo_table.back()._mapping_id = + intern_mapping(dict, mapinfo_table.back()); } } @@ -58,10 +59,10 @@ static inline void fill_unwind_output_1(UnwindOutput &uw_output) { } } -static inline void fill_unwind_symbols(SymbolTable &symbol_table, - MapInfoTable &mapinfo_table, - UnwindOutput &uw_output, - const ddog_prof_ProfilesDictionary *dict) { +static inline void +fill_unwind_symbols(SymbolTable &symbol_table, MapInfoTable &mapinfo_table, + UnwindOutput &uw_output, + const ddog_prof_ProfilesDictionary *dict) { fill_symbol_table_1(symbol_table, dict); fill_mapinfo_table_1(mapinfo_table, dict); fill_unwind_output_1(uw_output); From 341431baa273980193eea0e299d16e9e1c187263 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Feb 2026 14:23:59 +0100 Subject: [PATCH 9/9] Linter fix - const correctness fix --- include/ddog_profiling_utils.hpp | 4 ++-- src/ddog_profiling_utils.cc | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/ddog_profiling_utils.hpp b/include/ddog_profiling_utils.hpp index b4e5f53dc..d4eb88914 100644 --- a/include/ddog_profiling_utils.hpp +++ b/include/ddog_profiling_utils.hpp @@ -50,8 +50,8 @@ ddog_prof_FunctionId2 intern_function(const ddog_prof_ProfilesDictionary *dict, ddog_prof_MappingId2 intern_mapping(const ddog_prof_ProfilesDictionary *dict, const MapInfo &mapinfo); -Symbol make_symbol(std::string symname, std::string demangled_name, - uint32_t lineno, std::string srcpath, +Symbol make_symbol(std::string symname, const std::string &demangled_name, + uint32_t lineno, const std::string &srcpath, const ddog_prof_ProfilesDictionary *dict); void write_location2(const FunLoc &loc, const MapInfo &mapinfo, diff --git a/src/ddog_profiling_utils.cc b/src/ddog_profiling_utils.cc index ddbdc1074..c6c7ec28f 100644 --- a/src/ddog_profiling_utils.cc +++ b/src/ddog_profiling_utils.cc @@ -87,7 +87,7 @@ intern_function_ids(const ddog_prof_ProfilesDictionary *dict, if (!dict) { return nullptr; } - ddog_prof_Function2 function = { + const ddog_prof_Function2 function = { .name = name_id, .system_name = system_name_id, .file_name = file_id, @@ -118,7 +118,7 @@ ddog_prof_MappingId2 intern_mapping(const ddog_prof_ProfilesDictionary *dict, if (!dict) { return nullptr; } - ddog_prof_Mapping2 mapping = { + const ddog_prof_Mapping2 mapping = { .memory_start = mapinfo._low_addr, .memory_limit = mapinfo._high_addr, .file_offset = mapinfo._offset, @@ -136,13 +136,13 @@ ddog_prof_MappingId2 intern_mapping(const ddog_prof_ProfilesDictionary *dict, return mapping_id; } -Symbol make_symbol(std::string symname, std::string demangled_name, - uint32_t lineno, std::string srcpath, +Symbol make_symbol(std::string symname, const std::string &demangled_name, + uint32_t lineno, const std::string &srcpath, const ddog_prof_ProfilesDictionary *dict) { - [[maybe_unused]] std::string ignored_symname = std::move(symname); + [[maybe_unused]] const std::string ignored_symname = std::move(symname); ddog_prof_FunctionId2 function_id = intern_function(dict, demangled_name, srcpath); - return Symbol(lineno, function_id); + return {lineno, function_id}; } void write_location2(const FunLoc &loc, const MapInfo &mapinfo,