From 70b1a33a8b567dbbdccc7c7215e5455072c0dd41 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 12 Aug 2025 13:50:27 -0400 Subject: [PATCH 01/80] build with debug symbol by default --- setup.py | 29 +++++++++++++++-------------- src/native/Cargo.toml | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/setup.py b/setup.py index 1166cc31ac7..e205776b22a 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ HERE = Path(__file__).resolve().parent -COMPILE_MODE = "Release" +COMPILE_MODE = "RelWithDebInfo" if "DD_COMPILE_DEBUG" in os.environ: warnings.warn( "The DD_COMPILE_DEBUG environment variable is deprecated and will be deleted, " @@ -62,7 +62,7 @@ ) COMPILE_MODE = "Debug" else: - COMPILE_MODE = os.environ.get("DD_COMPILE_MODE", "Release") + COMPILE_MODE = os.environ.get("DD_COMPILE_MODE", "RelWithDebInfo") FAST_BUILD = os.getenv("DD_FAST_BUILD", "false").lower() in ("1", "yes", "on", "true") if FAST_BUILD: @@ -422,7 +422,7 @@ def run(self): self.build_extension(ext) def build_rust(self): - is_release = True + is_release = COMPILE_MODE.lower() != "debug" build_crate(NATIVE_CRATE, is_release, native_features) target_dir = NATIVE_CRATE / "target" @@ -486,17 +486,18 @@ def is_installed(bin_file): @staticmethod def try_strip_symbols(so_file): - if CURRENT_OS == "Linux" and shutil.which("strip") is not None: - try: - subprocess.run(["strip", "-g", so_file], check=True) - except subprocess.CalledProcessError as e: - print( - "WARNING: stripping '{}' returned non-zero exit status ({}), ignoring".format(so_file, e.returncode) - ) - except Exception as e: - print( - "WARNING: An error occurred while stripping the symbols from '{}', ignoring: {}".format(so_file, e) - ) + pass + # if CURRENT_OS == "Linux" and shutil.which("strip") is not None: + # try: + # subprocess.run(["strip", "-g", so_file], check=True) + # except subprocess.CalledProcessError as e: + # print( + # "WARNING: stripping '{}' returned non-zero exit status ({}), 
ignoring".format(so_file, e.returncode) + # ) + # except Exception as e: + # print( + # "WARNING: An error occurred while stripping the symbols from '{}', ignoring: {}".format(so_file, e) + # ) def build_extension(self, ext): if isinstance(ext, CMakeExtension): diff --git a/src/native/Cargo.toml b/src/native/Cargo.toml index 924540d06fd..03e14cc91d3 100644 --- a/src/native/Cargo.toml +++ b/src/native/Cargo.toml @@ -6,7 +6,7 @@ resolver = "2" [profile.release] lto = true -strip = "debuginfo" +debug = "line-tables-only" opt-level = 's' codegen-units = 1 From d01f289d9ebb8d7dc75dd672ecf1f82860b5a867 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 12 Aug 2025 15:07:13 -0400 Subject: [PATCH 02/80] strip debug symbols --- setup.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index e205776b22a..37e6fe84951 100644 --- a/setup.py +++ b/setup.py @@ -486,18 +486,64 @@ def is_installed(bin_file): @staticmethod def try_strip_symbols(so_file): - pass - # if CURRENT_OS == "Linux" and shutil.which("strip") is not None: - # try: - # subprocess.run(["strip", "-g", so_file], check=True) - # except subprocess.CalledProcessError as e: - # print( - # "WARNING: stripping '{}' returned non-zero exit status ({}), ignoring".format(so_file, e.returncode) - # ) - # except Exception as e: - # print( - # "WARNING: An error occurred while stripping the symbols from '{}', ignoring: {}".format(so_file, e) - # ) + if COMPILE_MODE.lower() == "debug": + return + + if CURRENT_OS == "Linux": + objcopy = shutil.which("objcopy") + strip = shutil.which("strip") + + if not objcopy: + print("WARNING: objcopy not found, skipping symbol stripping", file=sys.stderr) + return + + if not strip: + print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) + return + + # Try removing the .llvmbc section from the .so file + subprocess.run([objcopy, "--remove-section", ".llvmbc", so_file], 
check=False) + + # Then keep the debug symbols in a separate file + debug_out = f"{so_file}.debug" + subprocess.run([objcopy, "--only-keep-debug", so_file, debug_out], check=True) + + # Strip the debug symbols from the .so file + subprocess.run([strip, "-S", so_file], check=True) + + # Link the debug symbols to the .so file + subprocess.run([objcopy, "--add-gnu-debuglink", debug_out, so_file], check=True) + + elif CURRENT_OS == "Darwin": + dsymutil = shutil.which("dsymutil") + strip = shutil.which("strip") + llvm_objcopy = shutil.which("llvm-objcopy") + + if dsymutil: + # 1) Emit dSYM + subprocess.run([dsymutil, so_file, "-o", so_file.with_suffix(".dSYM")], check=False) + + if strip: + # Strip DWARF + local symbols + subprocess.run([strip, "-S", "-x", so_file], check=True) + else: + print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) + + # optionally prune embedded LLVM bitcode sections + if llvm_objcopy: + subprocess.run( + [ + llvm_objcopy, + "--remove-section", + "__LLVM,__bitcode", + "--remove-section", + "__LLVM,__cmdline", + so_file, + ], + check=False, + ) + else: + print("INFO: llvm-objcopy not found, skipping symbol stripping", file=sys.stderr) def build_extension(self, ext): if isinstance(ext, CMakeExtension): From a2ebbe9c7fcc0e227f4fb27e667caa746b18839f Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 12 Aug 2025 15:18:12 -0400 Subject: [PATCH 03/80] strip symbols when not debug --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 37e6fe84951..7dbd96592ff 100644 --- a/setup.py +++ b/setup.py @@ -562,7 +562,7 @@ def build_extension(self, ext): else: super().build_extension(ext) - if COMPILE_MODE.lower() in ("release", "minsizerel"): + if COMPILE_MODE.lower() != "debug": try: self.try_strip_symbols(self.get_ext_fullpath(ext.name)) except Exception as e: From 48d7d415a98202f612552f4f42b829b686e50f3f Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 12 Aug 2025 15:33:42 
-0400 Subject: [PATCH 04/80] use -g --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7dbd96592ff..21913fa1217 100644 --- a/setup.py +++ b/setup.py @@ -509,7 +509,7 @@ def try_strip_symbols(so_file): subprocess.run([objcopy, "--only-keep-debug", so_file, debug_out], check=True) # Strip the debug symbols from the .so file - subprocess.run([strip, "-S", so_file], check=True) + subprocess.run([strip, "-g", so_file], check=True) # Link the debug symbols to the .so file subprocess.run([objcopy, "--add-gnu-debuglink", debug_out, so_file], check=True) From 9353a9e7f51879d46ca412e5c26f56deb9e99e28 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 12 Aug 2025 16:50:09 -0400 Subject: [PATCH 05/80] ignore .debug files --- .gitignore | 2 ++ .../internal/datadog/profiling/dd_wrapper/CMakeLists.txt | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 546d1b689d1..1ba1c0bdd96 100644 --- a/.gitignore +++ b/.gitignore @@ -191,3 +191,5 @@ tests/appsec/iast/fixtures/taint_sinks/not_exists.txt # .env file .env .envrc + +.debug diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt index feb285a33aa..a455a8542d5 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt @@ -81,7 +81,11 @@ string(TOLOWER ${PLATFORM_SUFFIX} PLATFORM_SUFFIX) # but as long as it encodes the major moving parts, it's fine if(DEFINED EXTENSION_SUFFIX) - set(DD_WRAPPER_TARGET_NAME "dd_wrapper${EXTENSION_SUFFIX}") + # EXTENSION_SUFFIX already contains .so (both Linux and macOS Python extensions use .so) + # but CMake will add its own extension (.so on Linux, .dylib on macOS) + # So we need to strip the .so extension to avoid double extensions + string(REGEX REPLACE "\\.so$" "" EXTENSION_SUFFIX_NO_EXT "${EXTENSION_SUFFIX}") + 
set(DD_WRAPPER_TARGET_NAME "dd_wrapper${EXTENSION_SUFFIX_NO_EXT}") else() set(DD_WRAPPER_TARGET_NAME "dd_wrapper-${PLATFORM_SUFFIX}") endif() From 00ace36b8b61a1c2fc8089b16b823f0d3728ed71 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 12 Aug 2025 16:59:09 -0400 Subject: [PATCH 06/80] ignore *.debug files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1ba1c0bdd96..5b2d90f935a 100644 --- a/.gitignore +++ b/.gitignore @@ -192,4 +192,4 @@ tests/appsec/iast/fixtures/taint_sinks/not_exists.txt .env .envrc -.debug +*.debug From ddcaefb67f651a8a4a3165d011a7613a2c832f3b Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 12 Aug 2025 17:01:02 -0400 Subject: [PATCH 07/80] strip debug symbols to .debug for libdd_wrapper --- .../profiling/dd_wrapper/CMakeLists.txt | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt index a455a8542d5..4312635f1f2 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt @@ -81,9 +81,8 @@ string(TOLOWER ${PLATFORM_SUFFIX} PLATFORM_SUFFIX) # but as long as it encodes the major moving parts, it's fine if(DEFINED EXTENSION_SUFFIX) - # EXTENSION_SUFFIX already contains .so (both Linux and macOS Python extensions use .so) - # but CMake will add its own extension (.so on Linux, .dylib on macOS) - # So we need to strip the .so extension to avoid double extensions + # EXTENSION_SUFFIX already contains .so (both Linux and macOS Python extensions use .so) but CMake will add its own + # extension (.so on Linux, .dylib on macOS) So we need to strip the .so extension to avoid double extensions string(REGEX REPLACE "\\.so$" "" EXTENSION_SUFFIX_NO_EXT "${EXTENSION_SUFFIX}") set(DD_WRAPPER_TARGET_NAME "dd_wrapper${EXTENSION_SUFFIX_NO_EXT}") else() @@ 
-123,6 +122,12 @@ if(LIB_INSTALL_DIR) LIBRARY DESTINATION ${LIB_INSTALL_DIR}/.. ARCHIVE DESTINATION ${LIB_INSTALL_DIR}/.. RUNTIME DESTINATION ${LIB_INSTALL_DIR}/..) + + # Install debug files if they exist (created by objcopy) + install( + FILES $.debug + DESTINATION ${LIB_INSTALL_DIR}/.. + OPTIONAL) endif() # Configure cppcheck @@ -140,6 +145,24 @@ add_cppcheck_target( add_infer_target(dd_wrapper) add_clangtidy_target(dd_wrapper) +# Generate debug files on Linux when not in debug mode We handle this here in CMakefile instead of in setup.py because +# dd_wrapper is built as a shared library to be depended by other native extensions. +if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug") + find_program(OBJCOPY_EXECUTABLE objcopy) + find_program(STRIP_EXECUTABLE strip) + + if(OBJCOPY_EXECUTABLE AND STRIP_EXECUTABLE) + # Create debug file after build + add_custom_command( + TARGET dd_wrapper + POST_BUILD + COMMAND ${OBJCOPY_EXECUTABLE} --only-keep-debug $ $.debug + COMMAND ${STRIP_EXECUTABLE} -g $ + COMMAND ${OBJCOPY_EXECUTABLE} --add-gnu-debuglink $.debug $ + COMMENT "Creating debug symbols for dd_wrapper") + endif() +endif() + # Add the tests if(BUILD_TESTING) enable_testing() From aa38f9d01f926d70bbc006e90836ffe64c1ca098 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 13 Aug 2025 01:19:51 -0400 Subject: [PATCH 08/80] update --- .github/workflows/build_python_3.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index b90c1f657e2..1976d395b6d 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -25,7 +25,7 @@ jobs: - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: "3.13" - - run: pip install cibuildwheel==2.23.3 + - run: pip install cibuildwheel==3.1.3 - id: set-matrix env: CIBW_BUILD: ${{ inputs.cibw_build }} @@ -99,7 +99,7 @@ jobs: platforms: 
all - name: Build wheels - uses: pypa/cibuildwheel@faf86a6ed7efa889faf6996aa23820831055001a # v2.23.3 + uses: pypa/cibuildwheel@v3.1.3 with: only: ${{ matrix.only }} From 48c49bdfb6abc85221bb652b72b24a6462b9b47b Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 13 Aug 2025 01:25:14 -0400 Subject: [PATCH 09/80] use hash --- .github/workflows/build_python_3.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 1976d395b6d..3af9c4c979b 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -99,7 +99,7 @@ jobs: platforms: all - name: Build wheels - uses: pypa/cibuildwheel@v3.1.3 + uses: pypa/cibuildwheel@352e01339f0a173aa2a3eb57f01492e341e83865 # v3.1.3 with: only: ${{ matrix.only }} From 2fc6965f9ab33f5a293a07691cc7b7da59979838 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 13 Aug 2025 01:29:02 -0400 Subject: [PATCH 10/80] --archs --- .github/workflows/build_python_3.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 3af9c4c979b..76771fe7b4d 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -32,11 +32,11 @@ jobs: run: | MATRIX_INCLUDE=$( { - cibuildwheel --print-build-identifiers --platform linux --arch x86_64,i686 | jq -cR '{only: ., os: "ubuntu-latest"}' \ - && cibuildwheel --print-build-identifiers --platform linux --arch aarch64 | jq -cR '{only: ., os: "ubuntu-24.04-arm"}' \ - && cibuildwheel --print-build-identifiers --platform windows --arch AMD64,x86 | jq -cR '{only: ., os: "windows-latest"}' \ - && cibuildwheel --print-build-identifiers --platform macos --arch x86_64 | jq -cR '{only: ., os: "macos-13"}' \ - && cibuildwheel --print-build-identifiers --platform macos --arch arm64 | jq -cR '{only: ., os: "macos-latest"}' + cibuildwheel --print-build-identifiers 
--platform linux --archs x86_64,i686 | jq -cR '{only: ., os: "ubuntu-latest"}' \ + && cibuildwheel --print-build-identifiers --platform linux --archs aarch64 | jq -cR '{only: ., os: "ubuntu-24.04-arm"}' \ + && cibuildwheel --print-build-identifiers --platform windows --archs AMD64,x86 | jq -cR '{only: ., os: "windows-latest"}' \ + && cibuildwheel --print-build-identifiers --platform macos --archs x86_64 | jq -cR '{only: ., os: "macos-13"}' \ + && cibuildwheel --print-build-identifiers --platform macos --archs arm64 | jq -cR '{only: ., os: "macos-latest"}' } | jq -sc ) echo $MATRIX_INCLUDE From f366bb573d5bce1bbd332665b35ad1d496c232ec Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 13 Aug 2025 18:41:39 -0400 Subject: [PATCH 11/80] Revert "update" This reverts commit aa38f9d01f926d70bbc006e90836ffe64c1ca098. --- .github/workflows/build_python_3.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 76771fe7b4d..7a2d6886a43 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -25,7 +25,7 @@ jobs: - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: "3.13" - - run: pip install cibuildwheel==3.1.3 + - run: pip install cibuildwheel==2.23.3 - id: set-matrix env: CIBW_BUILD: ${{ inputs.cibw_build }} @@ -99,7 +99,7 @@ jobs: platforms: all - name: Build wheels - uses: pypa/cibuildwheel@352e01339f0a173aa2a3eb57f01492e341e83865 # v3.1.3 + uses: pypa/cibuildwheel@faf86a6ed7efa889faf6996aa23820831055001a # v2.23.3 with: only: ${{ matrix.only }} From b5abe41a40eba2959e495a14137ba8447382c945 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 13 Aug 2025 18:42:06 -0400 Subject: [PATCH 12/80] Revert "--archs" This reverts commit 2fc6965f9ab33f5a293a07691cc7b7da59979838. 
--- .github/workflows/build_python_3.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 7a2d6886a43..b90c1f657e2 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -32,11 +32,11 @@ jobs: run: | MATRIX_INCLUDE=$( { - cibuildwheel --print-build-identifiers --platform linux --archs x86_64,i686 | jq -cR '{only: ., os: "ubuntu-latest"}' \ - && cibuildwheel --print-build-identifiers --platform linux --archs aarch64 | jq -cR '{only: ., os: "ubuntu-24.04-arm"}' \ - && cibuildwheel --print-build-identifiers --platform windows --archs AMD64,x86 | jq -cR '{only: ., os: "windows-latest"}' \ - && cibuildwheel --print-build-identifiers --platform macos --archs x86_64 | jq -cR '{only: ., os: "macos-13"}' \ - && cibuildwheel --print-build-identifiers --platform macos --archs arm64 | jq -cR '{only: ., os: "macos-latest"}' + cibuildwheel --print-build-identifiers --platform linux --arch x86_64,i686 | jq -cR '{only: ., os: "ubuntu-latest"}' \ + && cibuildwheel --print-build-identifiers --platform linux --arch aarch64 | jq -cR '{only: ., os: "ubuntu-24.04-arm"}' \ + && cibuildwheel --print-build-identifiers --platform windows --arch AMD64,x86 | jq -cR '{only: ., os: "windows-latest"}' \ + && cibuildwheel --print-build-identifiers --platform macos --arch x86_64 | jq -cR '{only: ., os: "macos-13"}' \ + && cibuildwheel --print-build-identifiers --platform macos --arch arm64 | jq -cR '{only: ., os: "macos-latest"}' } | jq -sc ) echo $MATRIX_INCLUDE From 530091f2ac98fda03658d4f467f2381d1d54f95b Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 13 Aug 2025 20:51:05 -0400 Subject: [PATCH 13/80] handle this later --- setup.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/setup.py b/setup.py index 21913fa1217..5b423a4d90c 100644 --- a/setup.py +++ b/setup.py @@ -517,11 +517,11 @@ def 
try_strip_symbols(so_file): elif CURRENT_OS == "Darwin": dsymutil = shutil.which("dsymutil") strip = shutil.which("strip") - llvm_objcopy = shutil.which("llvm-objcopy") if dsymutil: # 1) Emit dSYM - subprocess.run([dsymutil, so_file, "-o", so_file.with_suffix(".dSYM")], check=False) + print(f"dsymutil {so_file} -o {Path(so_file).with_suffix('.dSYM')}") + subprocess.run([dsymutil, so_file, "-o", Path(so_file).with_suffix(".dSYM")], check=False) if strip: # Strip DWARF + local symbols @@ -529,22 +529,6 @@ def try_strip_symbols(so_file): else: print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) - # optionally prune embedded LLVM bitcode sections - if llvm_objcopy: - subprocess.run( - [ - llvm_objcopy, - "--remove-section", - "__LLVM,__bitcode", - "--remove-section", - "__LLVM,__cmdline", - so_file, - ], - check=False, - ) - else: - print("INFO: llvm-objcopy not found, skipping symbol stripping", file=sys.stderr) - def build_extension(self, ext): if isinstance(ext, CMakeExtension): try: From f4f199f3e3a27ca2ff0ca34d82c9a10b935824ab Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 13:00:52 -0400 Subject: [PATCH 14/80] include debug symbols to be installed/packaged --- MANIFEST.in | 4 ++++ setup.py | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 1baa89a6711..afd6eb74f50 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,7 @@ prune .riot/ prune benchmarks/ prune releasenotes/ + +# Include debug files +global-include *.debug +global-include *.dSYM diff --git a/setup.py b/setup.py index 711ddc7b3ef..d32b4be584f 100644 --- a/setup.py +++ b/setup.py @@ -558,12 +558,13 @@ def try_strip_symbols(so_file): if dsymutil: # 1) Emit dSYM - print(f"dsymutil {so_file} -o {Path(so_file).with_suffix('.dSYM')}") - subprocess.run([dsymutil, so_file, "-o", Path(so_file).with_suffix(".dSYM")], check=False) + dsym_path = Path(so_file).with_suffix(".dSYM") + subprocess.run([dsymutil, 
so_file, "-o", str(dsym_path)], check=False) if strip: # Strip DWARF + local symbols subprocess.run([strip, "-S", "-x", so_file], check=True) + pass else: print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) @@ -1006,6 +1007,8 @@ def get_exts_for(name): "ddtrace.internal.datadog.profiling": ( ["libdd_wrapper*.*"] + ["ddtrace/internal/datadog/profiling/test/*"] if BUILD_PROFILING_NATIVE_TESTS else [] ), + # Include debug files for native extensions + **({"": ["*.debug", "*.dSYM/*"]}), }, zip_safe=False, # enum34 is an enum backport for earlier versions of python From d291bbd76293ebf47b23d115fe3074eb77c8d5b1 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 13:31:40 -0400 Subject: [PATCH 15/80] extract debug symbols into separate zip files --- .github/workflows/build_python_3.yml | 17 +++ .gitignore | 1 + MANIFEST.in | 2 +- docs/debug_symbols.rst | 59 ++++++++++ scripts/extract_debug_symbols.py | 161 +++++++++++++++++++++++++++ setup.py | 10 +- 6 files changed, 245 insertions(+), 5 deletions(-) create mode 100644 docs/debug_symbols.rst create mode 100644 scripts/extract_debug_symbols.py diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index b90c1f657e2..f83c1f5f101 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -103,6 +103,18 @@ jobs: with: only: ${{ matrix.only }} + - name: Create debug symbols directory + run: mkdir -p ./debugwheelhouse + + - name: Extract debug symbols from wheels + run: | + for wheel in ./wheelhouse/*.whl; do + if [ -f "$wheel" ]; then + echo "Extracting debug symbols from $(basename $wheel)..." 
+ python scripts/extract_debug_symbols.py "$wheel" ./debugwheelhouse + fi + done + - name: Validate wheel RECORD files shell: bash run: | @@ -124,3 +136,8 @@ jobs: with: name: wheels-${{ env.ARTIFACT_NAME }} path: ./wheelhouse/*.whl + + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: debug-symbols-${{ env.ARTIFACT_NAME }} + path: ./debugwheelhouse/*.zip diff --git a/.gitignore b/.gitignore index 5b2d90f935a..6e3ff138e66 100644 --- a/.gitignore +++ b/.gitignore @@ -193,3 +193,4 @@ tests/appsec/iast/fixtures/taint_sinks/not_exists.txt .envrc *.debug +*.dSYM/ diff --git a/MANIFEST.in b/MANIFEST.in index afd6eb74f50..53781ef1083 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,6 +2,6 @@ prune .riot/ prune benchmarks/ prune releasenotes/ -# Include debug files +# Include debug files (will be extracted to separate packages later) global-include *.debug global-include *.dSYM diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst new file mode 100644 index 00000000000..b35ffb71899 --- /dev/null +++ b/docs/debug_symbols.rst @@ -0,0 +1,59 @@ +Debug Symbols Packaging +======================= + +dd-trace-py is built with debug symbols by default, and packaged separately from the main wheel files to reduce the size of the primary distribution packages. 
+ +Debug Symbol Files +----------------- + +The project generates debug symbols during the build process: + +- **Linux**: `.debug` files (using `objcopy --only-keep-debug`) +- **macOS**: `.dSYM` bundles (using `dsymutil`) + +These debug symbols are extracted from the main wheels and packaged into separate `.zip` files with the naming convention: + +``` +{original-wheel-name}-debug-symbols.zip +``` + +For example: +- `ddtrace-1.20.0-cp39-cp39-linux_x86_64.whl` → `ddtrace-1.20.0-cp39-cp39-linux_x86_64-debug-symbols.zip` +- `ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64.whl` → `ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64-debug-symbols.zip` + +Build Process +------------ + +The debug symbols are handled automatically during the CI build process: + +1. Wheels are built with debug symbols included +2. Debug symbols are extracted using the `scripts/extract_debug_symbols.py` script +3. Debug symbols are removed from the main wheel to reduce size +4. Separate debug symbol packages are created and uploaded as artifacts + +Usage +----- + +To use debug symbols for debugging or crash analysis: + +1. Download the appropriate debug symbol package for your platform and Python version +2. Extract the debug symbol files to the same directory as the corresponding `.so` files +3. 
Your debugger or crash analysis tool should automatically find the debug symbols + +For example, on Linux: +```bash +# Extract debug symbols +unzip ddtrace-1.20.0-cp39-cp39-linux_x86_64-debug-symbols.zip + +# Debug symbols will be placed in the same directory structure as the wheel +# The debugger will automatically find them when analyzing crashes +``` + +On macOS: +```bash +# Extract debug symbols +unzip ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64-debug-symbols.zip + +# The .dSYM bundles will be placed in the same directory structure +# Xcode and other debugging tools will automatically find them +``` diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py new file mode 100644 index 00000000000..e97c06e2c20 --- /dev/null +++ b/scripts/extract_debug_symbols.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +""" +Extract debug symbols from wheels and create separate debug symbol packages. + +This script: +1. Extracts debug symbols (.debug files on Linux, .dSYM bundles on macOS) from wheels +2. Creates separate debug symbol packages +3. 
Removes debug symbols from the original wheel +""" + +import argparse +import csv +import fnmatch +import io +import os +from pathlib import Path +import sys +from typing import List +from typing import Tuple +import zipfile + + +def get_debug_symbol_patterns(): + """Get file patterns for debug symbols based on platform.""" + return ["*.debug", "*.dSYM/*"] + + +def find_debug_symbols_in_wheel(wheel_path: str) -> List[Tuple[str, bytes]]: + """Find debug symbols in a wheel file.""" + debug_symbols = [] + patterns = get_debug_symbol_patterns() + + with zipfile.ZipFile(wheel_path, "r") as wheel: + for file_info in wheel.infolist(): + if any(fnmatch.fnmatch(file_info.filename, pattern) for pattern in patterns): + debug_symbols.append((file_info.filename, wheel.read(file_info.filename))) + + return debug_symbols + + +def create_debug_symbols_package(wheel_path: str, debug_symbols: List[Tuple[str, bytes]], output_dir: str): + """Create a separate debug symbols package.""" + wheel_name = Path(wheel_path).stem + debug_package_name = f"{wheel_name}-debug-symbols.zip" + debug_package_path = os.path.join(output_dir, debug_package_name) + + with zipfile.ZipFile(debug_package_path, "w", zipfile.ZIP_DEFLATED) as debug_zip: + for filename, content in debug_symbols: + debug_zip.writestr(filename, content) + + print(f"Created debug symbols package: {debug_package_path}") + return debug_package_path + + +def remove_debug_symbols_from_wheel(wheel_path: str, debug_symbols: List[Tuple[str, bytes]]): + """Remove debug symbols from the original wheel and update RECORD file.""" + if not debug_symbols: + return + + temp_wheel_path = f"{wheel_path}.tmp" + debug_filenames = [filename for filename, _ in debug_symbols] + + # Read existing RECORD content + record_content = None + with zipfile.ZipFile(wheel_path, "r") as wheel: + for file_info in wheel.infolist(): + if file_info.filename.endswith(".dist-info/RECORD"): + record_content = wheel.read(file_info.filename).decode("utf-8") + break + + # 
Create new wheel without debug symbols + with zipfile.ZipFile(wheel_path, "r") as source_wheel, zipfile.ZipFile( + temp_wheel_path, "w", zipfile.ZIP_DEFLATED + ) as temp_wheel: + for file_info in source_wheel.infolist(): + if file_info.filename in debug_filenames: + continue + elif file_info.filename.endswith(".dist-info/RECORD") and record_content: + # Update RECORD file to remove debug symbol entries + updated_record = update_record_file(record_content, debug_filenames) + temp_wheel.writestr(file_info, updated_record) + else: + temp_wheel.writestr(file_info, source_wheel.read(file_info.filename)) + + # Replace original wheel with cleaned version + os.replace(temp_wheel_path, wheel_path) + print(f"Removed debug symbols from: {wheel_path}") + + +def update_record_file(record_content: str, files_to_remove: List[str]) -> str: + """Update the RECORD file to remove entries for deleted files.""" + records = [] + reader = csv.reader(io.StringIO(record_content)) + + for row in reader: + if not row: + continue + file_path = row[0] + if file_path not in files_to_remove: + records.append(row) + + # Rebuild the RECORD content + output = io.StringIO() + writer = csv.writer(output, lineterminator="\n") + for record in records: + writer.writerow(record) + + return output.getvalue() + + +def process_wheel(wheel_path: str, output_dir: str = None): + """Process a single wheel file.""" + if output_dir is None: + output_dir = os.path.dirname(wheel_path) + + os.makedirs(output_dir, exist_ok=True) + + print(f"Processing wheel: {wheel_path}") + + # Find debug symbols in the wheel + debug_symbols = find_debug_symbols_in_wheel(wheel_path) + + if not debug_symbols: + print("No debug symbols found in wheel") + return None + + print(f"Found {len(debug_symbols)} debug symbol files") + + # Create separate debug symbols package + debug_package_path = create_debug_symbols_package(wheel_path, debug_symbols, output_dir) + + # Remove debug symbols from original wheel + 
remove_debug_symbols_from_wheel(wheel_path, debug_symbols) + + return debug_package_path + + +def main(): + parser = argparse.ArgumentParser(description="Extract debug symbols from wheels") + parser.add_argument("wheel", help="Path to the wheel file") + parser.add_argument("--output-dir", "-o", help="Output directory for debug symbol packages") + + args = parser.parse_args() + + if not os.path.exists(args.wheel): + print(f"Error: Wheel file not found: {args.wheel}") + sys.exit(1) + + try: + debug_package_path = process_wheel(args.wheel, args.output_dir) + if debug_package_path: + print(f"Successfully processed wheel. Debug symbols saved to: {debug_package_path}") + else: + print("No debug symbols found in wheel") + except Exception as e: + print(f"Error processing wheel: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index d32b4be584f..741861bfa6c 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,11 @@ HERE = Path(__file__).resolve().parent -COMPILE_MODE = "RelWithDebInfo" +CURRENT_OS = platform.system() + +# ON Windows, we build with Release by default, and RelWithDebInfo for other platforms +# to generate debug symbols for native extensions. +COMPILE_MODE = "Release" if CURRENT_OS == "Windows" else "RelWithDebInfo" if "DD_COMPILE_DEBUG" in os.environ: warnings.warn( "The DD_COMPILE_DEBUG environment variable is deprecated and will be deleted, " @@ -55,7 +59,7 @@ ) COMPILE_MODE = "Debug" else: - COMPILE_MODE = os.environ.get("DD_COMPILE_MODE", "RelWithDebInfo") + COMPILE_MODE = os.environ.get("DD_COMPILE_MODE", COMPILE_MODE) FAST_BUILD = os.getenv("DD_FAST_BUILD", "false").lower() in ("1", "yes", "on", "true") if FAST_BUILD: @@ -79,8 +83,6 @@ BUILD_PROFILING_NATIVE_TESTS = os.getenv("DD_PROFILING_NATIVE_TESTS", "0").lower() in ("1", "yes", "on", "true") -CURRENT_OS = platform.system() - LIBDDWAF_VERSION = "1.27.0" # DEV: update this accordingly when src/native upgrades libdatadog dependency. 
From 58da60c52ce8f2bb131500579b9d3e5d9100a920 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 14:31:49 -0400 Subject: [PATCH 16/80] set --output-dir --- .github/workflows/build_python_3.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index f83c1f5f101..5f19fa4f18e 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -111,7 +111,7 @@ jobs: for wheel in ./wheelhouse/*.whl; do if [ -f "$wheel" ]; then echo "Extracting debug symbols from $(basename $wheel)..." - python scripts/extract_debug_symbols.py "$wheel" ./debugwheelhouse + python scripts/extract_debug_symbols.py "$wheel" --output-dir ./debugwheelhouse fi done From 18a75739140db458e28eace1755ca186536bd43a Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 14:32:17 -0400 Subject: [PATCH 17/80] set versions for packages used --- .github/workflows/build_python_3.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 5f19fa4f18e..06bea0f6bc5 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -25,7 +25,9 @@ jobs: - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: "3.13" - - run: pip install cibuildwheel==2.23.3 + - run: | + pip install cibuildwheel==2.23.3 + pip install "auditwheel>=5.4.0" "pyelftools>=0.29" - id: set-matrix env: CIBW_BUILD: ${{ inputs.cibw_build }} @@ -92,6 +94,10 @@ jobs: with: python-version: "3.13" + - name: Install auditwheel with compatible dependencies + run: | + pip install "auditwheel>=5.4.0" "pyelftools>=0.29" + - name: Set up QEMU if: runner.os == 'Linux' && matrix.os != 'ubuntu-24.04-arm' uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0 From f3ef8961cd12c03cb027ee246e08f66478b31d7c Mon Sep 17 
00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 14:36:28 -0400 Subject: [PATCH 18/80] this is redundant --- MANIFEST.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 53781ef1083..fd58489b380 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,6 +2,3 @@ prune .riot/ prune benchmarks/ prune releasenotes/ -# Include debug files (will be extracted to separate packages later) -global-include *.debug -global-include *.dSYM From b95b0b538c4f79236349bb62f7a8b4cec55f6233 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 14:45:56 -0400 Subject: [PATCH 19/80] compile C extensions and Cython extensions with debug symbols when RelWithDebInfo --- setup.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 741861bfa6c..b96dc0790e2 100644 --- a/setup.py +++ b/setup.py @@ -904,6 +904,9 @@ def get_exts_for(name): # Cython is not deprecation-proof "-Wno-deprecated-declarations", ] + elif COMPILE_MODE.lower() == "relwithdebinfo": + # Only add debug symbols, let individual extensions handle optimizations and warnings + debug_compile_args = ["-g"] else: debug_compile_args = [] @@ -936,7 +939,7 @@ def get_exts_for(name): "ddtrace.internal._threads", sources=["ddtrace/internal/_threads.cpp"], extra_compile_args=( - ["-std=c++17", "-Wall", "-Wextra"] + fast_build_args + debug_compile_args + ["-std=c++17", "-Wall", "-Wextra"] + fast_build_args if CURRENT_OS != "Windows" else ["/std:c++20", "/MT"] ), @@ -1030,11 +1033,13 @@ def get_exts_for(name): "ddtrace.internal._rand", sources=["ddtrace/internal/_rand.pyx"], language="c", + extra_compile_args=debug_compile_args, ), Cython.Distutils.Extension( "ddtrace.internal._tagset", sources=["ddtrace/internal/_tagset.pyx"], language="c", + extra_compile_args=debug_compile_args, ), Extension( "ddtrace.internal._encoding", @@ -1042,11 +1047,13 @@ def get_exts_for(name): include_dirs=["."], libraries=encoding_libraries, 
define_macros=[(f"__{sys.byteorder.upper()}_ENDIAN__", "1")], + extra_compile_args=debug_compile_args, ), Extension( "ddtrace.internal.telemetry.metrics_namespaces", ["ddtrace/internal/telemetry/metrics_namespaces.pyx"], language="c", + extra_compile_args=debug_compile_args, ), Cython.Distutils.Extension( "ddtrace.profiling.collector.stack", @@ -1056,22 +1063,27 @@ def get_exts_for(name): # OTOH, the MSVC toolchain is different. In a perfect world we'd deduce the underlying # toolchain and emit the right flags, but as a compromise we assume Windows implies MSVC and # everything else is on a GNU-like toolchain - extra_compile_args=extra_compile_args + (["-Wno-int-conversion"] if CURRENT_OS != "Windows" else []), + extra_compile_args=debug_compile_args + + extra_compile_args + + (["-Wno-int-conversion"] if CURRENT_OS != "Windows" else []), ), Cython.Distutils.Extension( "ddtrace.profiling.collector._traceback", sources=["ddtrace/profiling/collector/_traceback.pyx"], language="c", + extra_compile_args=debug_compile_args, ), Cython.Distutils.Extension( "ddtrace.profiling._threading", sources=["ddtrace/profiling/_threading.pyx"], language="c", + extra_compile_args=debug_compile_args, ), Cython.Distutils.Extension( "ddtrace.profiling.collector._task", sources=["ddtrace/profiling/collector/_task.pyx"], language="c", + extra_compile_args=debug_compile_args, ), ], compile_time_env={ From d39714aa8c97d1cff51ed3a60ee95730e92e5408 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 14:46:54 -0400 Subject: [PATCH 20/80] remove newline --- MANIFEST.in | 1 - 1 file changed, 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index fd58489b380..1baa89a6711 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ prune .riot/ prune benchmarks/ prune releasenotes/ - From 77ce3581ae0d0d0e27a811ade43c74a5c0d31e24 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 15:02:32 -0400 Subject: [PATCH 21/80] specify audithweel versions --- 
.github/workflows/build_python_3.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 06bea0f6bc5..19c6c5712cc 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -27,7 +27,6 @@ jobs: python-version: "3.13" - run: | pip install cibuildwheel==2.23.3 - pip install "auditwheel>=5.4.0" "pyelftools>=0.29" - id: set-matrix env: CIBW_BUILD: ${{ inputs.cibw_build }} @@ -74,7 +73,8 @@ jobs: python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && - auditwheel repair -w ./tempwheelhouse {wheel} && + pip install --force-reinstall "auditwheel>=5.4.0" "pyelftools>=0.29" && + python -m auditwheel repair -w ./tempwheelhouse {wheel} && mv ./tempwheelhouse/*.whl {dest_dir} && rm -rf ./tempwheelhouse CIBW_REPAIR_WHEEL_COMMAND_MACOS: | @@ -94,10 +94,6 @@ jobs: with: python-version: "3.13" - - name: Install auditwheel with compatible dependencies - run: | - pip install "auditwheel>=5.4.0" "pyelftools>=0.29" - - name: Set up QEMU if: runner.os == 'Linux' && matrix.os != 'ubuntu-24.04-arm' uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0 @@ -110,9 +106,11 @@ jobs: only: ${{ matrix.only }} - name: Create debug symbols directory + if: runner.os != 'Windows' run: mkdir -p ./debugwheelhouse - name: Extract debug symbols from wheels + if: runner.os != 'Windows' run: | for wheel in ./wheelhouse/*.whl; do if [ -f "$wheel" ]; then @@ -144,6 +142,7 @@ jobs: path: ./wheelhouse/*.whl - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + if: runner.os != 'Windows' with: name: debug-symbols-${{ env.ARTIFACT_NAME }} path: ./debugwheelhouse/*.zip From 81ef28373f5db3032ae5ff72293184758c4f6a62 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 14 Aug 2025 17:44:14 -0400 Subject: [PATCH 22/80] update --- 
scripts/extract_debug_symbols.py | 224 +++++++++++++++++++++---------- setup.py | 53 +------- 2 files changed, 152 insertions(+), 125 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index e97c06e2c20..7203aa0b525 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -3,18 +3,21 @@ Extract debug symbols from wheels and create separate debug symbol packages. This script: -1. Extracts debug symbols (.debug files on Linux, .dSYM bundles on macOS) from wheels -2. Creates separate debug symbol packages -3. Removes debug symbols from the original wheel +1. Processes each .so file in the wheel +2. Creates debug symbols (.debug files on Linux, .dSYM bundles on macOS) for each .so file +3. Strips debug symbols from the .so files +4. Packages debug symbols into a separate zip file +5. Updates the wheel with stripped .so files """ import argparse -import csv -import fnmatch -import io import os from pathlib import Path +import platform +import shutil +import subprocess import sys +import tempfile from typing import List from typing import Tuple import zipfile @@ -25,90 +28,151 @@ def get_debug_symbol_patterns(): return ["*.debug", "*.dSYM/*"] -def find_debug_symbols_in_wheel(wheel_path: str) -> List[Tuple[str, bytes]]: - """Find debug symbols in a wheel file.""" - debug_symbols = [] - patterns = get_debug_symbol_patterns() +def create_and_strip_debug_symbols(so_file: str) -> str | None: + """ + Create debug symbols from a shared object and strip them from the original. + + This function replicates the logic from setup.py's try_strip_symbols method. + Returns the path to the created debug symbol file. 
+ """ + current_os = platform.system() + + if current_os == "Linux": + objcopy = shutil.which("objcopy") + strip = shutil.which("strip") + + if not objcopy: + print("WARNING: objcopy not found, skipping symbol stripping", file=sys.stderr) + return None + + if not strip: + print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) + return None + + # Try removing the .llvmbc section from the .so file + subprocess.run([objcopy, "--remove-section", ".llvmbc", so_file], check=False) + + # Then keep the debug symbols in a separate file + debug_out = f"{so_file}.debug" + subprocess.run([objcopy, "--only-keep-debug", so_file, debug_out], check=True) + + # Strip the debug symbols from the .so file + subprocess.run([strip, "-g", so_file], check=True) + + # Link the debug symbols to the .so file + subprocess.run([objcopy, "--add-gnu-debuglink", debug_out, so_file], check=True) + + return debug_out + + elif current_os == "Darwin": + dsymutil = shutil.which("dsymutil") + strip = shutil.which("strip") + + debug_path = None + if dsymutil: + # 1) Emit dSYM + dsym_path = Path(so_file).with_suffix(".dSYM") + subprocess.run([dsymutil, so_file, "-o", str(dsym_path)], check=False) + debug_path = str(dsym_path) + + if strip: + # Strip DWARF + local symbols + subprocess.run([strip, "-S", "-x", so_file], check=True) + else: + print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) + + return debug_path + + return None + + +def find_so_files_in_wheel(wheel_path: str) -> List[Tuple[str, bytes]]: + """Find and read .so files from a wheel file.""" + so_files = [] with zipfile.ZipFile(wheel_path, "r") as wheel: for file_info in wheel.infolist(): - if any(fnmatch.fnmatch(file_info.filename, pattern) for pattern in patterns): - debug_symbols.append((file_info.filename, wheel.read(file_info.filename))) + if file_info.filename.endswith(".so"): + so_files.append((file_info.filename, wheel.read(file_info.filename))) + + return so_files + + +def 
process_so_file_from_wheel(so_filename: str, so_content: bytes, temp_dir: str) -> str | None: + """ + Process a .so file from a wheel to create debug symbols. - return debug_symbols + Args: + so_filename: Original filename in the wheel + so_content: Binary content of the .so file + temp_dir: Temporary directory to work in + Returns: + Path to the created debug symbol file, or None if no debug symbols were created + """ + # Create a temporary file for the .so to process it + so_path = os.path.join(temp_dir, os.path.basename(so_filename)) + with open(so_path, "wb") as f: + f.write(so_content) -def create_debug_symbols_package(wheel_path: str, debug_symbols: List[Tuple[str, bytes]], output_dir: str): + print(f"Processing .so file: {so_filename}") + + try: + debug_file = create_and_strip_debug_symbols(so_path) + if debug_file: + print(f"Created debug symbols: {debug_file}") + return debug_file + return None + except Exception as e: + print(f"Error processing .so file {so_filename}: {e}") + return None + + +def create_debug_symbols_package(wheel_path: str, debug_files: List[str], output_dir: str) -> str: """Create a separate debug symbols package.""" wheel_name = Path(wheel_path).stem debug_package_name = f"{wheel_name}-debug-symbols.zip" debug_package_path = os.path.join(output_dir, debug_package_name) with zipfile.ZipFile(debug_package_path, "w", zipfile.ZIP_DEFLATED) as debug_zip: - for filename, content in debug_symbols: - debug_zip.writestr(filename, content) + for debug_file in debug_files: + if os.path.exists(debug_file): + # Add the debug file to the zip with a relative path + arcname = os.path.basename(debug_file) + debug_zip.write(debug_file, arcname) print(f"Created debug symbols package: {debug_package_path}") return debug_package_path -def remove_debug_symbols_from_wheel(wheel_path: str, debug_symbols: List[Tuple[str, bytes]]): - """Remove debug symbols from the original wheel and update RECORD file.""" - if not debug_symbols: - return - +def 
update_wheel_with_stripped_so_files(wheel_path: str, temp_dir: str): + """Update the wheel with stripped .so files.""" temp_wheel_path = f"{wheel_path}.tmp" - debug_filenames = [filename for filename, _ in debug_symbols] - - # Read existing RECORD content - record_content = None - with zipfile.ZipFile(wheel_path, "r") as wheel: - for file_info in wheel.infolist(): - if file_info.filename.endswith(".dist-info/RECORD"): - record_content = wheel.read(file_info.filename).decode("utf-8") - break - # Create new wheel without debug symbols + # Create new wheel with stripped .so files with zipfile.ZipFile(wheel_path, "r") as source_wheel, zipfile.ZipFile( temp_wheel_path, "w", zipfile.ZIP_DEFLATED ) as temp_wheel: for file_info in source_wheel.infolist(): - if file_info.filename in debug_filenames: - continue - elif file_info.filename.endswith(".dist-info/RECORD") and record_content: - # Update RECORD file to remove debug symbol entries - updated_record = update_record_file(record_content, debug_filenames) - temp_wheel.writestr(file_info, updated_record) + if file_info.filename.endswith(".so"): + # Replace with stripped version + so_basename = os.path.basename(file_info.filename) + stripped_so_path = os.path.join(temp_dir, so_basename) + if os.path.exists(stripped_so_path): + with open(stripped_so_path, "rb") as f: + temp_wheel.writestr(file_info.filename, f.read()) + else: + # If stripping failed, keep original + temp_wheel.writestr(file_info.filename, source_wheel.read(file_info.filename)) else: - temp_wheel.writestr(file_info, source_wheel.read(file_info.filename)) + temp_wheel.writestr(file_info.filename, source_wheel.read(file_info.filename)) - # Replace original wheel with cleaned version + # Replace original wheel with updated version os.replace(temp_wheel_path, wheel_path) - print(f"Removed debug symbols from: {wheel_path}") - - -def update_record_file(record_content: str, files_to_remove: List[str]) -> str: - """Update the RECORD file to remove entries for deleted 
files.""" - records = [] - reader = csv.reader(io.StringIO(record_content)) - - for row in reader: - if not row: - continue - file_path = row[0] - if file_path not in files_to_remove: - records.append(row) - - # Rebuild the RECORD content - output = io.StringIO() - writer = csv.writer(output, lineterminator="\n") - for record in records: - writer.writerow(record) + print(f"Updated wheel with stripped .so files: {wheel_path}") - return output.getvalue() - -def process_wheel(wheel_path: str, output_dir: str = None): +def process_wheel(wheel_path: str, output_dir: str = None) -> str | None: """Process a single wheel file.""" if output_dir is None: output_dir = os.path.dirname(wheel_path) @@ -117,22 +181,36 @@ def process_wheel(wheel_path: str, output_dir: str = None): print(f"Processing wheel: {wheel_path}") - # Find debug symbols in the wheel - debug_symbols = find_debug_symbols_in_wheel(wheel_path) + # Find and read .so files from the wheel + so_files = find_so_files_in_wheel(wheel_path) - if not debug_symbols: - print("No debug symbols found in wheel") + if not so_files: + print("No .so files found in wheel") return None - print(f"Found {len(debug_symbols)} debug symbol files") + print(f"Found {len(so_files)} .so files") - # Create separate debug symbols package - debug_package_path = create_debug_symbols_package(wheel_path, debug_symbols, output_dir) + # Create temporary directory for processing + with tempfile.TemporaryDirectory() as temp_dir: + debug_files = [] - # Remove debug symbols from original wheel - remove_debug_symbols_from_wheel(wheel_path, debug_symbols) + # Process each .so file from the wheel + for so_filename, so_content in so_files: + debug_file = process_so_file_from_wheel(so_filename, so_content, temp_dir) + if debug_file: + debug_files.append(debug_file) - return debug_package_path + if not debug_files: + print("No debug symbols were created") + return None + + # Create debug symbols package + debug_package_path = 
create_debug_symbols_package(wheel_path, debug_files, output_dir) + + # Update wheel with stripped .so files + update_wheel_with_stripped_so_files(wheel_path, temp_dir) + + return debug_package_path def main(): @@ -151,7 +229,7 @@ def main(): if debug_package_path: print(f"Successfully processed wheel. Debug symbols saved to: {debug_package_path}") else: - print("No debug symbols found in wheel") + print("No debug symbols were created") except Exception as e: print(f"Error processing wheel: {e}") sys.exit(1) diff --git a/setup.py b/setup.py index b96dc0790e2..d1590c88bc0 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,7 @@ # ON Windows, we build with Release by default, and RelWithDebInfo for other platforms # to generate debug symbols for native extensions. +# Note: We strip debug symbols when releasing wheels using scripts/extract_debug_symbols.py COMPILE_MODE = "Release" if CURRENT_OS == "Windows" else "RelWithDebInfo" if "DD_COMPILE_DEBUG" in os.environ: warnings.warn( @@ -524,52 +525,6 @@ def build_rust(self): elif CURRENT_OS == "Darwin": subprocess.run(["install_name_tool", "-id", native_name, library], check=True) - @staticmethod - def try_strip_symbols(so_file): - if COMPILE_MODE.lower() == "debug": - return - - if CURRENT_OS == "Linux": - objcopy = shutil.which("objcopy") - strip = shutil.which("strip") - - if not objcopy: - print("WARNING: objcopy not found, skipping symbol stripping", file=sys.stderr) - return - - if not strip: - print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) - return - - # Try removing the .llvmbc section from the .so file - subprocess.run([objcopy, "--remove-section", ".llvmbc", so_file], check=False) - - # Then keep the debug symbols in a separate file - debug_out = f"{so_file}.debug" - subprocess.run([objcopy, "--only-keep-debug", so_file, debug_out], check=True) - - # Strip the debug symbols from the .so file - subprocess.run([strip, "-g", so_file], check=True) - - # Link the debug symbols to the .so 
file - subprocess.run([objcopy, "--add-gnu-debuglink", debug_out, so_file], check=True) - - elif CURRENT_OS == "Darwin": - dsymutil = shutil.which("dsymutil") - strip = shutil.which("strip") - - if dsymutil: - # 1) Emit dSYM - dsym_path = Path(so_file).with_suffix(".dSYM") - subprocess.run([dsymutil, so_file, "-o", str(dsym_path)], check=False) - - if strip: - # Strip DWARF + local symbols - subprocess.run([strip, "-S", "-x", so_file], check=True) - pass - else: - print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) - def build_extension(self, ext): if isinstance(ext, CMakeExtension): try: @@ -587,12 +542,6 @@ def build_extension(self, ext): else: super().build_extension(ext) - if COMPILE_MODE.lower() != "debug": - try: - self.try_strip_symbols(self.get_ext_fullpath(ext.name)) - except Exception as e: - print(f"WARNING: An error occurred while building the extension: {e}") - def build_extension_cmake(self, ext: "CMakeExtension") -> None: if IS_EDITABLE and self.INCREMENTAL: # DEV: Rudimentary incremental build support. 
We copy the logic from From 0489f5bf30b8a5345961ad56b5fd1f96267f6c43 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 10:01:44 -0400 Subject: [PATCH 23/80] extract debug symbols before auditwheel --- .github/workflows/build_python_3.yml | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 19c6c5712cc..13967ade38b 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -70,6 +70,8 @@ jobs: # See: https://stackoverflow.com/a/65402241 CIBW_ENVIRONMENT_MACOS: CMAKE_BUILD_PARALLEL_LEVEL=24 SYSTEM_VERSION_COMPAT=0 CMAKE_ARGS="-DNATIVE_TESTING=OFF" CIBW_REPAIR_WHEEL_COMMAND_LINUX: | + mkdir -p ./debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && @@ -78,6 +80,8 @@ jobs: mv ./tempwheelhouse/*.whl {dest_dir} && rm -rf ./tempwheelhouse CIBW_REPAIR_WHEEL_COMMAND_MACOS: | + mkdir -p ./debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && MACOSX_DEPLOYMENT_TARGET=12.7 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: python scripts/zip_filter.py "{wheel}" "*.c" "*.cpp" "*.cc" "*.h" "*.hpp" "*.pyx" "*.md" && mv "{wheel}" "{dest_dir}" @@ -105,20 +109,6 @@ jobs: with: only: ${{ matrix.only }} - - name: Create debug symbols directory - if: runner.os != 'Windows' - run: mkdir -p ./debugwheelhouse - - - name: Extract debug symbols from wheels - if: runner.os != 'Windows' - run: | - for wheel in ./wheelhouse/*.whl; do - if [ -f "$wheel" ]; then - echo "Extracting debug symbols from $(basename $wheel)..." 
- python scripts/extract_debug_symbols.py "$wheel" --output-dir ./debugwheelhouse - fi - done - - name: Validate wheel RECORD files shell: bash run: | From e937f93b0c7e6942d623038a41b97e5883095693 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 10:18:03 -0400 Subject: [PATCH 24/80] revert --- .github/workflows/build_python_3.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 13967ade38b..c9fa66f9558 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -70,18 +70,13 @@ jobs: # See: https://stackoverflow.com/a/65402241 CIBW_ENVIRONMENT_MACOS: CMAKE_BUILD_PARALLEL_LEVEL=24 SYSTEM_VERSION_COMPAT=0 CMAKE_ARGS="-DNATIVE_TESTING=OFF" CIBW_REPAIR_WHEEL_COMMAND_LINUX: | - mkdir -p ./debugwheelhouse && - python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && - pip install --force-reinstall "auditwheel>=5.4.0" "pyelftools>=0.29" && python -m auditwheel repair -w ./tempwheelhouse {wheel} && mv ./tempwheelhouse/*.whl {dest_dir} && rm -rf ./tempwheelhouse CIBW_REPAIR_WHEEL_COMMAND_MACOS: | - mkdir -p ./debugwheelhouse && - python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && MACOSX_DEPLOYMENT_TARGET=12.7 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: python scripts/zip_filter.py "{wheel}" "*.c" "*.cpp" "*.cc" "*.h" "*.hpp" "*.pyx" "*.md" && mv "{wheel}" "{dest_dir}" From 1ce125446efa955b554ed4cea35bf2458f734ae4 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 10:29:36 -0400 Subject: [PATCH 25/80] use auditwheel command --- .github/workflows/build_python_3.yml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index c9fa66f9558..d6910f6eb18 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -73,7 +73,7 @@ jobs: python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && - python -m auditwheel repair -w ./tempwheelhouse {wheel} && + auditwheel repair -w ./tempwheelhouse {wheel} && mv ./tempwheelhouse/*.whl {dest_dir} && rm -rf ./tempwheelhouse CIBW_REPAIR_WHEEL_COMMAND_MACOS: | From 8e6800defadbc8cf34e6427c79a5bdd392ef1a37 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 11:19:51 -0400 Subject: [PATCH 26/80] dont generate debug file here --- .../profiling/dd_wrapper/CMakeLists.txt | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt index 2885b5ecd81..7622a426c86 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt @@ -122,12 +122,6 @@ if(LIB_INSTALL_DIR) LIBRARY DESTINATION ${LIB_INSTALL_DIR}/.. ARCHIVE DESTINATION ${LIB_INSTALL_DIR}/.. RUNTIME DESTINATION ${LIB_INSTALL_DIR}/..) - - # Install debug files if they exist (created by objcopy) - install( - FILES $.debug - DESTINATION ${LIB_INSTALL_DIR}/.. - OPTIONAL) endif() # Configure cppcheck @@ -145,24 +139,6 @@ add_cppcheck_target( add_infer_target(dd_wrapper) add_clangtidy_target(dd_wrapper) -# Generate debug files on Linux when not in debug mode We handle this here in CMakefile instead of in setup.py because -# dd_wrapper is built as a shared library to be depended by other native extensions. 
-if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug") - find_program(OBJCOPY_EXECUTABLE objcopy) - find_program(STRIP_EXECUTABLE strip) - - if(OBJCOPY_EXECUTABLE AND STRIP_EXECUTABLE) - # Create debug file after build - add_custom_command( - TARGET dd_wrapper - POST_BUILD - COMMAND ${OBJCOPY_EXECUTABLE} --only-keep-debug $ $.debug - COMMAND ${STRIP_EXECUTABLE} -g $ - COMMAND ${OBJCOPY_EXECUTABLE} --add-gnu-debuglink $.debug $ - COMMENT "Creating debug symbols for dd_wrapper") - endif() -endif() - # Add the tests if(BUILD_TESTING) enable_testing() From 093cc10af84e04e31f18bba10249bdc00b08ba64 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 11:29:06 -0400 Subject: [PATCH 27/80] generate debug files before auditwheel and delocate-wheel to make sure that RECORD is properly updated after modifying .so files --- .github/workflows/build_python_3.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index d6910f6eb18..65925c2e46b 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -25,8 +25,7 @@ jobs: - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: "3.13" - - run: | - pip install cibuildwheel==2.23.3 + - run: pip install cibuildwheel==2.23.3 - id: set-matrix env: CIBW_BUILD: ${{ inputs.cibw_build }} @@ -70,6 +69,8 @@ jobs: # See: https://stackoverflow.com/a/65402241 CIBW_ENVIRONMENT_MACOS: CMAKE_BUILD_PARALLEL_LEVEL=24 SYSTEM_VERSION_COMPAT=0 CMAKE_ARGS="-DNATIVE_TESTING=OFF" CIBW_REPAIR_WHEEL_COMMAND_LINUX: | + mkdir -p ./debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && @@ -77,6 +78,8 @@ jobs: mv ./tempwheelhouse/*.whl {dest_dir} && rm -rf 
./tempwheelhouse CIBW_REPAIR_WHEEL_COMMAND_MACOS: | + mkdir -p ./debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && MACOSX_DEPLOYMENT_TARGET=12.7 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: python scripts/zip_filter.py "{wheel}" "*.c" "*.cpp" "*.cc" "*.h" "*.hpp" "*.pyx" "*.md" && mv "{wheel}" "{dest_dir}" From ffec034f31944627a3c8f4fdb5370eebaefc44bd Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 11:49:53 -0400 Subject: [PATCH 28/80] fix docs build --- docs/debug_symbols.rst | 25 ++++--------------------- docs/index.rst | 1 + 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index b35ffb71899..517057e5352 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -4,7 +4,7 @@ Debug Symbols Packaging dd-trace-py is built with debug symbols by default, and packaged separately from the main wheel files to reduce the size of the primary distribution packages. Debug Symbol Files ------------------ +------------------ The project generates debug symbols during the build process: @@ -22,7 +22,7 @@ For example: - `ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64.whl` → `ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64-debug-symbols.zip` Build Process ------------- +------------- The debug symbols are handled automatically during the CI build process: @@ -37,23 +37,6 @@ Usage To use debug symbols for debugging or crash analysis: 1. Download the appropriate debug symbol package for your platform and Python version -2. Extract the debug symbol files to the same directory as the corresponding `.so` files +2. Extract the debug symbol files to the same directory as the corresponding `.so` files. + Typically, the site-packages directory where ddtrace is installed. 3. 
Your debugger or crash analysis tool should automatically find the debug symbols - -For example, on Linux: -```bash -# Extract debug symbols -unzip ddtrace-1.20.0-cp39-cp39-linux_x86_64-debug-symbols.zip - -# Debug symbols will be placed in the same directory structure as the wheel -# The debugger will automatically find them when analyzing crashes -``` - -On macOS: -```bash -# Extract debug symbols -unzip ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64-debug-symbols.zip - -# The .dSYM bundles will be placed in the same directory structure -# Xcode and other debugging tools will automatically find them -``` diff --git a/docs/index.rst b/docs/index.rst index 1b6766e53b6..ef35f9f8cd8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -283,6 +283,7 @@ Indices and tables basic_usage advanced_usage build_system + debug_symbols benchmarks contributing troubleshooting From c4a068ad3d359349ec1ccbcba492d2f18ec269df Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 11:51:43 -0400 Subject: [PATCH 29/80] fix script for older python versions --- scripts/extract_debug_symbols.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index 7203aa0b525..73cfb476fc8 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -18,8 +18,7 @@ import subprocess import sys import tempfile -from typing import List -from typing import Tuple +from typing import List, Tuple, Union, Optional import zipfile @@ -28,7 +27,7 @@ def get_debug_symbol_patterns(): return ["*.debug", "*.dSYM/*"] -def create_and_strip_debug_symbols(so_file: str) -> str | None: +def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: """ Create debug symbols from a shared object and strip them from the original. 
@@ -98,7 +97,7 @@ def find_so_files_in_wheel(wheel_path: str) -> List[Tuple[str, bytes]]: return so_files -def process_so_file_from_wheel(so_filename: str, so_content: bytes, temp_dir: str) -> str | None: +def process_so_file_from_wheel(so_filename: str, so_content: bytes, temp_dir: str) -> Union[str, None]: """ Process a .so file from a wheel to create debug symbols. @@ -172,7 +171,7 @@ def update_wheel_with_stripped_so_files(wheel_path: str, temp_dir: str): print(f"Updated wheel with stripped .so files: {wheel_path}") -def process_wheel(wheel_path: str, output_dir: str = None) -> str | None: +def process_wheel(wheel_path: str, output_dir: Optional[str] = None) -> Optional[str]: """Process a single wheel file.""" if output_dir is None: output_dir = os.path.dirname(wheel_path) From a30496714db5bb3a8cf4f98327c72c53eb9e22f7 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 12:47:34 -0400 Subject: [PATCH 30/80] keep the same directory structure --- scripts/extract_debug_symbols.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index 73cfb476fc8..a5126277083 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -109,8 +109,9 @@ def process_so_file_from_wheel(so_filename: str, so_content: bytes, temp_dir: st Returns: Path to the created debug symbol file, or None if no debug symbols were created """ - # Create a temporary file for the .so to process it - so_path = os.path.join(temp_dir, os.path.basename(so_filename)) + # Create a temporary file for the .so to process it, preserving directory structure + so_path = os.path.join(temp_dir, so_filename) + os.makedirs(os.path.dirname(so_path), exist_ok=True) with open(so_path, "wb") as f: f.write(so_content) @@ -127,7 +128,7 @@ def process_so_file_from_wheel(so_filename: str, so_content: bytes, temp_dir: st return None -def create_debug_symbols_package(wheel_path: 
str, debug_files: List[str], output_dir: str) -> str: +def create_debug_symbols_package(wheel_path: str, debug_files: List[str], output_dir: str, temp_dir: str) -> str: """Create a separate debug symbols package.""" wheel_name = Path(wheel_path).stem debug_package_name = f"{wheel_name}-debug-symbols.zip" @@ -136,9 +137,10 @@ def create_debug_symbols_package(wheel_path: str, debug_files: List[str], output with zipfile.ZipFile(debug_package_path, "w", zipfile.ZIP_DEFLATED) as debug_zip: for debug_file in debug_files: if os.path.exists(debug_file): - # Add the debug file to the zip with a relative path - arcname = os.path.basename(debug_file) - debug_zip.write(debug_file, arcname) + # Add the debug file to the zip, preserving directory structure + # The debug_file path is relative to temp_dir, so we need to extract the relative path + rel_path = os.path.relpath(debug_file, temp_dir) + debug_zip.write(debug_file, rel_path) print(f"Created debug symbols package: {debug_package_path}") return debug_package_path @@ -154,9 +156,8 @@ def update_wheel_with_stripped_so_files(wheel_path: str, temp_dir: str): ) as temp_wheel: for file_info in source_wheel.infolist(): if file_info.filename.endswith(".so"): - # Replace with stripped version - so_basename = os.path.basename(file_info.filename) - stripped_so_path = os.path.join(temp_dir, so_basename) + # Replace with stripped version, preserving directory structure + stripped_so_path = os.path.join(temp_dir, file_info.filename) if os.path.exists(stripped_so_path): with open(stripped_so_path, "rb") as f: temp_wheel.writestr(file_info.filename, f.read()) @@ -204,7 +205,7 @@ def process_wheel(wheel_path: str, output_dir: Optional[str] = None) -> Optional return None # Create debug symbols package - debug_package_path = create_debug_symbols_package(wheel_path, debug_files, output_dir) + debug_package_path = create_debug_symbols_package(wheel_path, debug_files, output_dir, temp_dir) # Update wheel with stripped .so files 
update_wheel_with_stripped_so_files(wheel_path, temp_dir) From 43baefc4525761690de2760cd79fa48d28e06ed1 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 12:47:47 -0400 Subject: [PATCH 31/80] make sure to not strip these symbols --- ddtrace/appsec/_iast/_taint_tracking/native.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/native.cpp b/ddtrace/appsec/_iast/_taint_tracking/native.cpp index ae4fb3cf4d1..810ac4c40b0 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/native.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/native.cpp @@ -41,7 +41,7 @@ static PyMethodDef AspectsMethods[] = { { nullptr, nullptr, 0, nullptr } }; -static struct PyModuleDef aspects = { PyModuleDef_HEAD_INIT, +static struct PyModuleDef aspects __attribute__((used)) = { PyModuleDef_HEAD_INIT, .m_name = PY_MODULE_NAME_ASPECTS, .m_doc = "Taint tracking Aspects", .m_size = -1, @@ -53,7 +53,7 @@ static PyMethodDef OpsMethods[] = { { nullptr, nullptr, 0, nullptr } }; -static struct PyModuleDef ops = { PyModuleDef_HEAD_INIT, +static struct PyModuleDef ops __attribute__((used)) = { PyModuleDef_HEAD_INIT, .m_name = PY_MODULE_NAME_ASPECTS, .m_doc = "Taint tracking operations", .m_size = -1, @@ -88,4 +88,9 @@ PYBIND11_MODULE(_native, m) PyObject* hm_ops = PyModule_Create(&ops); m.add_object("ops", hm_ops); + + // Explicitly reference the module definitions to prevent them from being stripped + // This ensures the symbols are preserved even when debug symbols are stripped + (void)&ops; + (void)&aspects; } From 0b66d1ce308f9cf9590d184ecc9e13cb8d540ec1 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 14:06:59 -0400 Subject: [PATCH 32/80] format --- scripts/extract_debug_symbols.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index a5126277083..b8cdd11fd17 100644 --- a/scripts/extract_debug_symbols.py +++ 
b/scripts/extract_debug_symbols.py @@ -18,7 +18,10 @@ import subprocess import sys import tempfile -from typing import List, Tuple, Union, Optional +from typing import List +from typing import Optional +from typing import Tuple +from typing import Union import zipfile From 8dbb8fae65d0479df769546eefe886632537eb8f Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 14:12:22 -0400 Subject: [PATCH 33/80] copy over debugwheelhouse --- .github/workflows/build_python_3.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 65925c2e46b..382f8275afe 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -71,6 +71,8 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND_LINUX: | mkdir -p ./debugwheelhouse && python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && + cp -r ./debugwheelhouse {dest_dir} && + rm -rf ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && @@ -107,6 +109,8 @@ jobs: with: only: ${{ matrix.only }} + + - name: Validate wheel RECORD files shell: bash run: | From 39665b369d3c5d1b5918d5ff09e5a165694a819c Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 14:18:26 -0400 Subject: [PATCH 34/80] Format code --- ddtrace/appsec/_iast/_taint_tracking/native.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/native.cpp b/ddtrace/appsec/_iast/_taint_tracking/native.cpp index 810ac4c40b0..9c5616a2505 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/native.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/native.cpp @@ -42,10 +42,10 @@ static PyMethodDef AspectsMethods[] = { }; static struct PyModuleDef aspects __attribute__((used)) = { PyModuleDef_HEAD_INIT, - .m_name = PY_MODULE_NAME_ASPECTS, - .m_doc = "Taint tracking Aspects", 
- .m_size = -1, - .m_methods = AspectsMethods }; + .m_name = PY_MODULE_NAME_ASPECTS, + .m_doc = "Taint tracking Aspects", + .m_size = -1, + .m_methods = AspectsMethods }; static PyMethodDef OpsMethods[] = { { "new_pyobject_id", (PyCFunction)api_new_pyobject_id, METH_FASTCALL, "new pyobject id" }, @@ -54,10 +54,10 @@ static PyMethodDef OpsMethods[] = { }; static struct PyModuleDef ops __attribute__((used)) = { PyModuleDef_HEAD_INIT, - .m_name = PY_MODULE_NAME_ASPECTS, - .m_doc = "Taint tracking operations", - .m_size = -1, - .m_methods = OpsMethods }; + .m_name = PY_MODULE_NAME_ASPECTS, + .m_doc = "Taint tracking operations", + .m_size = -1, + .m_methods = OpsMethods }; /** * This function initializes the native module. From 5cf1291e1b975dd21054febb6dfacaa592f3c449 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 14:19:21 -0400 Subject: [PATCH 35/80] remove newlines --- .github/workflows/build_python_3.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 382f8275afe..5c3a7016fe5 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -109,8 +109,6 @@ jobs: with: only: ${{ matrix.only }} - - - name: Validate wheel RECORD files shell: bash run: | From 0ed4e990e52a9eb487838d2a8a9a9399a0a2fd7a Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 15 Aug 2025 14:21:01 -0400 Subject: [PATCH 36/80] no need to include these, as theyre generated while auditing and delocating --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index d1590c88bc0..35ff2068774 100644 --- a/setup.py +++ b/setup.py @@ -961,8 +961,6 @@ def get_exts_for(name): "ddtrace.internal.datadog.profiling": ( ["libdd_wrapper*.*"] + ["ddtrace/internal/datadog/profiling/test/*"] if BUILD_PROFILING_NATIVE_TESTS else [] ), - # Include debug files for native extensions - **({"": ["*.debug", "*.dSYM/*"]}), }, zip_safe=False, # enum34 is an enum 
backport for earlier versions of python From 2690477f80638bc4db70a411926b3dcbd10c440f Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Mon, 18 Aug 2025 10:51:15 -0400 Subject: [PATCH 37/80] update dirs --- .github/workflows/build_python_3.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 5c3a7016fe5..9e47f74e5aa 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -71,7 +71,8 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND_LINUX: | mkdir -p ./debugwheelhouse && python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && - cp -r ./debugwheelhouse {dest_dir} && + mkdir -p {dest_dir}/debugwheelhouse && + cp -r ./debugwheelhouse/* {dest_dir}/debugwheelhouse/ && rm -rf ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && @@ -82,6 +83,9 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND_MACOS: | mkdir -p ./debugwheelhouse && python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && + mkdir -p {dest_dir}/debugwheelhouse && + cp -r ./debugwheelhouse/* {dest_dir}/debugwheelhouse/ && + rm -rf ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && MACOSX_DEPLOYMENT_TARGET=12.7 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: python scripts/zip_filter.py "{wheel}" "*.c" "*.cpp" "*.cc" "*.h" "*.hpp" "*.pyx" "*.md" && mv "{wheel}" "{dest_dir}" @@ -135,4 +139,4 @@ jobs: if: runner.os != 'Windows' with: name: debug-symbols-${{ env.ARTIFACT_NAME }} - path: ./debugwheelhouse/*.zip + path: ./wheelhouse/debugwheelhouse/*.zip From db444781b0d684707cfc0cfcc08aa61a0ffb99be Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Mon, 18 Aug 2025 11:22:38 -0400 Subject: [PATCH 38/80] create directory under {dest_dir} --- 
.github/workflows/build_python_3.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 9e47f74e5aa..146841ef5ea 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -69,11 +69,8 @@ jobs: # See: https://stackoverflow.com/a/65402241 CIBW_ENVIRONMENT_MACOS: CMAKE_BUILD_PARALLEL_LEVEL=24 SYSTEM_VERSION_COMPAT=0 CMAKE_ARGS="-DNATIVE_TESTING=OFF" CIBW_REPAIR_WHEEL_COMMAND_LINUX: | - mkdir -p ./debugwheelhouse && - python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && mkdir -p {dest_dir}/debugwheelhouse && - cp -r ./debugwheelhouse/* {dest_dir}/debugwheelhouse/ && - rm -rf ./debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir {dest_dir}/debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && @@ -81,11 +78,8 @@ jobs: mv ./tempwheelhouse/*.whl {dest_dir} && rm -rf ./tempwheelhouse CIBW_REPAIR_WHEEL_COMMAND_MACOS: | - mkdir -p ./debugwheelhouse && - python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && mkdir -p {dest_dir}/debugwheelhouse && - cp -r ./debugwheelhouse/* {dest_dir}/debugwheelhouse/ && - rm -rf ./debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir {dest_dir}/debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && MACOSX_DEPLOYMENT_TARGET=12.7 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: python scripts/zip_filter.py "{wheel}" "*.c" "*.cpp" "*.cc" "*.h" "*.hpp" "*.pyx" "*.md" && mv "{wheel}" "{dest_dir}" From 99641b2d0f9b63757995e69b3a8e88ad9e60258a Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Mon, 18 Aug 2025 12:36:29 -0400 Subject: [PATCH 39/80] fix for mac --- 
.github/workflows/build_python_3.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 146841ef5ea..5993d0490e3 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -78,8 +78,10 @@ jobs: mv ./tempwheelhouse/*.whl {dest_dir} && rm -rf ./tempwheelhouse CIBW_REPAIR_WHEEL_COMMAND_MACOS: | - mkdir -p {dest_dir}/debugwheelhouse && - python scripts/extract_debug_symbols.py {wheel} --output-dir {dest_dir}/debugwheelhouse && + mkdir -p ./debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && + cp -r ./debugwheelhouse {dest_dir} && + rm -rf ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && MACOSX_DEPLOYMENT_TARGET=12.7 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: python scripts/zip_filter.py "{wheel}" "*.c" "*.cpp" "*.cc" "*.h" "*.hpp" "*.pyx" "*.md" && mv "{wheel}" "{dest_dir}" From c18f1254f05e4464c1ec8a36b7d069d7f4b49e6f Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Mon, 18 Aug 2025 12:54:22 -0400 Subject: [PATCH 40/80] linux: put to /output container output dir, mac: dont move it to dest_dir and just put it in debugwheelhouse --- .github/workflows/build_python_3.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 5993d0490e3..1cda390d1ae 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -69,8 +69,8 @@ jobs: # See: https://stackoverflow.com/a/65402241 CIBW_ENVIRONMENT_MACOS: CMAKE_BUILD_PARALLEL_LEVEL=24 SYSTEM_VERSION_COMPAT=0 CMAKE_ARGS="-DNATIVE_TESTING=OFF" CIBW_REPAIR_WHEEL_COMMAND_LINUX: | - mkdir -p {dest_dir}/debugwheelhouse && - python scripts/extract_debug_symbols.py {wheel} --output-dir 
{dest_dir}/debugwheelhouse && + mkdir -p /output/debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir /output/debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && @@ -80,8 +80,6 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND_MACOS: | mkdir -p ./debugwheelhouse && python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && - cp -r ./debugwheelhouse {dest_dir} && - rm -rf ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && MACOSX_DEPLOYMENT_TARGET=12.7 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: python scripts/zip_filter.py "{wheel}" "*.c" "*.cpp" "*.cc" "*.h" "*.hpp" "*.pyx" "*.md" && mv "{wheel}" "{dest_dir}" @@ -135,4 +133,6 @@ jobs: if: runner.os != 'Windows' with: name: debug-symbols-${{ env.ARTIFACT_NAME }} - path: ./wheelhouse/debugwheelhouse/*.zip + path: | + ./debugwheelhouse/*.zip + ./wheelhouse/debugwheelhouse/*.zip From e10abf046e95c77dedd8d612d05ed8210ff1e55a Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Mon, 18 Aug 2025 13:45:55 -0400 Subject: [PATCH 41/80] add comments --- .github/workflows/build_python_3.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 1cda390d1ae..22e8e5b621d 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -68,6 +68,10 @@ jobs: # `platform.mac_ver()` reports incorrect MacOS version at 11.0 # See: https://stackoverflow.com/a/65402241 CIBW_ENVIRONMENT_MACOS: CMAKE_BUILD_PARALLEL_LEVEL=24 SYSTEM_VERSION_COMPAT=0 CMAKE_ARGS="-DNATIVE_TESTING=OFF" + # cibuildwheel repair will copy anything's under /output directory from the + # build container to the host machine. 
This is a bit hacky way, but seems + # to be the only way getting debug symbols out from the container while + # we don't mess up with RECORD file. CIBW_REPAIR_WHEEL_COMMAND_LINUX: | mkdir -p /output/debugwheelhouse && python scripts/extract_debug_symbols.py {wheel} --output-dir /output/debugwheelhouse && From bf6f03c37a0c4cf94b6a9f0bbe0b0a2edae704c2 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Mon, 18 Aug 2025 14:06:31 -0400 Subject: [PATCH 42/80] only download wheels and source-dist --- .gitlab/download-wheels-from-gh-actions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/download-wheels-from-gh-actions.sh b/.gitlab/download-wheels-from-gh-actions.sh index 547c1cd92f2..2792e8754fe 100755 --- a/.gitlab/download-wheels-from-gh-actions.sh +++ b/.gitlab/download-wheels-from-gh-actions.sh @@ -67,7 +67,7 @@ fi echo "Github workflow finished. Downloading wheels" # download all wheels -gh run download $RUN_ID --repo DataDog/dd-trace-py +gh run download $RUN_ID --repo DataDog/dd-trace-py --pattern "wheels-*" --pattern "source-dist*" cd .. 
From e6c6ac09e4c588e35c55eb63de933ce6a8c969db Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 19 Aug 2025 14:17:00 -0400 Subject: [PATCH 43/80] enable memalloc assertions only on Debug compile mode --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 35ff2068774..fd6f642bc19 100644 --- a/setup.py +++ b/setup.py @@ -877,7 +877,7 @@ def get_exts_for(name): # sure we explicitly set this for normal builds, and explicitly # _unset_ it for debug builds in case the CFLAGS from sysconfig # include -DNDEBUG - + (["-DNDEBUG"] if not debug_compile_args else ["-UNDEBUG"]) + + (["-DNDEBUG"] if COMPILE_MODE.lower() != "debug" else ["-UNDEBUG"]) + ["-D_POSIX_C_SOURCE=200809L", "-std=c11"] + fast_build_args if CURRENT_OS != "Windows" From e9c35d0a2ccca388df851b2d4546ac02d2052667 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 19 Aug 2025 15:00:10 -0400 Subject: [PATCH 44/80] verify step --- scripts/extract_debug_symbols.py | 230 ++++++++++++++++++++++++------- 1 file changed, 184 insertions(+), 46 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index b8cdd11fd17..24f64fffafa 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -30,6 +30,134 @@ def get_debug_symbol_patterns(): return ["*.debug", "*.dSYM/*"] +def create_dsym_bundle(so_file: str, dsymutil: str) -> Optional[str]: + """Create a .dSYM bundle for a .so file.""" + dsym_path = Path(so_file).with_suffix(".dSYM") + + print(f"Attempting to create .dSYM bundle for: {so_file}") + print(f"dsymutil command: {dsymutil} {so_file} -o {dsym_path}") + + try: + result = subprocess.run([dsymutil, so_file, "-o", str(dsym_path)], capture_output=True, text=True, check=True) + + print(f"dsymutil stdout: {result.stdout}") + if result.stderr: + print(f"dsymutil stderr: {result.stderr}") + + # Verify that the .dSYM bundle was created and contains content + if verify_dsym_bundle(dsym_path): + return 
str(dsym_path) + else: + print(f"dsymutil succeeded but created empty .dSYM bundle for: {so_file}") + return None + + except subprocess.CalledProcessError as e: + print(f"Warning: dsymutil failed with exit code {e.returncode}") + print(f"dsymutil stdout: {e.stdout}") + print(f"dsymutil stderr: {e.stderr}") + return None + except Exception as e: + print(f"Warning: Error running dsymutil: {e}") + return None + + +def verify_debug_file(debug_path: Path) -> bool: + """Verify that a Linux .debug file was created successfully and contains content.""" + print(f"Verifying debug file: {debug_path}") + + if not debug_path.exists(): + print(f" Error: Debug file does not exist: {debug_path}") + return False + + if not debug_path.is_file(): + print(f" Error: Debug file is not a regular file: {debug_path}") + return False + + # Check file size + file_size = debug_path.stat().st_size + print(f" Debug file size: {file_size} bytes") + + if file_size == 0: + print(f" Error: Debug file is empty: {debug_path}") + os.remove(debug_path) + return False + + # Check if the debug file contains debug sections using objdump + try: + result = subprocess.run(["objdump", "-h", str(debug_path)], capture_output=True, text=True, check=True) + debug_sections = [line for line in result.stdout.split('\n') if line.strip().startswith('.debug_')] + print(f" Found {len(debug_sections)} debug sections") + + if debug_sections: + print(f" Debug sections found:") + for section in debug_sections[:5]: # Show first 5 sections + print(f" {section.strip()}") + if len(debug_sections) > 5: + print(f" ... 
and {len(debug_sections) - 5} more") + else: + # If no debug sections found, check if the file has substantial content + # Some debug files might contain other types of debug information + if file_size > 1000: # More than 1KB + print(f" Warning: No debug sections found, but file has substantial content ({file_size} bytes)") + print(f" Accepting debug file as it may contain other debug information") + else: + print(f" Error: Debug file contains no debug sections and is too small: {debug_path}") + os.remove(debug_path) + return False + + except (subprocess.CalledProcessError, FileNotFoundError): + print(f" Warning: Could not verify debug sections with objdump") + # If we can't verify with objdump, just check that the file has content + if file_size > 0: + print(f" Debug file has content ({file_size} bytes), assuming it's valid") + else: + print(f" Error: Debug file appears to be empty") + os.remove(debug_path) + return False + + print(f"Successfully created debug file: {debug_path}") + return True + + +def verify_dsym_bundle(dsym_path: Path) -> bool: + """Verify that a .dSYM bundle was created successfully and contains content.""" + print(f"Verifying .dSYM bundle: {dsym_path}") + + if not dsym_path.exists(): + print(f" Error: .dSYM bundle does not exist: {dsym_path}") + return False + + if not dsym_path.is_dir(): + print(f" Error: .dSYM bundle is not a directory: {dsym_path}") + return False + + # Check if the .dSYM bundle contains the expected Contents/Resources/DWARF directory + dwarf_dir = dsym_path / "Contents" / "Resources" / "DWARF" + print(f" Checking for DWARF directory: {dwarf_dir}") + + if not dwarf_dir.exists(): + print(f" Error: DWARF directory does not exist: {dwarf_dir}") + # List what's actually in the .dSYM bundle + print(f" Contents of .dSYM bundle:") + for item in dsym_path.rglob("*"): + print(f" {item}") + shutil.rmtree(dsym_path, ignore_errors=True) + return False + + dwarf_files = list(dwarf_dir.iterdir()) + if not dwarf_files: + print(f" Error: 
DWARF directory is empty: {dwarf_dir}") + shutil.rmtree(dsym_path, ignore_errors=True) + return False + + print(f" Success: Found {len(dwarf_files)} files in DWARF directory") + for dwarf_file in dwarf_files: + print(f" {dwarf_file.name}") + + print(f"Successfully created .dSYM bundle: {dsym_path}") + return True + + def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: """ Create debug symbols from a shared object and strip them from the original. @@ -56,15 +184,25 @@ def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: # Then keep the debug symbols in a separate file debug_out = f"{so_file}.debug" - subprocess.run([objcopy, "--only-keep-debug", so_file, debug_out], check=True) + try: + subprocess.run([objcopy, "--only-keep-debug", so_file, debug_out], check=True) - # Strip the debug symbols from the .so file - subprocess.run([strip, "-g", so_file], check=True) + # Verify that the debug file was created and contains content + if verify_debug_file(Path(debug_out)): + # Strip the debug symbols from the .so file + subprocess.run([strip, "-g", so_file], check=True) - # Link the debug symbols to the .so file - subprocess.run([objcopy, "--add-gnu-debuglink", debug_out, so_file], check=True) + # Link the debug symbols to the .so file + subprocess.run([objcopy, "--add-gnu-debuglink", debug_out, so_file], check=True) - return debug_out + return debug_out + else: + print(f"Warning: Failed to create valid debug file for {so_file}") + return None + + except subprocess.CalledProcessError as e: + print(f"Warning: objcopy failed to create debug file: {e}") + return None elif current_os == "Darwin": dsymutil = shutil.which("dsymutil") @@ -72,10 +210,8 @@ def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: debug_path = None if dsymutil: - # 1) Emit dSYM - dsym_path = Path(so_file).with_suffix(".dSYM") - subprocess.run([dsymutil, so_file, "-o", str(dsym_path)], check=False) - debug_path = str(dsym_path) + # 1) Emit dSYM - let 
dsymutil handle the detection + debug_path = create_dsym_bundle(so_file, dsymutil) if strip: # Strip DWARF + local symbols @@ -88,46 +224,46 @@ def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: return None -def find_so_files_in_wheel(wheel_path: str) -> List[Tuple[str, bytes]]: - """Find and read .so files from a wheel file.""" - so_files = [] +def find_dynamic_libraries_in_wheel(wheel_path: str) -> List[Tuple[str, bytes]]: + """Find and read .so and .dylib files from a wheel file.""" + dynamic_libs = [] with zipfile.ZipFile(wheel_path, "r") as wheel: for file_info in wheel.infolist(): - if file_info.filename.endswith(".so"): - so_files.append((file_info.filename, wheel.read(file_info.filename))) + if file_info.filename.endswith(".so") or file_info.filename.endswith(".dylib"): + dynamic_libs.append((file_info.filename, wheel.read(file_info.filename))) - return so_files + return dynamic_libs -def process_so_file_from_wheel(so_filename: str, so_content: bytes, temp_dir: str) -> Union[str, None]: +def process_dynamic_library_from_wheel(lib_filename: str, lib_content: bytes, temp_dir: str) -> Union[str, None]: """ - Process a .so file from a wheel to create debug symbols. + Process a dynamic library (.so or .dylib) from a wheel to create debug symbols. 
Args: - so_filename: Original filename in the wheel - so_content: Binary content of the .so file + lib_filename: Original filename in the wheel + lib_content: Binary content of the dynamic library file temp_dir: Temporary directory to work in Returns: Path to the created debug symbol file, or None if no debug symbols were created """ - # Create a temporary file for the .so to process it, preserving directory structure - so_path = os.path.join(temp_dir, so_filename) - os.makedirs(os.path.dirname(so_path), exist_ok=True) - with open(so_path, "wb") as f: - f.write(so_content) + # Create a temporary file for the dynamic library to process it, preserving directory structure + lib_path = os.path.join(temp_dir, lib_filename) + os.makedirs(os.path.dirname(lib_path), exist_ok=True) + with open(lib_path, "wb") as f: + f.write(lib_content) - print(f"Processing .so file: {so_filename}") + print(f"Processing dynamic library: {lib_filename}") try: - debug_file = create_and_strip_debug_symbols(so_path) + debug_file = create_and_strip_debug_symbols(lib_path) if debug_file: print(f"Created debug symbols: {debug_file}") return debug_file return None except Exception as e: - print(f"Error processing .so file {so_filename}: {e}") + print(f"Error processing dynamic library {lib_filename}: {e}") return None @@ -149,20 +285,20 @@ def create_debug_symbols_package(wheel_path: str, debug_files: List[str], output return debug_package_path -def update_wheel_with_stripped_so_files(wheel_path: str, temp_dir: str): - """Update the wheel with stripped .so files.""" +def update_wheel_with_stripped_dynamic_libraries(wheel_path: str, temp_dir: str): + """Update the wheel with stripped .so and .dylib files.""" temp_wheel_path = f"{wheel_path}.tmp" - # Create new wheel with stripped .so files + # Create new wheel with stripped dynamic library files with zipfile.ZipFile(wheel_path, "r") as source_wheel, zipfile.ZipFile( temp_wheel_path, "w", zipfile.ZIP_DEFLATED ) as temp_wheel: for file_info in 
source_wheel.infolist(): - if file_info.filename.endswith(".so"): + if file_info.filename.endswith(".so") or file_info.filename.endswith(".dylib"): # Replace with stripped version, preserving directory structure - stripped_so_path = os.path.join(temp_dir, file_info.filename) - if os.path.exists(stripped_so_path): - with open(stripped_so_path, "rb") as f: + stripped_lib_path = os.path.join(temp_dir, file_info.filename) + if os.path.exists(stripped_lib_path): + with open(stripped_lib_path, "rb") as f: temp_wheel.writestr(file_info.filename, f.read()) else: # If stripping failed, keep original @@ -172,7 +308,7 @@ def update_wheel_with_stripped_so_files(wheel_path: str, temp_dir: str): # Replace original wheel with updated version os.replace(temp_wheel_path, wheel_path) - print(f"Updated wheel with stripped .so files: {wheel_path}") + print(f"Updated wheel with stripped dynamic library files: {wheel_path}") def process_wheel(wheel_path: str, output_dir: Optional[str] = None) -> Optional[str]: @@ -184,22 +320,22 @@ def process_wheel(wheel_path: str, output_dir: Optional[str] = None) -> Optional print(f"Processing wheel: {wheel_path}") - # Find and read .so files from the wheel - so_files = find_so_files_in_wheel(wheel_path) + # Find and read .so and .dylib files from the wheel + dynamic_libs = find_dynamic_libraries_in_wheel(wheel_path) - if not so_files: - print("No .so files found in wheel") + if not dynamic_libs: + print("No .so or .dylib files found in wheel") return None - print(f"Found {len(so_files)} .so files") + print(f"Found {len(dynamic_libs)} dynamic library files") # Create temporary directory for processing with tempfile.TemporaryDirectory() as temp_dir: debug_files = [] - # Process each .so file from the wheel - for so_filename, so_content in so_files: - debug_file = process_so_file_from_wheel(so_filename, so_content, temp_dir) + # Process each dynamic library file from the wheel + for lib_filename, lib_content in dynamic_libs: + debug_file = 
process_dynamic_library_from_wheel(lib_filename, lib_content, temp_dir) if debug_file: debug_files.append(debug_file) @@ -207,11 +343,13 @@ def process_wheel(wheel_path: str, output_dir: Optional[str] = None) -> Optional print("No debug symbols were created") return None + print(f"Successfully created {len(debug_files)} debug symbol files") + # Create debug symbols package debug_package_path = create_debug_symbols_package(wheel_path, debug_files, output_dir, temp_dir) - # Update wheel with stripped .so files - update_wheel_with_stripped_so_files(wheel_path, temp_dir) + # Update wheel with stripped dynamic library files + update_wheel_with_stripped_dynamic_libraries(wheel_path, temp_dir) return debug_package_path From 543284ec864ade7627a32b2ec908a3a99ff1e7cf Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 19 Aug 2025 16:57:22 -0400 Subject: [PATCH 45/80] fix lto options for appleclang --- .../profiling/cmake/AnalysisFunc.cmake | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake index 567ba15208f..67b9545804b 100644 --- a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake @@ -27,7 +27,7 @@ function(add_ddup_config target) endif() # Common link options - target_link_options(${target} PRIVATE "$<$:>") + target_link_options(${target} PRIVATE "$<$:-g>") if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") # macOS-specific linker options @@ -46,11 +46,30 @@ function(add_ddup_config target) -Wl,--exclude-libs,ALL) endif() - # If we can IPO, then do so + # If we can IPO, then do so Note: We use thin LTO where supported to preserve debug symbols and match Rust's LTO + # strategy check_ipo_supported(RESULT result) if(result) - set_property(TARGET ${target} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + # Use thin LTO instead of full LTO for better debug 
symbol preservation + if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") + # Use thin LTO for AppleClang to preserve debug symbols + target_compile_options(${target} PRIVATE -flto=thin) + target_link_options(${target} PRIVATE -flto=thin) + # On Darwin, preserve LTO object files for debug symbols + target_link_options(${target} PRIVATE -Wl,-object_path_lto,${CMAKE_CURRENT_BINARY_DIR}/${target}_lto.o) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + # Use thin LTO for Clang + target_compile_options(${target} PRIVATE -flto=thin) + target_link_options(${target} PRIVATE -flto=thin) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + # Use thin LTO for GCC + target_compile_options(${target} PRIVATE -flto=thin) + target_link_options(${target} PRIVATE -flto=thin) + else() + # Fallback to regular LTO for other compilers + set_property(TARGET ${target} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() endif() # Propagate sanitizers @@ -85,4 +104,5 @@ function(add_ddup_config target) # The main targets, ddup, crashtracker, stack_v2, and dd_wrapper are built as dynamic libraries, so PIC is required. # And setting this is also fine for tests as they're loading those dynamic libraries. 
set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON) + endfunction() From 8da48c9aa5e884dc8ba1bfda0727e558aae9503e Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 19 Aug 2025 17:16:52 -0400 Subject: [PATCH 46/80] fix lto --- ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake index 67b9545804b..3930e1e568a 100644 --- a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake @@ -64,8 +64,8 @@ function(add_ddup_config target) target_link_options(${target} PRIVATE -flto=thin) elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") # Use thin LTO for GCC - target_compile_options(${target} PRIVATE -flto=thin) - target_link_options(${target} PRIVATE -flto=thin) + target_compile_options(${target} PRIVATE -flto) + target_link_options(${target} PRIVATE -flto) else() # Fallback to regular LTO for other compilers set_property(TARGET ${target} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) From bd9a3e7409b4f6b2c90557595cc1e5b740580e8e Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 19 Aug 2025 17:19:23 -0400 Subject: [PATCH 47/80] recursive copy for mac --- scripts/extract_debug_symbols.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index 24f64fffafa..ee4df6ed90c 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -279,7 +279,24 @@ def create_debug_symbols_package(wheel_path: str, debug_files: List[str], output # Add the debug file to the zip, preserving directory structure # The debug_file path is relative to temp_dir, so we need to extract the relative path rel_path = os.path.relpath(debug_file, temp_dir) - debug_zip.write(debug_file, rel_path) + + 
if os.path.isdir(debug_file): + # For directories (like .dSYM bundles), recursively add all contents + for root, dirs, files in os.walk(debug_file): + # Add directories + for dir_name in dirs: + dir_path = os.path.join(root, dir_name) + arc_path = os.path.relpath(dir_path, temp_dir) + debug_zip.write(dir_path, arc_path) + + # Add files + for file_name in files: + file_path = os.path.join(root, file_name) + arc_path = os.path.relpath(file_path, temp_dir) + debug_zip.write(file_path, arc_path) + else: + # For regular files, add directly + debug_zip.write(debug_file, rel_path) print(f"Created debug symbols package: {debug_package_path}") return debug_package_path From 823b3fb0d94802963506b4b148176b66e0220739 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 19 Aug 2025 17:26:31 -0400 Subject: [PATCH 48/80] use in instead of startswith --- scripts/extract_debug_symbols.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index ee4df6ed90c..aa32e230ea1 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -85,7 +85,7 @@ def verify_debug_file(debug_path: Path) -> bool: # Check if the debug file contains debug sections using objdump try: result = subprocess.run(["objdump", "-h", str(debug_path)], capture_output=True, text=True, check=True) - debug_sections = [line for line in result.stdout.split('\n') if line.strip().startswith('.debug_')] + debug_sections = [line for line in result.stdout.split('\n') if '.debug_' in line] print(f" Found {len(debug_sections)} debug sections") if debug_sections: From 0bae713603555c5d6580744af57498332645b227 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 19 Aug 2025 17:29:43 -0400 Subject: [PATCH 49/80] format --- scripts/extract_debug_symbols.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index 
aa32e230ea1..ff0fd7e17ef 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -85,11 +85,11 @@ def verify_debug_file(debug_path: Path) -> bool: # Check if the debug file contains debug sections using objdump try: result = subprocess.run(["objdump", "-h", str(debug_path)], capture_output=True, text=True, check=True) - debug_sections = [line for line in result.stdout.split('\n') if '.debug_' in line] + debug_sections = [line for line in result.stdout.split("\n") if ".debug_" in line] print(f" Found {len(debug_sections)} debug sections") if debug_sections: - print(f" Debug sections found:") + print(" Debug sections found:") for section in debug_sections[:5]: # Show first 5 sections print(f" {section.strip()}") if len(debug_sections) > 5: @@ -99,19 +99,19 @@ def verify_debug_file(debug_path: Path) -> bool: # Some debug files might contain other types of debug information if file_size > 1000: # More than 1KB print(f" Warning: No debug sections found, but file has substantial content ({file_size} bytes)") - print(f" Accepting debug file as it may contain other debug information") + print(" Accepting debug file as it may contain other debug information") else: print(f" Error: Debug file contains no debug sections and is too small: {debug_path}") os.remove(debug_path) return False except (subprocess.CalledProcessError, FileNotFoundError): - print(f" Warning: Could not verify debug sections with objdump") + print(" Warning: Could not verify debug sections with objdump") # If we can't verify with objdump, just check that the file has content if file_size > 0: print(f" Debug file has content ({file_size} bytes), assuming it's valid") else: - print(f" Error: Debug file appears to be empty") + print(" Error: Debug file appears to be empty") os.remove(debug_path) return False @@ -138,7 +138,7 @@ def verify_dsym_bundle(dsym_path: Path) -> bool: if not dwarf_dir.exists(): print(f" Error: DWARF directory does not exist: {dwarf_dir}") # List what's 
actually in the .dSYM bundle - print(f" Contents of .dSYM bundle:") + print(" Contents of .dSYM bundle:") for item in dsym_path.rglob("*"): print(f" {item}") shutil.rmtree(dsym_path, ignore_errors=True) From 9c0bcffc55e51bf9da4563ea582ae1fefb9e7276 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 11:37:00 -0400 Subject: [PATCH 50/80] use taegyunkim/dd-compile-release for serverless benchmarks --- .gitlab/benchmarks/serverless.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/benchmarks/serverless.yml b/.gitlab/benchmarks/serverless.yml index d6d4685558e..6ab75c25b89 100644 --- a/.gitlab/benchmarks/serverless.yml +++ b/.gitlab/benchmarks/serverless.yml @@ -5,6 +5,7 @@ benchmark-serverless: stage: benchmarks trigger: project: DataDog/serverless-tools + branch: taegyunkim/dd-compile-release strategy: depend needs: [] rules: From 357ad7eb2ad7c5e1e117fbdb7bf4a3f33e583689 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 13:24:34 -0400 Subject: [PATCH 51/80] strip debug symbol when release --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index fd6f642bc19..81b389d1f69 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ import warnings import cmake +import setuptools_rust from setuptools_rust import Binding from setuptools_rust import RustExtension from setuptools_rust import build_rust @@ -198,6 +199,7 @@ def __init__(self, attrs=None): py_limited_api="auto", binding=Binding.PyO3, debug=COMPILE_MODE.lower() == "debug", + strip=setuptools_rust.Strip.All if COMPILE_MODE.lower() == "release" else setuptools_rust.Strip.No, features=( ["crashtracker", "profiling"] if CURRENT_OS in ("Linux", "Darwin") and is_64_bit_python() else [] ), From 8f0d7b9cf9e9919363da8aac4a8725e3e7aa85af Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 13:44:40 -0400 Subject: [PATCH 52/80] add debug print for compile mode --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py 
b/setup.py index 81b389d1f69..208f65172f9 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,8 @@ else: COMPILE_MODE = os.environ.get("DD_COMPILE_MODE", COMPILE_MODE) +print("=========== COMPILE_MODE ===========", COMPILE_MODE) + FAST_BUILD = os.getenv("DD_FAST_BUILD", "false").lower() in ("1", "yes", "on", "true") if FAST_BUILD: print("WARNING: DD_FAST_BUILD is enabled, some optimizations will be disabled") From d7eaa71af3a148fd4a673cf3e40da56d21a97274 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 14:57:05 -0400 Subject: [PATCH 53/80] fix typo --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 208f65172f9..b2b830c8f53 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ CURRENT_OS = platform.system() -# ON Windows, we build with Release by default, and RelWithDebInfo for other platforms +# On Windows, we build with Release by default, and RelWithDebInfo for other platforms # to generate debug symbols for native extensions. # Note: We strip debug symbols when releasing wheels using scripts/extract_debug_symbols.py COMPILE_MODE = "Release" if CURRENT_OS == "Windows" else "RelWithDebInfo" From 5bb7ab45ae0e20ab01f6eb9123b0a088e7fbd51a Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 15:00:03 -0400 Subject: [PATCH 54/80] update comment --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index b2b830c8f53..aee4d430ce4 100644 --- a/setup.py +++ b/setup.py @@ -201,6 +201,9 @@ def __init__(self, attrs=None): py_limited_api="auto", binding=Binding.PyO3, debug=COMPILE_MODE.lower() == "debug", + # The release build profile sets debug='line-tables-only', to + # build with debug symbols. If one wants a binary without debug + # symbols, one can use DD_COMPILE_MODE=Release. 
strip=setuptools_rust.Strip.All if COMPILE_MODE.lower() == "release" else setuptools_rust.Strip.No, features=( ["crashtracker", "profiling"] if CURRENT_OS in ("Linux", "Darwin") and is_64_bit_python() else [] From e9d7fc29d9a743e2db28ea46781961b70adb6604 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 15:02:52 -0400 Subject: [PATCH 55/80] Revive try_strip_symbols for minsizerel --- setup.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/setup.py b/setup.py index aee4d430ce4..4efc5ee7c09 100644 --- a/setup.py +++ b/setup.py @@ -532,6 +532,20 @@ def build_rust(self): elif CURRENT_OS == "Darwin": subprocess.run(["install_name_tool", "-id", native_name, library], check=True) + @staticmethod + def try_strip_symbols(so_file): + if CURRENT_OS == "Linux" and shutil.which("strip") is not None: + try: + subprocess.run(["strip", "-g", so_file], check=True) + except subprocess.CalledProcessError as e: + print( + "WARNING: stripping '{}' returned non-zero exit status ({}), ignoring".format(so_file, e.returncode) + ) + except Exception as e: + print( + "WARNING: An error occurred while stripping the symbols from '{}', ignoring: {}".format(so_file, e) + ) + def build_extension(self, ext): if isinstance(ext, CMakeExtension): try: @@ -549,6 +563,12 @@ def build_extension(self, ext): else: super().build_extension(ext) + if COMPILE_MODE.lower() == "minsizerel": + try: + self.try_strip.symbols(self.get_ext_fullpath(ext.name)) + except Exception as e: + print(f"WARNING: An error occurred while building the extension: {e}") + def build_extension_cmake(self, ext: "CMakeExtension") -> None: if IS_EDITABLE and self.INCREMENTAL: # DEV: Rudimentary incremental build support. 
We copy the logic from From ae5d0fc4d3ba21bc24e8de5cb31c9a8740fa0abe Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 14:53:44 -0400 Subject: [PATCH 56/80] remove debug print --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index 4efc5ee7c09..7a800c0520d 100644 --- a/setup.py +++ b/setup.py @@ -63,8 +63,6 @@ else: COMPILE_MODE = os.environ.get("DD_COMPILE_MODE", COMPILE_MODE) -print("=========== COMPILE_MODE ===========", COMPILE_MODE) - FAST_BUILD = os.getenv("DD_FAST_BUILD", "false").lower() in ("1", "yes", "on", "true") if FAST_BUILD: print("WARNING: DD_FAST_BUILD is enabled, some optimizations will be disabled") From e2d4f3c725a60a42ec6cc4bcf72997fc2f79d256 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 15:14:41 -0400 Subject: [PATCH 57/80] update comments --- scripts/extract_debug_symbols.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index ff0fd7e17ef..dbf0509fc66 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -3,11 +3,11 @@ Extract debug symbols from wheels and create separate debug symbol packages. This script: -1. Processes each .so file in the wheel -2. Creates debug symbols (.debug files on Linux, .dSYM bundles on macOS) for each .so file -3. Strips debug symbols from the .so files -4. Packages debug symbols into a separate zip file -5. Updates the wheel with stripped .so files +1. Processes each .so/.dylib file in the wheel +2. Creates debug symbols (.debug files on Linux, .dSYM bundles on macOS) for each .so/.dylib file +3. Strips debug symbols from the original .so/.dylib files +4. Packages debug symbols into a separate zip file (with proper recursive copying for .dSYM bundles) +5. 
Updates the wheel with stripped .so/.dylib files """ import argparse From 3c22db9865bead9d5657015058a1ce5f9b98cdb0 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 15:31:12 -0400 Subject: [PATCH 58/80] supply ignore pattern having libddwaf* by default --- scripts/extract_debug_symbols.py | 42 ++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index dbf0509fc66..d681be9d3b5 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -3,7 +3,7 @@ Extract debug symbols from wheels and create separate debug symbol packages. This script: -1. Processes each .so/.dylib file in the wheel +1. Processes each .so/.dylib file in the wheel (excluding files that match ignore patterns) 2. Creates debug symbols (.debug files on Linux, .dSYM bundles on macOS) for each .so/.dylib file 3. Strips debug symbols from the original .so/.dylib files 4. Packages debug symbols into a separate zip file (with proper recursive copying for .dSYM bundles) @@ -11,6 +11,7 @@ """ import argparse +import fnmatch import os from pathlib import Path import platform @@ -224,14 +225,31 @@ def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: return None -def find_dynamic_libraries_in_wheel(wheel_path: str) -> List[Tuple[str, bytes]]: - """Find and read .so and .dylib files from a wheel file.""" +def should_ignore_file(filename: str, ignore_patterns: List[str]) -> bool: + """Check if a file should be ignored based on glob patterns.""" + if not ignore_patterns: + return False + + for pattern in ignore_patterns: + if fnmatch.fnmatch(filename, pattern.strip()): + print(f"Ignoring {filename} (matches pattern: {pattern})") + return True + return False + + +def find_dynamic_libraries_in_wheel(wheel_path: str, ignore_patterns: List[str] = None) -> List[Tuple[str, bytes]]: + """Find and read .so and .dylib files from a wheel file, excluding 
ignored patterns.""" dynamic_libs = [] with zipfile.ZipFile(wheel_path, "r") as wheel: for file_info in wheel.infolist(): if file_info.filename.endswith(".so") or file_info.filename.endswith(".dylib"): - dynamic_libs.append((file_info.filename, wheel.read(file_info.filename))) + # Extract just the filename without path for pattern matching + filename = os.path.basename(file_info.filename) + if not should_ignore_file(filename, ignore_patterns or []): + dynamic_libs.append((file_info.filename, wheel.read(file_info.filename))) + else: + print(f"Skipping {file_info.filename} (matches ignore pattern)") return dynamic_libs @@ -328,7 +346,9 @@ def update_wheel_with_stripped_dynamic_libraries(wheel_path: str, temp_dir: str) print(f"Updated wheel with stripped dynamic library files: {wheel_path}") -def process_wheel(wheel_path: str, output_dir: Optional[str] = None) -> Optional[str]: +def process_wheel( + wheel_path: str, output_dir: Optional[str] = None, ignore_patterns: List[str] = None +) -> Optional[str]: """Process a single wheel file.""" if output_dir is None: output_dir = os.path.dirname(wheel_path) @@ -338,7 +358,7 @@ def process_wheel(wheel_path: str, output_dir: Optional[str] = None) -> Optional print(f"Processing wheel: {wheel_path}") # Find and read .so and .dylib files from the wheel - dynamic_libs = find_dynamic_libraries_in_wheel(wheel_path) + dynamic_libs = find_dynamic_libraries_in_wheel(wheel_path, ignore_patterns) if not dynamic_libs: print("No .so or .dylib files found in wheel") @@ -375,6 +395,11 @@ def main(): parser = argparse.ArgumentParser(description="Extract debug symbols from wheels") parser.add_argument("wheel", help="Path to the wheel file") parser.add_argument("--output-dir", "-o", help="Output directory for debug symbol packages") + parser.add_argument( + "--ignore-patterns", + default="libddwaf*", + help="Comma-separated list of glob patterns to ignore (default: libddwaf*)", + ) args = parser.parse_args() @@ -382,8 +407,11 @@ def main(): 
print(f"Error: Wheel file not found: {args.wheel}") sys.exit(1) + # Parse ignore patterns + ignore_patterns = [p.strip() for p in args.ignore_patterns.split(",") if p.strip()] + try: - debug_package_path = process_wheel(args.wheel, args.output_dir) + debug_package_path = process_wheel(args.wheel, args.output_dir, ignore_patterns) if debug_package_path: print(f"Successfully processed wheel. Debug symbols saved to: {debug_package_path}") else: From 3356d8d63d9764f3e7aef9cc47b38255b2d1447d Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 15:43:25 -0400 Subject: [PATCH 59/80] macOS: error when no debug symbols --- scripts/extract_debug_symbols.py | 38 +++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index d681be9d3b5..ab72278a295 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -44,6 +44,11 @@ def create_dsym_bundle(so_file: str, dsymutil: str) -> Optional[str]: print(f"dsymutil stdout: {result.stdout}") if result.stderr: print(f"dsymutil stderr: {result.stderr}") + if "no debug symbols" in result.stderr: + print( + f" Warning: dsymutil failed to create .dSYM bundle for {so_file} because it has no debug sections" + ) + return None # Verify that the .dSYM bundle was created and contains content if verify_dsym_bundle(dsym_path): @@ -348,8 +353,12 @@ def update_wheel_with_stripped_dynamic_libraries(wheel_path: str, temp_dir: str) def process_wheel( wheel_path: str, output_dir: Optional[str] = None, ignore_patterns: List[str] = None -) -> Optional[str]: - """Process a single wheel file.""" +) -> Tuple[Optional[str], bool]: + """Process a single wheel file. + + Returns: + Tuple of (debug_package_path, success). success is False if no debug symbols were found. 
+ """ if output_dir is None: output_dir = os.path.dirname(wheel_path) @@ -362,23 +371,29 @@ def process_wheel( if not dynamic_libs: print("No .so or .dylib files found in wheel") - return None + return None, True # Success - no files to process print(f"Found {len(dynamic_libs)} dynamic library files") # Create temporary directory for processing with tempfile.TemporaryDirectory() as temp_dir: debug_files = [] + failed_libs = [] # Process each dynamic library file from the wheel for lib_filename, lib_content in dynamic_libs: debug_file = process_dynamic_library_from_wheel(lib_filename, lib_content, temp_dir) if debug_file: debug_files.append(debug_file) + else: + failed_libs.append(lib_filename) - if not debug_files: - print("No debug symbols were created") - return None + if failed_libs: + print("ERROR: Failed to generate debug symbols for the following libraries:") + for lib in failed_libs: + print(f" - {lib}") + print("This indicates that these binaries were built without debug symbols or they were already stripped") + return None, False print(f"Successfully created {len(debug_files)} debug symbol files") @@ -388,7 +403,7 @@ def process_wheel( # Update wheel with stripped dynamic library files update_wheel_with_stripped_dynamic_libraries(wheel_path, temp_dir) - return debug_package_path + return debug_package_path, True def main(): @@ -411,11 +426,14 @@ def main(): ignore_patterns = [p.strip() for p in args.ignore_patterns.split(",") if p.strip()] try: - debug_package_path = process_wheel(args.wheel, args.output_dir, ignore_patterns) - if debug_package_path: + debug_package_path, success = process_wheel(args.wheel, args.output_dir, ignore_patterns) + if not success: + print("ERROR: Failed to extract debug symbols from wheel") + sys.exit(1) + elif debug_package_path: print(f"Successfully processed wheel. 
Debug symbols saved to: {debug_package_path}") else: - print("No debug symbols were created") + print("No debug symbols were created (no dynamic libraries found)") except Exception as e: print(f"Error processing wheel: {e}") sys.exit(1) From 3de37a2287ab254c934eadae3245020527e5c1a0 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 16:05:02 -0400 Subject: [PATCH 60/80] Fail if no debug symbols --- scripts/extract_debug_symbols.py | 75 ++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py index ab72278a295..77cdccbaa65 100644 --- a/scripts/extract_debug_symbols.py +++ b/scripts/extract_debug_symbols.py @@ -8,6 +8,10 @@ 3. Strips debug symbols from the original .so/.dylib files 4. Packages debug symbols into a separate zip file (with proper recursive copying for .dSYM bundles) 5. Updates the wheel with stripped .so/.dylib files + +On Linux, the script will exit with error code 1 if: +- Any input .so file does not contain debug symbols (not built with -g flag) +- Any generated debug file does not contain debug symbols """ import argparse @@ -31,6 +35,29 @@ def get_debug_symbol_patterns(): return ["*.debug", "*.dSYM/*"] +def has_debug_symbols(so_file: str) -> bool: + """Check if a .so file has debug symbols (Linux only).""" + if platform.system() != "Linux": + # On non-Linux platforms, assume debug symbols exist to avoid false positives + return True + + try: + # Use objdump to check for debug sections + result = subprocess.run(["objdump", "-h", so_file], capture_output=True, text=True, check=True) + debug_sections = [line for line in result.stdout.split("\n") if ".debug_" in line] + + if debug_sections: + print(f" Found {len(debug_sections)} debug sections in {so_file}") + return True + else: + print(f" No debug sections found in {so_file}") + return False + except (subprocess.CalledProcessError, FileNotFoundError) as e: + print(f" Warning: 
Could not check debug symbols in {so_file}: {e}") + # If we can't check, assume it has debug symbols to avoid false positives + return True + + def create_dsym_bundle(so_file: str, dsymutil: str) -> Optional[str]: """Create a .dSYM bundle for a .so file.""" dsym_path = Path(so_file).with_suffix(".dSYM") @@ -68,7 +95,7 @@ def create_dsym_bundle(so_file: str, dsymutil: str) -> Optional[str]: def verify_debug_file(debug_path: Path) -> bool: - """Verify that a Linux .debug file was created successfully and contains content.""" + """Verify that a Linux .debug file was created successfully and contains debug symbols.""" print(f"Verifying debug file: {debug_path}") if not debug_path.exists(): @@ -100,29 +127,19 @@ def verify_debug_file(debug_path: Path) -> bool: print(f" {section.strip()}") if len(debug_sections) > 5: print(f" ... and {len(debug_sections) - 5} more") + print(f"Successfully created debug file: {debug_path}") + return True else: - # If no debug sections found, check if the file has substantial content - # Some debug files might contain other types of debug information - if file_size > 1000: # More than 1KB - print(f" Warning: No debug sections found, but file has substantial content ({file_size} bytes)") - print(" Accepting debug file as it may contain other debug information") - else: - print(f" Error: Debug file contains no debug sections and is too small: {debug_path}") - os.remove(debug_path) - return False - - except (subprocess.CalledProcessError, FileNotFoundError): - print(" Warning: Could not verify debug sections with objdump") - # If we can't verify with objdump, just check that the file has content - if file_size > 0: - print(f" Debug file has content ({file_size} bytes), assuming it's valid") - else: - print(" Error: Debug file appears to be empty") + # No debug sections found - this is an error + print(f" Error: Debug file contains no debug sections: {debug_path}") os.remove(debug_path) return False - print(f"Successfully created debug file: 
{debug_path}") - return True + except (subprocess.CalledProcessError, FileNotFoundError): + print(" Error: Could not verify debug sections with objdump") + # If we can't verify with objdump, this is an error + os.remove(debug_path) + return False def verify_dsym_bundle(dsym_path: Path) -> bool: @@ -185,6 +202,12 @@ def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) return None + # Check if the input .so file has debug symbols + print(f"Checking for debug symbols in: {so_file}") + if not has_debug_symbols(so_file): + print(f"ERROR: {so_file} does not contain debug symbols (not built with -g)") + return None + # Try removing the .llvmbc section from the .so file subprocess.run([objcopy, "--remove-section", ".llvmbc", so_file], check=False) @@ -193,7 +216,7 @@ def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: try: subprocess.run([objcopy, "--only-keep-debug", so_file, debug_out], check=True) - # Verify that the debug file was created and contains content + # Verify that the debug file was created and contains debug symbols if verify_debug_file(Path(debug_out)): # Strip the debug symbols from the .so file subprocess.run([strip, "-g", so_file], check=True) @@ -203,11 +226,11 @@ def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: return debug_out else: - print(f"Warning: Failed to create valid debug file for {so_file}") + print(f"ERROR: Failed to create valid debug file for {so_file}") return None except subprocess.CalledProcessError as e: - print(f"Warning: objcopy failed to create debug file: {e}") + print(f"ERROR: objcopy failed to create debug file: {e}") return None elif current_os == "Darwin": @@ -392,7 +415,10 @@ def process_wheel( print("ERROR: Failed to generate debug symbols for the following libraries:") for lib in failed_libs: print(f" - {lib}") - print("This indicates that these binaries were built without debug symbols or 
they were already stripped") + print( + "This indicates that these binaries were built without debug symbols (-g flag) " + "or they were already stripped" + ) return None, False print(f"Successfully created {len(debug_files)} debug symbol files") @@ -429,6 +455,7 @@ def main(): debug_package_path, success = process_wheel(args.wheel, args.output_dir, ignore_patterns) if not success: print("ERROR: Failed to extract debug symbols from wheel") + print("This usually means one or more .so files were not built with debug symbols (-g flag)") sys.exit(1) elif debug_package_path: print(f"Successfully processed wheel. Debug symbols saved to: {debug_package_path}") From 20d7a6233387dd0207ea208aa532ed21ff2dd716 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 16:34:43 -0400 Subject: [PATCH 61/80] Revert to use head DataDog/serverless-tools --- .gitlab/benchmarks/serverless.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitlab/benchmarks/serverless.yml b/.gitlab/benchmarks/serverless.yml index 6ab75c25b89..d6d4685558e 100644 --- a/.gitlab/benchmarks/serverless.yml +++ b/.gitlab/benchmarks/serverless.yml @@ -5,7 +5,6 @@ benchmark-serverless: stage: benchmarks trigger: project: DataDog/serverless-tools - branch: taegyunkim/dd-compile-release strategy: depend needs: [] rules: From 58cb4bfe78a9095da2ca5d67ee0a382626f99671 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Wed, 20 Aug 2025 16:36:17 -0400 Subject: [PATCH 62/80] fix typo --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7a800c0520d..f4b9a525813 100644 --- a/setup.py +++ b/setup.py @@ -563,7 +563,7 @@ def build_extension(self, ext): if COMPILE_MODE.lower() == "minsizerel": try: - self.try_strip.symbols(self.get_ext_fullpath(ext.name)) + self.try_strip_symbols(self.get_ext_fullpath(ext.name)) except Exception as e: print(f"WARNING: An error occurred while building the extension: {e}") From b74d4c277774aeb1928d15e4e71c505990ab77a1 Mon Sep 17 
00:00:00 2001 From: Taegyun Kim Date: Thu, 21 Aug 2025 14:50:10 -0400 Subject: [PATCH 63/80] update comments and dont strip rust binary from here --- setup.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index f4b9a525813..f1374d9747a 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,6 @@ import warnings import cmake -import setuptools_rust from setuptools_rust import Binding from setuptools_rust import RustExtension from setuptools_rust import build_rust @@ -50,9 +49,20 @@ CURRENT_OS = platform.system() -# On Windows, we build with Release by default, and RelWithDebInfo for other platforms -# to generate debug symbols for native extensions. -# Note: We strip debug symbols when releasing wheels using scripts/extract_debug_symbols.py +# What's meant by each build mode is similar to that from CMake, except that +# non-CMake extensions are by default built with debug symbols. And we build +# with Release by default for Windows. +# Released wheels on Linux and macOS are stripped of debug symbols. We use +# scripts/extract_debug_symbols.py to extract the debug symbols from the wheels. +# C/C++ and Cython extensions built with setuptools.Extension, and +# Cython.Distutils.Extension by default inherits CFLAGS from the Python +# interpreter, and it usually has -O3 -g. So they're built with debug symbols +# by default. +# RustExtension src/native has two build profiles, release and debug, and only +# DD_COMPILE_MODE=Debug will build with debug profile, and rest will build with +# release profile, which also has debug symbols by default. +# And when MinSizeRel is used, we strip the debug symbols from the wheels, +# see try_strip_symbols() below. 
COMPILE_MODE = "Release" if CURRENT_OS == "Windows" else "RelWithDebInfo" if "DD_COMPILE_DEBUG" in os.environ: warnings.warn( @@ -199,10 +209,6 @@ def __init__(self, attrs=None): py_limited_api="auto", binding=Binding.PyO3, debug=COMPILE_MODE.lower() == "debug", - # The release build profile sets debug='line-tables-only', to - # build with debug symbols. If one wants a binary without debug - # symbols, one can use DD_COMPILE_MODE=Release. - strip=setuptools_rust.Strip.All if COMPILE_MODE.lower() == "release" else setuptools_rust.Strip.No, features=( ["crashtracker", "profiling"] if CURRENT_OS in ("Linux", "Darwin") and is_64_bit_python() else [] ), From fe34d6534cea5683f4a90d2aca5d007fee1b368a Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 21 Aug 2025 14:50:24 -0400 Subject: [PATCH 64/80] no need to set -g in linker options --- ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake | 3 --- 1 file changed, 3 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake index 3930e1e568a..de6d57ccd0b 100644 --- a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake @@ -26,9 +26,6 @@ function(add_ddup_config target) "$<$:-Os;-ggdb3>" -fno-semantic-interposition) endif() - # Common link options - target_link_options(${target} PRIVATE "$<$:-g>") - if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") # macOS-specific linker options target_link_options(${target} PRIVATE "$<$:-Wl,-dead_strip>") From f10708e03be8dcb0c22f76cf471fc3831a4eb1d1 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 21 Aug 2025 14:54:50 -0400 Subject: [PATCH 65/80] minimize diff --- setup.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/setup.py b/setup.py index f1374d9747a..530f6533133 100644 --- a/setup.py +++ b/setup.py @@ -884,9 +884,6 @@ def get_exts_for(name): # Cython is not 
deprecation-proof "-Wno-deprecated-declarations", ] - elif COMPILE_MODE.lower() == "relwithdebinfo": - # Only add debug symbols, let individual extensions handle optimizations and warnings - debug_compile_args = ["-g"] else: debug_compile_args = [] @@ -908,7 +905,7 @@ def get_exts_for(name): # sure we explicitly set this for normal builds, and explicitly # _unset_ it for debug builds in case the CFLAGS from sysconfig # include -DNDEBUG - + (["-DNDEBUG"] if COMPILE_MODE.lower() != "debug" else ["-UNDEBUG"]) + + (["-DNDEBUG"] if not debug_compile_args else ["-UNDEBUG"]) + ["-D_POSIX_C_SOURCE=200809L", "-std=c11"] + fast_build_args if CURRENT_OS != "Windows" @@ -919,7 +916,7 @@ def get_exts_for(name): "ddtrace.internal._threads", sources=["ddtrace/internal/_threads.cpp"], extra_compile_args=( - debug_compile_args + ["-std=c++17", "-Wall", "-Wextra"] + fast_build_args + ["-std=c++17", "-Wall", "-Wextra"] + fast_build_args if CURRENT_OS != "Windows" else ["/std:c++20", "/MT"] ), @@ -932,7 +929,7 @@ def get_exts_for(name): sources=[ "ddtrace/appsec/_iast/_stacktrace.c", ], - extra_compile_args=extra_compile_args + debug_compile_args + fast_build_args, + extra_compile_args=extra_compile_args + fast_build_args, ) ) ext_modules.append( @@ -941,7 +938,7 @@ def get_exts_for(name): sources=[ "ddtrace/appsec/_iast/_ast/iastpatch.c", ], - extra_compile_args=extra_compile_args + debug_compile_args + fast_build_args, + extra_compile_args=extra_compile_args + fast_build_args, ) ) ext_modules.append( @@ -1011,13 +1008,11 @@ def get_exts_for(name): "ddtrace.internal._rand", sources=["ddtrace/internal/_rand.pyx"], language="c", - extra_compile_args=debug_compile_args, ), Cython.Distutils.Extension( "ddtrace.internal._tagset", sources=["ddtrace/internal/_tagset.pyx"], language="c", - extra_compile_args=debug_compile_args, ), Extension( "ddtrace.internal._encoding", @@ -1025,13 +1020,11 @@ def get_exts_for(name): include_dirs=["."], libraries=encoding_libraries, 
define_macros=[(f"__{sys.byteorder.upper()}_ENDIAN__", "1")], - extra_compile_args=debug_compile_args, ), Extension( "ddtrace.internal.telemetry.metrics_namespaces", ["ddtrace/internal/telemetry/metrics_namespaces.pyx"], language="c", - extra_compile_args=debug_compile_args, ), Cython.Distutils.Extension( "ddtrace.profiling.collector.stack", @@ -1041,27 +1034,22 @@ def get_exts_for(name): # OTOH, the MSVC toolchain is different. In a perfect world we'd deduce the underlying # toolchain and emit the right flags, but as a compromise we assume Windows implies MSVC and # everything else is on a GNU-like toolchain - extra_compile_args=debug_compile_args - + extra_compile_args - + (["-Wno-int-conversion"] if CURRENT_OS != "Windows" else []), + extra_compile_args=extra_compile_args + (["-Wno-int-conversion"] if CURRENT_OS != "Windows" else []), ), Cython.Distutils.Extension( "ddtrace.profiling.collector._traceback", sources=["ddtrace/profiling/collector/_traceback.pyx"], language="c", - extra_compile_args=debug_compile_args, ), Cython.Distutils.Extension( "ddtrace.profiling._threading", sources=["ddtrace/profiling/_threading.pyx"], language="c", - extra_compile_args=debug_compile_args, ), Cython.Distutils.Extension( "ddtrace.profiling.collector._task", sources=["ddtrace/profiling/collector/_task.pyx"], language="c", - extra_compile_args=debug_compile_args, ), ], compile_time_env={ From 4b8a6455b69f2f2fbd4264f66229e6608245fce3 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 21 Aug 2025 15:01:18 -0400 Subject: [PATCH 66/80] minimize diff --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 530f6533133..c10f2052d94 100644 --- a/setup.py +++ b/setup.py @@ -929,7 +929,7 @@ def get_exts_for(name): sources=[ "ddtrace/appsec/_iast/_stacktrace.c", ], - extra_compile_args=extra_compile_args + fast_build_args, + extra_compile_args=extra_compile_args + debug_compile_args + fast_build_args, ) ) ext_modules.append( @@ -938,7 
+938,7 @@ def get_exts_for(name): sources=[ "ddtrace/appsec/_iast/_ast/iastpatch.c", ], - extra_compile_args=extra_compile_args + fast_build_args, + extra_compile_args=extra_compile_args + debug_compile_args + fast_build_args, ) ) ext_modules.append( From 91321e55977bbfd66e9622cf04fef06eda6e7e6b Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 21 Aug 2025 15:07:05 -0400 Subject: [PATCH 67/80] update comment --- ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake index de6d57ccd0b..5d5cd7e22d0 100644 --- a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake @@ -43,8 +43,8 @@ function(add_ddup_config target) -Wl,--exclude-libs,ALL) endif() - # If we can IPO, then do so Note: We use thin LTO where supported to preserve debug symbols and match Rust's LTO - # strategy + # If we can IPO, then do so. We use thin LTO where supported to preserve debug symbols and match Rust's + # LTO strategy. 
check_ipo_supported(RESULT result) if(result) From 3dddf3736002edfcc07e5dc6a3c68d56db30cbc0 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 21 Aug 2025 15:12:26 -0400 Subject: [PATCH 68/80] remove unnecessary changes --- .../datadog/profiling/cmake/AnalysisFunc.cmake | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake index 5d5cd7e22d0..f3b68ecbf32 100644 --- a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake @@ -48,23 +48,13 @@ function(add_ddup_config target) check_ipo_supported(RESULT result) if(result) - # Use thin LTO instead of full LTO for better debug symbol preservation if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") - # Use thin LTO for AppleClang to preserve debug symbols + # When using AppleClang, explicitly use thin LTO and set the object path for debug symbols. + # To match Rust native extension's thin LTO strategy. 
target_compile_options(${target} PRIVATE -flto=thin) target_link_options(${target} PRIVATE -flto=thin) - # On Darwin, preserve LTO object files for debug symbols target_link_options(${target} PRIVATE -Wl,-object_path_lto,${CMAKE_CURRENT_BINARY_DIR}/${target}_lto.o) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # Use thin LTO for Clang - target_compile_options(${target} PRIVATE -flto=thin) - target_link_options(${target} PRIVATE -flto=thin) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") - # Use thin LTO for GCC - target_compile_options(${target} PRIVATE -flto) - target_link_options(${target} PRIVATE -flto) else() - # Fallback to regular LTO for other compilers set_property(TARGET ${target} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) endif() endif() From 2c797349ddc5148affec867b34efe061b715d0c1 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 21 Aug 2025 15:23:46 -0400 Subject: [PATCH 69/80] minimize diff and add comment --- ddtrace/appsec/_iast/_taint_tracking/native.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ddtrace/appsec/_iast/_taint_tracking/native.cpp b/ddtrace/appsec/_iast/_taint_tracking/native.cpp index 9c5616a2505..5adbbe9f3d2 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/native.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/native.cpp @@ -41,6 +41,7 @@ static PyMethodDef AspectsMethods[] = { { nullptr, nullptr, 0, nullptr } }; +// Mark the module as used to prevent it from being stripped. static struct PyModuleDef aspects __attribute__((used)) = { PyModuleDef_HEAD_INIT, .m_name = PY_MODULE_NAME_ASPECTS, .m_doc = "Taint tracking Aspects", @@ -53,6 +54,7 @@ static PyMethodDef OpsMethods[] = { { nullptr, nullptr, 0, nullptr } }; +// Mark the module as used to prevent it from being stripped. 
static struct PyModuleDef ops __attribute__((used)) = { PyModuleDef_HEAD_INIT, .m_name = PY_MODULE_NAME_ASPECTS, .m_doc = "Taint tracking operations", @@ -88,9 +90,4 @@ PYBIND11_MODULE(_native, m) PyObject* hm_ops = PyModule_Create(&ops); m.add_object("ops", hm_ops); - - // Explicitly reference the module definitions to prevent them from being stripped - // This ensures the symbols are preserved even when debug symbols are stripped - (void)&ops; - (void)&aspects; } From c1949ff69e0874f578cbb61ddcbaad957c3940ec Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 21 Aug 2025 15:37:59 -0400 Subject: [PATCH 70/80] format --- .../internal/datadog/profiling/cmake/AnalysisFunc.cmake | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake index f3b68ecbf32..1a7d66493c9 100644 --- a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake @@ -43,14 +43,13 @@ function(add_ddup_config target) -Wl,--exclude-libs,ALL) endif() - # If we can IPO, then do so. We use thin LTO where supported to preserve debug symbols and match Rust's - # LTO strategy. + # If we can IPO, then do so. check_ipo_supported(RESULT result) if(result) if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") - # When using AppleClang, explicitly use thin LTO and set the object path for debug symbols. - # To match Rust native extension's thin LTO strategy. + # When using AppleClang, explicitly use thin LTO to match Rust's thin LTO strategy. And set the object path + # for debug symbols. 
target_compile_options(${target} PRIVATE -flto=thin) target_link_options(${target} PRIVATE -flto=thin) target_link_options(${target} PRIVATE -Wl,-object_path_lto,${CMAKE_CURRENT_BINARY_DIR}/${target}_lto.o) From 8d651c7e0d8ac5a36cbfbb201593f58321650ffe Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 15:21:38 -0400 Subject: [PATCH 71/80] update docs on how to set substitute-path --- docs/debug_symbols.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index 517057e5352..7e6e00188d7 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -40,3 +40,33 @@ To use debug symbols for debugging or crash analysis: 2. Extract the debug symbol files to the same directory as the corresponding `.so` files. Typically, the site-packages directory where ddtrace is installed. 3. Your debugger or crash analysis tool should automatically find the debug symbols +4. To view assembly with code side by side, you also need the source code, and + set substitute paths in your debugger to the source code directory. For example, + for `_stack_v2.cpython-313-x86_64-linux-gnu.so` is mostly compiled from + echion as specified in `ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt`. + So you first need to check out the echion repository and checkout the commit hash. + Then, set substitute paths in gdb to the echion source code directory. + Typically, if you run `dias /m ` in gdb, it will tell you the full + file path of the source code as the following: + ``` +(gdb) disas /m Frame::read +Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: +269 /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc: No such file or directory. 
+ 0x000000000000ece4 <+0>: push %r12 + 0x000000000000ece6 <+2>: mov %rdi,%r8 + 0x000000000000ece9 <+5>: push %rbp + 0x000000000000ecea <+6>: mov %rsi,%rbp + 0x000000000000eced <+9>: push %rbx + 0x000000000000ecee <+10>: sub $0x60,%rsp + +270 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc +271 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc + ``` + Then you can set substitute paths in gdb to the echion source code directory + ``` + (gdb) set substitute-path /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion /path/to/echion/source/code + ``` + Then you can run `dias /m Frame::read` again to see the assembly with code side by side. + ``` + (gdb) disas /m Frame::read + ``` From 8337a61fecc9aa142e3cbad633ae62ae607330ec Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 15:23:08 -0400 Subject: [PATCH 72/80] update with expected output --- docs/debug_symbols.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index 7e6e00188d7..824d01e44f3 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -69,4 +69,30 @@ Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: Then you can run `dias /m Frame::read` again to see the assembly with code side by side. ``` (gdb) disas /m Frame::read + Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: + warning: Source file is more recent than executable. 
+ 269 { + 0x000000000000ece4 <+0>: push %r12 + 0x000000000000ece6 <+2>: mov %rdi,%r8 + 0x000000000000ece9 <+5>: push %rbp + 0x000000000000ecea <+6>: mov %rsi,%rbp + 0x000000000000eced <+9>: push %rbx + 0x000000000000ecee <+10>: sub $0x60,%rsp + + 270 #if PY_VERSION_HEX >= 0x030b0000 + 271 _PyInterpreterFrame iframe; + + 272 #if PY_VERSION_HEX >= 0x030d0000 + 273 // From Python versions 3.13, f_executable can have objects other than + 274 // code objects for an internal frame. We need to skip some frames if + 275 // its f_executable is not code as suggested here: + 276 // https://github.com/python/cpython/issues/100987#issuecomment-1485556487 + 277 PyObject f_executable; + + 278 + 279 for (; frame_addr; frame_addr = frame_addr->previous) + 0x000000000000ecf7 <+19>: test %r8,%r8 + 0x000000000000ecfa <+22>: je 0xed91 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+173> + 0x000000000000ed88 <+164>: mov 0x8(%rbx),%r8 + 0x000000000000ed8c <+168>: jmp 0xecf7 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+19> ``` From 27d04b4237a0b872a40c527a921fa55858e17201 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 15:25:02 -0400 Subject: [PATCH 73/80] proper code formatting --- docs/debug_symbols.rst | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index 824d01e44f3..cb895dacc48 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -13,9 +13,9 @@ The project generates debug symbols during the build process: These debug symbols are extracted from the main wheels and packaged into separate `.zip` files with the naming convention: -``` -{original-wheel-name}-debug-symbols.zip -``` +:: + + {original-wheel-name}-debug-symbols.zip For example: - `ddtrace-1.20.0-cp39-cp39-linux_x86_64.whl` → `ddtrace-1.20.0-cp39-cp39-linux_x86_64-debug-symbols.zip` @@ -48,24 +48,26 @@ To use debug symbols for debugging or crash analysis: Then, set substitute paths in gdb to 
the echion source code directory. Typically, if you run `dias /m ` in gdb, it will tell you the full file path of the source code as the following: - ``` -(gdb) disas /m Frame::read -Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: -269 /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc: No such file or directory. - 0x000000000000ece4 <+0>: push %r12 - 0x000000000000ece6 <+2>: mov %rdi,%r8 - 0x000000000000ece9 <+5>: push %rbp - 0x000000000000ecea <+6>: mov %rsi,%rbp - 0x000000000000eced <+9>: push %rbx - 0x000000000000ecee <+10>: sub $0x60,%rsp - -270 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc -271 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc - ``` + + .. code-block:: bash + + (gdb) disas /m Frame::read + Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: + 269 /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc: No such file or directory. 
+ 0x000000000000ece4 <+0>: push %r12 + 0x000000000000ece6 <+2>: mov %rdi,%r8 + 0x000000000000ece9 <+5>: push %rbp + 0x000000000000ecea <+6>: mov %rsi,%rbp + 0x000000000000eced <+9>: push %rbx + 0x000000000000ecee <+10>: sub $0x60,%rsp + + 270 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc + 271 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc Then you can set substitute paths in gdb to the echion source code directory - ``` - (gdb) set substitute-path /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion /path/to/echion/source/code - ``` + + .. code-block:: bash + + (gdb) set substitute-path /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion /path/to/echion/source/code Then you can run `dias /m Frame::read` again to see the assembly with code side by side. ``` (gdb) disas /m Frame::read From 377b630bf456083a341464152806b68693b5937d Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 15:27:41 -0400 Subject: [PATCH 74/80] code formatting --- docs/debug_symbols.rst | 61 +++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index cb895dacc48..d89372da175 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -42,7 +42,7 @@ To use debug symbols for debugging or crash analysis: 3. Your debugger or crash analysis tool should automatically find the debug symbols 4. To view assembly with code side by side, you also need the source code, and set substitute paths in your debugger to the source code directory. 
For example, - for `_stack_v2.cpython-313-x86_64-linux-gnu.so` is mostly compiled from + for `_stack_v2.cpython-313-x86_64-linux-gnu.so` is compiled from echion as specified in `ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt`. So you first need to check out the echion repository and checkout the commit hash. Then, set substitute paths in gdb to the echion source code directory. @@ -69,32 +69,33 @@ To use debug symbols for debugging or crash analysis: (gdb) set substitute-path /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion /path/to/echion/source/code Then you can run `dias /m Frame::read` again to see the assembly with code side by side. - ``` - (gdb) disas /m Frame::read - Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: - warning: Source file is more recent than executable. - 269 { - 0x000000000000ece4 <+0>: push %r12 - 0x000000000000ece6 <+2>: mov %rdi,%r8 - 0x000000000000ece9 <+5>: push %rbp - 0x000000000000ecea <+6>: mov %rsi,%rbp - 0x000000000000eced <+9>: push %rbx - 0x000000000000ecee <+10>: sub $0x60,%rsp - - 270 #if PY_VERSION_HEX >= 0x030b0000 - 271 _PyInterpreterFrame iframe; - - 272 #if PY_VERSION_HEX >= 0x030d0000 - 273 // From Python versions 3.13, f_executable can have objects other than - 274 // code objects for an internal frame. We need to skip some frames if - 275 // its f_executable is not code as suggested here: - 276 // https://github.com/python/cpython/issues/100987#issuecomment-1485556487 - 277 PyObject f_executable; - - 278 - 279 for (; frame_addr; frame_addr = frame_addr->previous) - 0x000000000000ecf7 <+19>: test %r8,%r8 - 0x000000000000ecfa <+22>: je 0xed91 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+173> - 0x000000000000ed88 <+164>: mov 0x8(%rbx),%r8 - 0x000000000000ed8c <+168>: jmp 0xecf7 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+19> - ``` + + .. 
code-block:: bash + + (gdb) disas /m Frame::read + Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: + warning: Source file is more recent than executable. + 269 { + 0x000000000000ece4 <+0>: push %r12 + 0x000000000000ece6 <+2>: mov %rdi,%r8 + 0x000000000000ece9 <+5>: push %rbp + 0x000000000000ecea <+6>: mov %rsi,%rbp + 0x000000000000eced <+9>: push %rbx + 0x000000000000ecee <+10>: sub $0x60,%rsp + + 270 #if PY_VERSION_HEX >= 0x030b0000 + 271 _PyInterpreterFrame iframe; + + 272 #if PY_VERSION_HEX >= 0x030d0000 + 273 // From Python versions 3.13, f_executable can have objects other than + 274 // code objects for an internal frame. We need to skip some frames if + 275 // its f_executable is not code as suggested here: + 276 // https://github.com/python/cpython/issues/100987#issuecomment-1485556487 + 277 PyObject f_executable; + + 278 + 279 for (; frame_addr; frame_addr = frame_addr->previous) + 0x000000000000ecf7 <+19>: test %r8,%r8 + 0x000000000000ecfa <+22>: je 0xed91 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+173> + 0x000000000000ed88 <+164>: mov 0x8(%rbx),%r8 + 0x000000000000ed8c <+168>: jmp 0xecf7 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+19> From 8166378317e7d41b3b812e5c345aa74e12234d78 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 15:30:30 -0400 Subject: [PATCH 75/80] inline formats --- docs/debug_symbols.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index d89372da175..e1eb15269f5 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -18,8 +18,9 @@ These debug symbols are extracted from the main wheels and packaged into separat {original-wheel-name}-debug-symbols.zip For example: -- `ddtrace-1.20.0-cp39-cp39-linux_x86_64.whl` → `ddtrace-1.20.0-cp39-cp39-linux_x86_64-debug-symbols.zip` -- `ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64.whl` → `ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64-debug-symbols.zip` + +- 
``ddtrace-1.20.0-cp39-cp39-linux_x86_64.whl`` → ``ddtrace-1.20.0-cp39-cp39-linux_x86_64-debug-symbols.zip`` +- ``ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64.whl`` → ``ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64-debug-symbols.zip`` Build Process ------------- @@ -27,7 +28,7 @@ Build Process The debug symbols are handled automatically during the CI build process: 1. Wheels are built with debug symbols included -2. Debug symbols are extracted using the `scripts/extract_debug_symbols.py` script +2. Debug symbols are extracted using the ``scripts/extract_debug_symbols.py`` script 3. Debug symbols are removed from the main wheel to reduce size 4. Separate debug symbol packages are created and uploaded as artifacts @@ -42,11 +43,11 @@ To use debug symbols for debugging or crash analysis: 3. Your debugger or crash analysis tool should automatically find the debug symbols 4. To view assembly with code side by side, you also need the source code, and set substitute paths in your debugger to the source code directory. For example, - for `_stack_v2.cpython-313-x86_64-linux-gnu.so` is compiled from - echion as specified in `ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt`. + ``_stack_v2.cpython-313-x86_64-linux-gnu.so`` is compiled from + echion as specified in ``ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt``. So you first need to check out the echion repository and checkout the commit hash. Then, set substitute paths in gdb to the echion source code directory. - Typically, if you run `dias /m ` in gdb, it will tell you the full + Typically, if you run ``disas /m <function>`` in gdb, it will tell you the full file path of the source code as the following: ..
code-block:: bash From d0c5007ece26f7f3dd48d6a3bb99e66c627fecf0 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 15:31:49 -0400 Subject: [PATCH 76/80] more inline --- docs/debug_symbols.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index e1eb15269f5..5f0e8bb9e4d 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -8,8 +8,8 @@ Debug Symbol Files The project generates debug symbols during the build process: -- **Linux**: `.debug` files (using `objcopy --only-keep-debug`) -- **macOS**: `.dSYM` bundles (using `dsymutil`) +- **Linux**: ``.debug`` files (using ``objcopy --only-keep-debug``) +- **macOS**: ``.dSYM`` bundles (using ``dsymutil``) These debug symbols are extracted from the main wheels and packaged into separate `.zip` files with the naming convention: @@ -69,7 +69,7 @@ To use debug symbols for debugging or crash analysis: .. code-block:: bash (gdb) set substitute-path /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion /path/to/echion/source/code - Then you can run `dias /m Frame::read` again to see the assembly with code side by side. + Then you can run ``dias /m Frame::read`` again to see the assembly with code side by side. .. 
code-block:: bash From 0f51076efb8e75ec71b0c18ab86bab0da834c1fd Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 15:44:01 -0400 Subject: [PATCH 77/80] few more edits to docs --- docs/debug_symbols.rst | 8 +++++--- docs/spelling_wordlist.txt | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index 5f0e8bb9e4d..6f79c67f2f6 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -1,5 +1,5 @@ -Debug Symbols Packaging -======================= +Debugging Native Extensions with Debug Symbols +============================================== dd-trace-py is built with debug symbols by default, and packaged separately from the main wheel files to reduce the size of the primary distribution packages. @@ -64,12 +64,14 @@ To use debug symbols for debugging or crash analysis: 270 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc 271 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc + Then you can set substitute paths in gdb to the echion source code directory .. code-block:: bash (gdb) set substitute-path /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion /path/to/echion/source/code - Then you can run ``dias /m Frame::read`` again to see the assembly with code side by side. + + Run ``disas /m Frame::read`` again to see the assembly with code side by side. ..
code-block:: bash diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index fe3e513d0a0..09265483f3d 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -95,6 +95,7 @@ dramatiq Dramatiq dsn dunder +echion eg elasticsearch elasticsearch1 @@ -116,6 +117,7 @@ flamegraph fnmatch formatter freezegun +gdb genai generativeai gevent @@ -340,4 +342,4 @@ wsgi xfail yaaredis openai-agents -validators \ No newline at end of file +validators From 238a5251a2b4d759f2215c8f4e998f12bb2d24a0 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 16:04:54 -0400 Subject: [PATCH 78/80] lldb instructions --- docs/debug_symbols.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst index 6f79c67f2f6..bc32ee3de41 100644 --- a/docs/debug_symbols.rst +++ b/docs/debug_symbols.rst @@ -102,3 +102,6 @@ To use debug symbols for debugging or crash analysis: 0x000000000000ecfa <+22>: je 0xed91 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+173> 0x000000000000ed88 <+164>: mov 0x8(%rbx),%r8 0x000000000000ed8c <+168>: jmp 0xecf7 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+19> + + On lldb, you can find the source code full path by running ``image lookup -n Frame::read --verbose``, + and set the source code path using ``settings set target.source-map <build-path> <local-source-path>``.
From 0921930fa9728a04ce41a31415b30db855f8b545 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 16:56:23 -0400 Subject: [PATCH 79/80] this is still relevant to strip debug symbols from release build mode for non cmake built binaries --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 20ef159e9db..cf592dc6c6a 100644 --- a/setup.py +++ b/setup.py @@ -569,7 +569,7 @@ def build_extension(self, ext): else: super().build_extension(ext) - if COMPILE_MODE.lower() == "minsizerel": + if COMPILE_MODE.lower() in ("release", "minsizerel"): try: self.try_strip_symbols(self.get_ext_fullpath(ext.name)) except Exception as e: From dccf04ea7c986fd5ca2a7a6f19949d79f4e806a2 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Thu, 4 Sep 2025 16:57:26 -0400 Subject: [PATCH 80/80] update comment --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index cf592dc6c6a..5a55b7f1867 100644 --- a/setup.py +++ b/setup.py @@ -61,8 +61,8 @@ # RustExtension src/native has two build profiles, release and debug, and only # DD_COMPILE_MODE=Debug will build with debug profile, and rest will build with # release profile, which also has debug symbols by default. -# And when MinSizeRel is used, we strip the debug symbols from the wheels, -# see try_strip_symbols() below. +# And when MinSizeRel or Release is used, we strip the debug symbols from the +# wheels, see try_strip_symbols() below. COMPILE_MODE = "Release" if CURRENT_OS == "Windows" else "RelWithDebInfo" if "DD_COMPILE_DEBUG" in os.environ: warnings.warn(