diff --git a/.github/workflows/build_python_3.yml b/.github/workflows/build_python_3.yml index 303e2576221..cc60103fb27 100644 --- a/.github/workflows/build_python_3.yml +++ b/.github/workflows/build_python_3.yml @@ -70,7 +70,13 @@ jobs: # `platform.mac_ver()` reports incorrect MacOS version at 11.0 # See: https://stackoverflow.com/a/65402241 CIBW_ENVIRONMENT_MACOS: CMAKE_BUILD_PARALLEL_LEVEL=24 SYSTEM_VERSION_COMPAT=0 CMAKE_ARGS="-DNATIVE_TESTING=OFF" + # cibuildwheel copies everything under the /output directory from the + # build container to the host machine. This is a bit hacky, but it seems + # to be the only way to get debug symbols out of the container without + # messing up the RECORD file. CIBW_REPAIR_WHEEL_COMMAND_LINUX: | + mkdir -p /output/debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir /output/debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && mkdir ./tempwheelhouse && unzip -l {wheel} | grep '\.so' && @@ -78,6 +84,8 @@ jobs: mv ./tempwheelhouse/*.whl {dest_dir} && rm -rf ./tempwheelhouse CIBW_REPAIR_WHEEL_COMMAND_MACOS: | + mkdir -p ./debugwheelhouse && + python scripts/extract_debug_symbols.py {wheel} --output-dir ./debugwheelhouse && python scripts/zip_filter.py {wheel} \*.c \*.cpp \*.cc \*.h \*.hpp \*.pyx \*.md && MACOSX_DEPLOYMENT_TARGET=12.7 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: python scripts/zip_filter.py "{wheel}" "*.c" "*.cpp" "*.cc" "*.h" "*.hpp" "*.pyx" "*.md" && mv "{wheel}" "{dest_dir}" @@ -126,3 +134,11 @@ jobs: with: name: wheels-${{ env.ARTIFACT_NAME }} path: ./wheelhouse/*.whl + + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + if: runner.os != 'Windows' + with: + name: debug-symbols-${{ env.ARTIFACT_NAME }} + path: | + ./debugwheelhouse/*.zip + ./wheelhouse/debugwheelhouse/*.zip diff --git a/.gitignore b/.gitignore index 
56222903b81..2bd3cd4fef5 100644 --- a/.gitignore +++ b/.gitignore @@ -195,3 +195,6 @@ tests/appsec/iast/fixtures/taint_sinks/not_exists.txt # .env file .env .envrc + +*.debug +*.dSYM/ diff --git a/.gitlab/download-wheels-from-gh-actions.sh b/.gitlab/download-wheels-from-gh-actions.sh index 547c1cd92f2..2792e8754fe 100755 --- a/.gitlab/download-wheels-from-gh-actions.sh +++ b/.gitlab/download-wheels-from-gh-actions.sh @@ -67,7 +67,7 @@ fi echo "Github workflow finished. Downloading wheels" # download all wheels -gh run download $RUN_ID --repo DataDog/dd-trace-py +gh run download $RUN_ID --repo DataDog/dd-trace-py --pattern "wheels-*" --pattern "source-dist*" cd .. diff --git a/ddtrace/appsec/_iast/_taint_tracking/native.cpp b/ddtrace/appsec/_iast/_taint_tracking/native.cpp index 7faa7ea845e..14e3ce4323d 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/native.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/native.cpp @@ -42,11 +42,12 @@ static PyMethodDef AspectsMethods[] = { { nullptr, nullptr, 0, nullptr } }; -static struct PyModuleDef aspects = { PyModuleDef_HEAD_INIT, - .m_name = PY_MODULE_NAME_ASPECTS, - .m_doc = "Taint tracking Aspects", - .m_size = -1, - .m_methods = AspectsMethods }; +// Mark the module as used to prevent it from being stripped. +static struct PyModuleDef aspects __attribute__((used)) = { PyModuleDef_HEAD_INIT, + .m_name = PY_MODULE_NAME_ASPECTS, + .m_doc = "Taint tracking Aspects", + .m_size = -1, + .m_methods = AspectsMethods }; static PyMethodDef OpsMethods[] = { { "new_pyobject_id", (PyCFunction)api_new_pyobject_id, METH_FASTCALL, "new pyobject id" }, @@ -55,11 +56,12 @@ static PyMethodDef OpsMethods[] = { { nullptr, nullptr, 0, nullptr } }; -static struct PyModuleDef ops = { PyModuleDef_HEAD_INIT, - .m_name = PY_MODULE_NAME_ASPECTS, - .m_doc = "Taint tracking operations", - .m_size = -1, - .m_methods = OpsMethods }; +// Mark the module as used to prevent it from being stripped. 
+static struct PyModuleDef ops __attribute__((used)) = { PyModuleDef_HEAD_INIT, + .m_name = PY_MODULE_NAME_ASPECTS, + .m_doc = "Taint tracking operations", + .m_size = -1, + .m_methods = OpsMethods }; /** * This function initializes the native module. diff --git a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake index 567ba15208f..1a7d66493c9 100644 --- a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake @@ -26,9 +26,6 @@ function(add_ddup_config target) "$<$:-Os;-ggdb3>" -fno-semantic-interposition) endif() - # Common link options - target_link_options(${target} PRIVATE "$<$:>") - if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") # macOS-specific linker options target_link_options(${target} PRIVATE "$<$:-Wl,-dead_strip>") @@ -46,11 +43,19 @@ function(add_ddup_config target) -Wl,--exclude-libs,ALL) endif() - # If we can IPO, then do so + # If we can IPO, then do so. check_ipo_supported(RESULT result) if(result) - set_property(TARGET ${target} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") + # When using AppleClang, explicitly use thin LTO to match Rust's thin LTO strategy. And set the object path + # for debug symbols. + target_compile_options(${target} PRIVATE -flto=thin) + target_link_options(${target} PRIVATE -flto=thin) + target_link_options(${target} PRIVATE -Wl,-object_path_lto,${CMAKE_CURRENT_BINARY_DIR}/${target}_lto.o) + else() + set_property(TARGET ${target} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() endif() # Propagate sanitizers @@ -85,4 +90,5 @@ function(add_ddup_config target) # The main targets, ddup, crashtracker, stack_v2, and dd_wrapper are built as dynamic libraries, so PIC is required. # And setting this is also fine for tests as they're loading those dynamic libraries. 
set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON) + endfunction() diff --git a/docs/debug_symbols.rst b/docs/debug_symbols.rst new file mode 100644 index 00000000000..bc32ee3de41 --- /dev/null +++ b/docs/debug_symbols.rst @@ -0,0 +1,107 @@ +Debugging Native Extensions with Debug Symbols +============================================== + +dd-trace-py is built with debug symbols by default. The symbols are packaged separately from the main wheel files to reduce the size of the primary distribution packages. + +Debug Symbol Files +------------------ + +The project generates debug symbols during the build process: + +- **Linux**: ``.debug`` files (using ``objcopy --only-keep-debug``) +- **macOS**: ``.dSYM`` bundles (using ``dsymutil``) + +These debug symbols are extracted from the main wheels and packaged into separate ``.zip`` files with the naming convention: + +:: + + {original-wheel-name}-debug-symbols.zip + +For example: + +- ``ddtrace-1.20.0-cp39-cp39-linux_x86_64.whl`` → ``ddtrace-1.20.0-cp39-cp39-linux_x86_64-debug-symbols.zip`` +- ``ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64.whl`` → ``ddtrace-1.20.0-cp39-cp39-macosx_10_9_x86_64-debug-symbols.zip`` + +Build Process +------------- + +The debug symbols are handled automatically during the CI build process: + +1. Wheels are built with debug symbols included +2. Debug symbols are extracted using the ``scripts/extract_debug_symbols.py`` script +3. Debug symbols are removed from the main wheel to reduce size +4. Separate debug symbol packages are created and uploaded as artifacts + +Usage +----- + +To use debug symbols for debugging or crash analysis: + +1. Download the appropriate debug symbol package for your platform and Python version +2. Extract the debug symbol files to the same directory as the corresponding ``.so`` files. + Typically, this is the site-packages directory where ddtrace is installed. +3. Your debugger or crash analysis tool should automatically find the debug symbols +4. 
To view assembly with code side by side, you also need the source code, and + must set substitute paths in your debugger to the source code directory. For example, + ``_stack_v2.cpython-313-x86_64-linux-gnu.so`` is compiled from + echion as specified in ``ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt``. + So you first need to check out the echion repository and then check out the commit hash. + Then, set substitute paths in gdb to the echion source code directory. + Typically, if you run ``disas /m <function>`` in gdb, it will tell you the full + file path of the source code as follows: + + .. code-block:: bash + + (gdb) disas /m Frame::read + Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: + 269 /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc: No such file or directory. + 0x000000000000ece4 <+0>: push %r12 + 0x000000000000ece6 <+2>: mov %rdi,%r8 + 0x000000000000ece9 <+5>: push %rbp + 0x000000000000ecea <+6>: mov %rsi,%rbp + 0x000000000000eced <+9>: push %rbx + 0x000000000000ecee <+10>: sub $0x60,%rsp + + 270 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc + 271 in /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion/frame.cc + + Then you can set substitute paths in gdb to the echion source code directory: + + .. code-block:: bash + + (gdb) set substitute-path /project/build/cmake.linux-x86_64-cpython-313/ddtrace.internal.datadog.profiling.stack_v2._stack_v2/_deps/echion-src/echion /path/to/echion/source/code + + Run ``disas /m Frame::read`` again to see the assembly with code side by side. + + .. code-block:: bash + + (gdb) disas /m Frame::read + Dump of assembler code for function _ZN5Frame4readEP19_PyInterpreterFramePS1_: + warning: Source file is more recent than executable. 
+ 269 { + 0x000000000000ece4 <+0>: push %r12 + 0x000000000000ece6 <+2>: mov %rdi,%r8 + 0x000000000000ece9 <+5>: push %rbp + 0x000000000000ecea <+6>: mov %rsi,%rbp + 0x000000000000eced <+9>: push %rbx + 0x000000000000ecee <+10>: sub $0x60,%rsp + + 270 #if PY_VERSION_HEX >= 0x030b0000 + 271 _PyInterpreterFrame iframe; + + 272 #if PY_VERSION_HEX >= 0x030d0000 + 273 // From Python versions 3.13, f_executable can have objects other than + 274 // code objects for an internal frame. We need to skip some frames if + 275 // its f_executable is not code as suggested here: + 276 // https://github.com/python/cpython/issues/100987#issuecomment-1485556487 + 277 PyObject f_executable; + + 278 + 279 for (; frame_addr; frame_addr = frame_addr->previous) + 0x000000000000ecf7 <+19>: test %r8,%r8 + 0x000000000000ecfa <+22>: je 0xed91 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+173> + 0x000000000000ed88 <+164>: mov 0x8(%rbx),%r8 + 0x000000000000ed8c <+168>: jmp 0xecf7 <_ZN5Frame4readEP19_PyInterpreterFramePS1_+19> + + In lldb, you can find the full path of the source code by running ``image lookup -n Frame::read --verbose``, + and set the source code path using ``settings set target.source-map <build-path> <local-path>``. 
diff --git a/docs/index.rst b/docs/index.rst index 1b6766e53b6..ef35f9f8cd8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -283,6 +283,7 @@ Indices and tables basic_usage advanced_usage build_system + debug_symbols benchmarks contributing troubleshooting diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index fe3e513d0a0..09265483f3d 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -95,6 +95,7 @@ dramatiq Dramatiq dsn dunder +echion eg elasticsearch elasticsearch1 @@ -116,6 +117,7 @@ flamegraph fnmatch formatter freezegun +gdb genai generativeai gevent @@ -340,4 +342,4 @@ wsgi xfail yaaredis openai-agents -validators \ No newline at end of file +validators diff --git a/scripts/extract_debug_symbols.py b/scripts/extract_debug_symbols.py new file mode 100644 index 00000000000..77cdccbaa65 --- /dev/null +++ b/scripts/extract_debug_symbols.py @@ -0,0 +1,470 @@ +#!/usr/bin/env python3 +""" +Extract debug symbols from wheels and create separate debug symbol packages. + +This script: +1. Processes each .so/.dylib file in the wheel (excluding files that match ignore patterns) +2. Creates debug symbols (.debug files on Linux, .dSYM bundles on macOS) for each .so/.dylib file +3. Strips debug symbols from the original .so/.dylib files +4. Packages debug symbols into a separate zip file (with proper recursive copying for .dSYM bundles) +5. 
Updates the wheel with stripped .so/.dylib files + +On Linux, the script will exit with error code 1 if: +- Any input .so file does not contain debug symbols (not built with -g flag) +- Any generated debug file does not contain debug symbols +""" + +import argparse +import fnmatch +import os +from pathlib import Path +import platform +import shutil +import subprocess +import sys +import tempfile +from typing import List +from typing import Optional +from typing import Tuple +from typing import Union +import zipfile + + +def get_debug_symbol_patterns(): + """Get file patterns for debug symbols based on platform.""" + return ["*.debug", "*.dSYM/*"] + + +def has_debug_symbols(so_file: str) -> bool: + """Check if a .so file has debug symbols (Linux only).""" + if platform.system() != "Linux": + # On non-Linux platforms, assume debug symbols exist to avoid false positives + return True + + try: + # Use objdump to check for debug sections + result = subprocess.run(["objdump", "-h", so_file], capture_output=True, text=True, check=True) + debug_sections = [line for line in result.stdout.split("\n") if ".debug_" in line] + + if debug_sections: + print(f" Found {len(debug_sections)} debug sections in {so_file}") + return True + else: + print(f" No debug sections found in {so_file}") + return False + except (subprocess.CalledProcessError, FileNotFoundError) as e: + print(f" Warning: Could not check debug symbols in {so_file}: {e}") + # If we can't check, assume it has debug symbols to avoid false positives + return True + + +def create_dsym_bundle(so_file: str, dsymutil: str) -> Optional[str]: + """Create a .dSYM bundle for a .so file.""" + dsym_path = Path(so_file).with_suffix(".dSYM") + + print(f"Attempting to create .dSYM bundle for: {so_file}") + print(f"dsymutil command: {dsymutil} {so_file} -o {dsym_path}") + + try: + result = subprocess.run([dsymutil, so_file, "-o", str(dsym_path)], capture_output=True, text=True, check=True) + + print(f"dsymutil stdout: 
{result.stdout}") + if result.stderr: + print(f"dsymutil stderr: {result.stderr}") + if "no debug symbols" in result.stderr: + print( + f" Warning: dsymutil failed to create .dSYM bundle for {so_file} because it has no debug sections" + ) + return None + + # Verify that the .dSYM bundle was created and contains content + if verify_dsym_bundle(dsym_path): + return str(dsym_path) + else: + print(f"dsymutil succeeded but created empty .dSYM bundle for: {so_file}") + return None + + except subprocess.CalledProcessError as e: + print(f"Warning: dsymutil failed with exit code {e.returncode}") + print(f"dsymutil stdout: {e.stdout}") + print(f"dsymutil stderr: {e.stderr}") + return None + except Exception as e: + print(f"Warning: Error running dsymutil: {e}") + return None + + +def verify_debug_file(debug_path: Path) -> bool: + """Verify that a Linux .debug file was created successfully and contains debug symbols.""" + print(f"Verifying debug file: {debug_path}") + + if not debug_path.exists(): + print(f" Error: Debug file does not exist: {debug_path}") + return False + + if not debug_path.is_file(): + print(f" Error: Debug file is not a regular file: {debug_path}") + return False + + # Check file size + file_size = debug_path.stat().st_size + print(f" Debug file size: {file_size} bytes") + + if file_size == 0: + print(f" Error: Debug file is empty: {debug_path}") + os.remove(debug_path) + return False + + # Check if the debug file contains debug sections using objdump + try: + result = subprocess.run(["objdump", "-h", str(debug_path)], capture_output=True, text=True, check=True) + debug_sections = [line for line in result.stdout.split("\n") if ".debug_" in line] + print(f" Found {len(debug_sections)} debug sections") + + if debug_sections: + print(" Debug sections found:") + for section in debug_sections[:5]: # Show first 5 sections + print(f" {section.strip()}") + if len(debug_sections) > 5: + print(f" ... 
and {len(debug_sections) - 5} more") + print(f"Successfully created debug file: {debug_path}") + return True + else: + # No debug sections found - this is an error + print(f" Error: Debug file contains no debug sections: {debug_path}") + os.remove(debug_path) + return False + + except (subprocess.CalledProcessError, FileNotFoundError): + print(" Error: Could not verify debug sections with objdump") + # If we can't verify with objdump, this is an error + os.remove(debug_path) + return False + + +def verify_dsym_bundle(dsym_path: Path) -> bool: + """Verify that a .dSYM bundle was created successfully and contains content.""" + print(f"Verifying .dSYM bundle: {dsym_path}") + + if not dsym_path.exists(): + print(f" Error: .dSYM bundle does not exist: {dsym_path}") + return False + + if not dsym_path.is_dir(): + print(f" Error: .dSYM bundle is not a directory: {dsym_path}") + return False + + # Check if the .dSYM bundle contains the expected Contents/Resources/DWARF directory + dwarf_dir = dsym_path / "Contents" / "Resources" / "DWARF" + print(f" Checking for DWARF directory: {dwarf_dir}") + + if not dwarf_dir.exists(): + print(f" Error: DWARF directory does not exist: {dwarf_dir}") + # List what's actually in the .dSYM bundle + print(" Contents of .dSYM bundle:") + for item in dsym_path.rglob("*"): + print(f" {item}") + shutil.rmtree(dsym_path, ignore_errors=True) + return False + + dwarf_files = list(dwarf_dir.iterdir()) + if not dwarf_files: + print(f" Error: DWARF directory is empty: {dwarf_dir}") + shutil.rmtree(dsym_path, ignore_errors=True) + return False + + print(f" Success: Found {len(dwarf_files)} files in DWARF directory") + for dwarf_file in dwarf_files: + print(f" {dwarf_file.name}") + + print(f"Successfully created .dSYM bundle: {dsym_path}") + return True + + +def create_and_strip_debug_symbols(so_file: str) -> Union[str, None]: + """ + Create debug symbols from a shared object and strip them from the original. 
+ + This function replicates the logic from setup.py's try_strip_symbols method. + Returns the path to the created debug symbol file. + """ + current_os = platform.system() + + if current_os == "Linux": + objcopy = shutil.which("objcopy") + strip = shutil.which("strip") + + if not objcopy: + print("WARNING: objcopy not found, skipping symbol stripping", file=sys.stderr) + return None + + if not strip: + print("WARNING: strip not found, skipping symbol stripping", file=sys.stderr) + return None + + # Check if the input .so file has debug symbols + print(f"Checking for debug symbols in: {so_file}") + if not has_debug_symbols(so_file): + print(f"ERROR: {so_file} does not contain debug symbols (not built with -g)") + return None + + # Try removing the .llvmbc section from the .so file + subprocess.run([objcopy, "--remove-section", ".llvmbc", so_file], check=False) + + # Then keep the debug symbols in a separate file + debug_out = f"{so_file}.debug" + try: + subprocess.run([objcopy, "--only-keep-debug", so_file, debug_out], check=True) + + # Verify that the debug file was created and contains debug symbols + if verify_debug_file(Path(debug_out)): + # Strip the debug symbols from the .so file + subprocess.run([strip, "-g", so_file], check=True) + + # Link the debug symbols to the .so file + subprocess.run([objcopy, "--add-gnu-debuglink", debug_out, so_file], check=True) + + return debug_out + else: + print(f"ERROR: Failed to create valid debug file for {so_file}") + return None + + except subprocess.CalledProcessError as e: + print(f"ERROR: objcopy failed to create debug file: {e}") + return None + + elif current_os == "Darwin": + dsymutil = shutil.which("dsymutil") + strip = shutil.which("strip") + + debug_path = None + if dsymutil: + # 1) Emit dSYM - let dsymutil handle the detection + debug_path = create_dsym_bundle(so_file, dsymutil) + + if strip: + # Strip DWARF + local symbols + subprocess.run([strip, "-S", "-x", so_file], check=True) + else: + print("WARNING: 
strip not found, skipping symbol stripping", file=sys.stderr) + + return debug_path + + return None + + +def should_ignore_file(filename: str, ignore_patterns: List[str]) -> bool: + """Check if a file should be ignored based on glob patterns.""" + if not ignore_patterns: + return False + + for pattern in ignore_patterns: + if fnmatch.fnmatch(filename, pattern.strip()): + print(f"Ignoring {filename} (matches pattern: {pattern})") + return True + return False + + +def find_dynamic_libraries_in_wheel(wheel_path: str, ignore_patterns: List[str] = None) -> List[Tuple[str, bytes]]: + """Find and read .so and .dylib files from a wheel file, excluding ignored patterns.""" + dynamic_libs = [] + + with zipfile.ZipFile(wheel_path, "r") as wheel: + for file_info in wheel.infolist(): + if file_info.filename.endswith(".so") or file_info.filename.endswith(".dylib"): + # Extract just the filename without path for pattern matching + filename = os.path.basename(file_info.filename) + if not should_ignore_file(filename, ignore_patterns or []): + dynamic_libs.append((file_info.filename, wheel.read(file_info.filename))) + else: + print(f"Skipping {file_info.filename} (matches ignore pattern)") + + return dynamic_libs + + +def process_dynamic_library_from_wheel(lib_filename: str, lib_content: bytes, temp_dir: str) -> Union[str, None]: + """ + Process a dynamic library (.so or .dylib) from a wheel to create debug symbols. 
+ + Args: + lib_filename: Original filename in the wheel + lib_content: Binary content of the dynamic library file + temp_dir: Temporary directory to work in + + Returns: + Path to the created debug symbol file, or None if no debug symbols were created + """ + # Create a temporary file for the dynamic library to process it, preserving directory structure + lib_path = os.path.join(temp_dir, lib_filename) + os.makedirs(os.path.dirname(lib_path), exist_ok=True) + with open(lib_path, "wb") as f: + f.write(lib_content) + + print(f"Processing dynamic library: {lib_filename}") + + try: + debug_file = create_and_strip_debug_symbols(lib_path) + if debug_file: + print(f"Created debug symbols: {debug_file}") + return debug_file + return None + except Exception as e: + print(f"Error processing dynamic library {lib_filename}: {e}") + return None + + +def create_debug_symbols_package(wheel_path: str, debug_files: List[str], output_dir: str, temp_dir: str) -> str: + """Create a separate debug symbols package.""" + wheel_name = Path(wheel_path).stem + debug_package_name = f"{wheel_name}-debug-symbols.zip" + debug_package_path = os.path.join(output_dir, debug_package_name) + + with zipfile.ZipFile(debug_package_path, "w", zipfile.ZIP_DEFLATED) as debug_zip: + for debug_file in debug_files: + if os.path.exists(debug_file): + # Add the debug file to the zip, preserving directory structure + # The debug_file path is relative to temp_dir, so we need to extract the relative path + rel_path = os.path.relpath(debug_file, temp_dir) + + if os.path.isdir(debug_file): + # For directories (like .dSYM bundles), recursively add all contents + for root, dirs, files in os.walk(debug_file): + # Add directories + for dir_name in dirs: + dir_path = os.path.join(root, dir_name) + arc_path = os.path.relpath(dir_path, temp_dir) + debug_zip.write(dir_path, arc_path) + + # Add files + for file_name in files: + file_path = os.path.join(root, file_name) + arc_path = os.path.relpath(file_path, temp_dir) + 
debug_zip.write(file_path, arc_path) + else: + # For regular files, add directly + debug_zip.write(debug_file, rel_path) + + print(f"Created debug symbols package: {debug_package_path}") + return debug_package_path + + +def update_wheel_with_stripped_dynamic_libraries(wheel_path: str, temp_dir: str): + """Update the wheel with stripped .so and .dylib files.""" + temp_wheel_path = f"{wheel_path}.tmp" + + # Create new wheel with stripped dynamic library files + with zipfile.ZipFile(wheel_path, "r") as source_wheel, zipfile.ZipFile( + temp_wheel_path, "w", zipfile.ZIP_DEFLATED + ) as temp_wheel: + for file_info in source_wheel.infolist(): + if file_info.filename.endswith(".so") or file_info.filename.endswith(".dylib"): + # Replace with stripped version, preserving directory structure + stripped_lib_path = os.path.join(temp_dir, file_info.filename) + if os.path.exists(stripped_lib_path): + with open(stripped_lib_path, "rb") as f: + temp_wheel.writestr(file_info.filename, f.read()) + else: + # If stripping failed, keep original + temp_wheel.writestr(file_info.filename, source_wheel.read(file_info.filename)) + else: + temp_wheel.writestr(file_info.filename, source_wheel.read(file_info.filename)) + + # Replace original wheel with updated version + os.replace(temp_wheel_path, wheel_path) + print(f"Updated wheel with stripped dynamic library files: {wheel_path}") + + +def process_wheel( + wheel_path: str, output_dir: Optional[str] = None, ignore_patterns: List[str] = None +) -> Tuple[Optional[str], bool]: + """Process a single wheel file. + + Returns: + Tuple of (debug_package_path, success). success is False if no debug symbols were found. 
+ """ + if output_dir is None: + output_dir = os.path.dirname(wheel_path) + + os.makedirs(output_dir, exist_ok=True) + + print(f"Processing wheel: {wheel_path}") + + # Find and read .so and .dylib files from the wheel + dynamic_libs = find_dynamic_libraries_in_wheel(wheel_path, ignore_patterns) + + if not dynamic_libs: + print("No .so or .dylib files found in wheel") + return None, True # Success - no files to process + + print(f"Found {len(dynamic_libs)} dynamic library files") + + # Create temporary directory for processing + with tempfile.TemporaryDirectory() as temp_dir: + debug_files = [] + failed_libs = [] + + # Process each dynamic library file from the wheel + for lib_filename, lib_content in dynamic_libs: + debug_file = process_dynamic_library_from_wheel(lib_filename, lib_content, temp_dir) + if debug_file: + debug_files.append(debug_file) + else: + failed_libs.append(lib_filename) + + if failed_libs: + print("ERROR: Failed to generate debug symbols for the following libraries:") + for lib in failed_libs: + print(f" - {lib}") + print( + "This indicates that these binaries were built without debug symbols (-g flag) " + "or they were already stripped" + ) + return None, False + + print(f"Successfully created {len(debug_files)} debug symbol files") + + # Create debug symbols package + debug_package_path = create_debug_symbols_package(wheel_path, debug_files, output_dir, temp_dir) + + # Update wheel with stripped dynamic library files + update_wheel_with_stripped_dynamic_libraries(wheel_path, temp_dir) + + return debug_package_path, True + + +def main(): + parser = argparse.ArgumentParser(description="Extract debug symbols from wheels") + parser.add_argument("wheel", help="Path to the wheel file") + parser.add_argument("--output-dir", "-o", help="Output directory for debug symbol packages") + parser.add_argument( + "--ignore-patterns", + default="libddwaf*", + help="Comma-separated list of glob patterns to ignore (default: libddwaf*)", + ) + + args = 
parser.parse_args() + + if not os.path.exists(args.wheel): + print(f"Error: Wheel file not found: {args.wheel}") + sys.exit(1) + + # Parse ignore patterns + ignore_patterns = [p.strip() for p in args.ignore_patterns.split(",") if p.strip()] + + try: + debug_package_path, success = process_wheel(args.wheel, args.output_dir, ignore_patterns) + if not success: + print("ERROR: Failed to extract debug symbols from wheel") + print("This usually means one or more .so files were not built with debug symbols (-g flag)") + sys.exit(1) + elif debug_package_path: + print(f"Successfully processed wheel. Debug symbols saved to: {debug_package_path}") + else: + print("No debug symbols were created (no dynamic libraries found)") + except Exception as e: + print(f"Error processing wheel: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index 7613afce026..5a55b7f1867 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,23 @@ HERE = Path(__file__).resolve().parent -COMPILE_MODE = "Release" +CURRENT_OS = platform.system() + +# What's meant by each build mode is similar to that from CMake, except that +# non-CMake extensions are by default built with debug symbols. And we build +# with Release by default for Windows. +# Released wheels on Linux and macOS are stripped of debug symbols. We use +# scripts/extract_debug_symbols.py to extract the debug symbols from the wheels. +# C/C++ and Cython extensions built with setuptools.Extension, and +# Cython.Distutils.Extension by default inherits CFLAGS from the Python +# interpreter, and it usually has -O3 -g. So they're built with debug symbols +# by default. +# RustExtension src/native has two build profiles, release and debug, and only +# DD_COMPILE_MODE=Debug will build with debug profile, and rest will build with +# release profile, which also has debug symbols by default. +# And when MinSizeRel or Release is used, we strip the debug symbols from the +# wheels, see try_strip_symbols() below. 
+COMPILE_MODE = "Release" if CURRENT_OS == "Windows" else "RelWithDebInfo" if "DD_COMPILE_DEBUG" in os.environ: warnings.warn( "The DD_COMPILE_DEBUG environment variable is deprecated and will be deleted, " @@ -55,7 +71,7 @@ ) COMPILE_MODE = "Debug" else: - COMPILE_MODE = os.environ.get("DD_COMPILE_MODE", "Release") + COMPILE_MODE = os.environ.get("DD_COMPILE_MODE", COMPILE_MODE) FAST_BUILD = os.getenv("DD_FAST_BUILD", "false").lower() in ("1", "yes", "on", "true") if FAST_BUILD: diff --git a/src/native/Cargo.toml b/src/native/Cargo.toml index 924540d06fd..03e14cc91d3 100644 --- a/src/native/Cargo.toml +++ b/src/native/Cargo.toml @@ -6,7 +6,7 @@ resolver = "2" [profile.release] lto = true -strip = "debuginfo" +debug = "line-tables-only" opt-level = 's' codegen-units = 1