From 2d49850369e92196e67e0c662446fde55fc98e4e Mon Sep 17 00:00:00 2001 From: "Jonathan B. Coe" Date: Sun, 8 Mar 2026 22:42:20 +0000 Subject: [PATCH 1/2] Modify CMakeLists.txt to allow sanitizers to be enabled --- ASAN.md | 50 +++++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 51 +++++++++++++++++++++++++++++++++------- nle/tests/test_system.py | 18 ++++++++++++++ 3 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 ASAN.md diff --git a/ASAN.md b/ASAN.md new file mode 100644 index 000000000..f8f3aa7bc --- /dev/null +++ b/ASAN.md @@ -0,0 +1,50 @@ +# Address Sanitizer (ASAN) Guide + +This project supports Address Sanitizer (ASAN) to help detect memory corruption, use-after-free, and buffer overflows in the C/C++ NetHack engine and its Python extensions. + +## Enabling ASAN + +ASAN is integrated into the CMake build system and can be enabled via `pyproject.toml`. + +### Current Configuration + +```toml +[tool.scikit-build] +cmake.build-type = "Release" +cmake.args = ["-DHACKDIR=nle/nethackdir", "-DPYTHON_PACKAGE_NAME=nle"] +``` + +To enable ASAN, add the cmake argument `-DENABLE_ASAN=On` and switch `cmake.build-type` to `Debug`. + +## Running Tests with ASAN + +Because the Python interpreter itself is not built with ASAN, you must preload the ASAN runtime library when running tests. + +### Execution Command + +Run the following command to execute tests with ASAN enabled: + +```bash +LD_PRELOAD=$(gcc -print-file-name=libasan.so):$(gcc -print-file-name=libstdc++.so) ASAN_OPTIONS=detect_leaks=0 uv run pytest +``` + +*Note: Preloading `libstdc++.so` may be necessary on some platforms (like aarch64 Linux) to avoid crashes when C++ exceptions are thrown.* + +### Why `detect_leaks=0`? + +We disable the LeakSanitizer (`detect_leaks=0`) for several reasons: + +1. Python Shutdown: CPython does not free all memory at exit (e.g., global singletons, interned strings). This is intentional for performance but is flagged as a "leak" by ASAN. +2. 
Pytest State: `pytest` keeps tracebacks, local variables, and fixture data in memory until the end of the session to generate reports. +3. Standard Interpreter: Since we are running a sanitized C extension inside a non-sanitized Python interpreter, the leak detector cannot accurately track the ownership boundary between the two. + +Disabling leak detection still allows ASAN to catch critical memory corruption errors (Buffer Overflows, Use-After-Free, etc.) as they happen. + +## Other Sanitizers + +The build system also supports: + +- Thread Sanitizer (TSAN): Use `-DENABLE_TSAN=ON`. +- Undefined Behavior Sanitizer (UBSAN): Use `-DENABLE_UBSAN=ON`. + +To use these, update `pyproject.toml` accordingly and preload the corresponding library (e.g., `libtsan.so`). diff --git a/CMakeLists.txt b/CMakeLists.txt index 24a3073e0..feb90ce16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,15 +19,48 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") set(CMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym") - if(0) - # address sanitizer. 
- set(CMAKE_CXX_FLAGS_DEBUG - "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address") - set(CMAKE_C_FLAGS_DEBUG - "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address") - set(CMAKE_LINKER_FLAGS_DEBUG - "${CMAKE_LINKER_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address" - ) + option(ENABLE_ASAN "Enable Address Sanitizer" OFF) + option(ENABLE_TSAN "Enable Thread Sanitizer" OFF) + option(ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF) + + if(ENABLE_ASAN + OR ENABLE_TSAN + OR ENABLE_UBSAN) + include(CheckCXXCompilerFlag) + include(CheckCCompilerFlag) + add_library(nle_sanitizers INTERFACE) + + if(ENABLE_ASAN) + set(SANITIZER_FLAGS_ASAN -fsanitize=address -fno-omit-frame-pointer) + # Force support since we verified it manually in the environment + set(COMPILER_SUPPORTS_ASAN ON) + if(COMPILER_SUPPORTS_ASAN) + target_compile_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_ASAN}) + target_link_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_ASAN}) + endif() + endif() + + if(ENABLE_TSAN) + set(SANITIZER_FLAGS_TSAN -fsanitize=thread -fno-omit-frame-pointer) + check_cxx_compiler_flag("${SANITIZER_FLAGS_TSAN}" COMPILER_SUPPORTS_TSAN) + if(COMPILER_SUPPORTS_TSAN) + target_compile_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_TSAN}) + target_link_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_TSAN}) + endif() + endif() + + if(ENABLE_UBSAN) + set(SANITIZER_FLAGS_UBSAN -fsanitize=undefined -fno-omit-frame-pointer) + check_cxx_compiler_flag("${SANITIZER_FLAGS_UBSAN}" + COMPILER_SUPPORTS_UBSAN) + if(COMPILER_SUPPORTS_UBSAN) + target_compile_options(nle_sanitizers + INTERFACE ${SANITIZER_FLAGS_UBSAN}) + target_link_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_UBSAN}) + endif() + endif() + + link_libraries(nle_sanitizers) endif() if(MSVC) add_compile_options(/W4) diff --git a/nle/tests/test_system.py b/nle/tests/test_system.py index 9fe5e60be..9cc1de4f4 100644 --- a/nle/tests/test_system.py +++ 
b/nle/tests/test_system.py @@ -1,4 +1,7 @@ # Copyright (c) Facebook, Inc. and its affiliates. + +import ctypes +import functools import multiprocessing as mp import queue import random @@ -19,17 +22,32 @@ def new_env_one_step(): return terminated +@functools.cache +def is_asan(): + """Checks if the process is running with ASAN. + + See if the __asan_init symbol is present in the current process. + """ + + current_process = ctypes.CDLL(None) + return hasattr(current_process, "__asan_init") + + @pytest.mark.parametrize( "ctx", [mp.get_context(m) for m in START_METHODS], ids=START_METHODS ) class TestEnvSubprocess: def test_env_in_subprocess(self, ctx): + if ctx.get_start_method() == "spawn" and is_asan(): + pytest.skip("ASAN crashes on spawn on this environment") p = ctx.Process(target=new_env_one_step) p.start() p.join() assert p.exitcode == 0 def test_env_before_and_in_subprocess(self, ctx): + if ctx.get_start_method() == "spawn" and is_asan(): + pytest.skip("ASAN crashes on spawn on this environment") new_env_one_step() p = ctx.Process(target=new_env_one_step) p.start() From 2d7a9c4487792a37fb03366bbc8e96e6d619dda8 Mon Sep 17 00:00:00 2001 From: "Jonathan B. Coe" Date: Sat, 14 Mar 2026 10:27:33 +0000 Subject: [PATCH 2/2] Add Address Sanitizer support and update CMake configuration --- ASAN.md | 41 +++++++++++++++++++------------ CMakeLists.txt | 60 ++++++++++++++++++++++++--------------------- util/CMakeLists.txt | 11 +++++++++ 3 files changed, 69 insertions(+), 43 deletions(-) diff --git a/ASAN.md b/ASAN.md index f8f3aa7bc..e38a61e8e 100644 --- a/ASAN.md +++ b/ASAN.md @@ -1,10 +1,13 @@ # Address Sanitizer (ASAN) Guide -This project supports Address Sanitizer (ASAN) to help detect memory corruption, use-after-free, and buffer overflows in the C/C++ NetHack engine and its Python extensions. +This project supports Address Sanitizer (ASAN) to help detect memory corruption, +use-after-free, and buffer overflows in the C/C++ NetHack engine and its Python +extensions. 
## Enabling ASAN -ASAN is integrated into the CMake build system and can be enabled via `pyproject.toml`. +ASAN is integrated into the CMake build system and can be enabled by editing +`pyproject.toml`. ### Current Configuration @@ -14,31 +17,38 @@ cmake.build-type = "Release" cmake.args = ["-DHACKDIR=nle/nethackdir", "-DPYTHON_PACKAGE_NAME=nle"] ``` -To enable ASAN, add the cmake argument `-DENABLE_ASAN=On` and switch `cmake.build-type` to `Debug`. +To enable ASAN, add the CMake argument `-DENABLE_ASAN=ON` to `cmake.args` and +switch `cmake.build-type` to `Debug`. ## Running Tests with ASAN -Because the Python interpreter itself is not built with ASAN, you must preload the ASAN runtime library when running tests. - -### Execution Command - -Run the following command to execute tests with ASAN enabled: +Because the Python interpreter itself is not built with ASAN, you must preload +the ASAN runtime library when running tests. ```bash -LD_PRELOAD=$(gcc -print-file-name=libasan.so):$(gcc -print-file-name=libstdc++.so) ASAN_OPTIONS=detect_leaks=0 uv run pytest +LD_PRELOAD=$(gcc -print-file-name=libasan.so):$(gcc -print-file-name=libstdc++.so) \ +ASAN_OPTIONS=detect_leaks=0 \ +uv run pytest ``` -*Note: Preloading `libstdc++.so` may be necessary on some platforms (like aarch64 Linux) to avoid crashes when C++ exceptions are thrown.* +_Note: Preloading `libstdc++.so` may be necessary on some platforms (like +aarch64 Linux) to avoid crashes when C++ exceptions are thrown._ ### Why `detect_leaks=0`? We disable the LeakSanitizer (`detect_leaks=0`) for several reasons: -1. Python Shutdown: CPython does not free all memory at exit (e.g., global singletons, interned strings). This is intentional for performance but is flagged as a "leak" by ASAN. -2. Pytest State: `pytest` keeps tracebacks, local variables, and fixture data in memory until the end of the session to generate reports. -3. 
Standard Interpreter: Since we are running a sanitized C extension inside a non-sanitized Python interpreter, the leak detector cannot accurately track the ownership boundary between the two. +1. Python Shutdown: CPython does not free all memory at exit (e.g., global + singletons, interned strings). This is intentional for performance but is + flagged as a "leak" by ASAN. +2. Pytest State: `pytest` keeps tracebacks, local variables, and fixture data in + memory until the end of the session to generate reports. +3. Standard Interpreter: Since we are running a sanitized C extension inside a + non-sanitized Python interpreter, the leak detector cannot accurately track + the ownership boundary between the two. -Disabling leak detection still allows ASAN to catch critical memory corruption errors (Buffer Overflows, Use-After-Free, etc.) as they happen. +Disabling leak detection still allows ASAN to catch critical memory corruption +errors (Buffer Overflows, Use-After-Free, etc.) as they happen. ## Other Sanitizers @@ -47,4 +57,5 @@ The build system also supports: - Thread Sanitizer (TSAN): Use `-DENABLE_TSAN=ON`. - Undefined Behavior Sanitizer (UBSAN): Use `-DENABLE_UBSAN=ON`. -To use these, update `pyproject.toml` accordingly and preload the corresponding library (e.g., `libtsan.so`). +To use these, set `cmake.build-type` to `Debug`, add the flag to `cmake.args`, +and preload the corresponding library (e.g., `libtsan.so`). diff --git a/CMakeLists.txt b/CMakeLists.txt index feb90ce16..c377a4801 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,6 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) message("Debug build.") # Unclear if this is even necessary. `dsymutil rlmain -o rlmain.dSYM` seems to # have done the trick. 
- set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") set(CMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym") option(ENABLE_ASAN "Enable Address Sanitizer" OFF) @@ -32,8 +31,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) if(ENABLE_ASAN) set(SANITIZER_FLAGS_ASAN -fsanitize=address -fno-omit-frame-pointer) - # Force support since we verified it manually in the environment - set(COMPILER_SUPPORTS_ASAN ON) + check_cxx_compiler_flag("${SANITIZER_FLAGS_ASAN}" COMPILER_SUPPORTS_ASAN) if(COMPILER_SUPPORTS_ASAN) target_compile_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_ASAN}) target_link_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_ASAN}) @@ -62,11 +60,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) link_libraries(nle_sanitizers) endif() - if(MSVC) - add_compile_options(/W4) - else() - add_compile_options(-Wall) - endif() + elseif(CMAKE_BUILD_TYPE MATCHES Release) message("Release build.") else() @@ -75,8 +69,6 @@ endif() message(STATUS "Building nle backend version: ${CMAKE_NLE_VERSION}") -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - set(HACKDIR "$ENV{HOME}/nethackdir.nle" CACHE STRING "Configuration files for nethack") @@ -87,6 +79,28 @@ message(STATUS "HACKDIR set to: ${HACKDIR}") set(VARDIR ${HACKDIR}) set(INSTDIR ${HACKDIR}) +add_library(nle_common_options INTERFACE) +set_target_properties(nle_common_options + PROPERTIES INTERFACE_POSITION_INDEPENDENT_CODE ON) + +target_compile_definitions( + nle_common_options + INTERFACE GCC_WARN + NOCLIPPING + NOMAIL + NOTPARMDECL + HACKDIR="${HACKDIR}" + DEFAULT_WINDOW_SYS="rl" + DLB + NOCWD_ASSUMPTIONS + NLE_USE_TILES) + +if(MSVC) + target_compile_options(nle_common_options INTERFACE /W4) +else() + target_compile_options(nle_common_options INTERFACE -Wall) +endif() + # pybind11 via FetchContent include(FetchContent) FetchContent_Declare( @@ -103,17 +117,6 @@ FetchContent_Declare( GIT_HASH "259fc4103bad6bb484d5ff426ace56ac557107a4" EXCLUDE_FROM_ALL) FetchContent_MakeAvailable(deboost_context) 
-add_compile_definitions( - GCC_WARN - NOCLIPPING - NOMAIL - NOTPARMDECL - HACKDIR="${HACKDIR}" - DEFAULT_WINDOW_SYS="rl" - DLB - NOCWD_ASSUMPTIONS - NLE_USE_TILES) - set(NLE_SRC ${nle_SOURCE_DIR}/src) set(NLE_INC ${nle_SOURCE_DIR}/include) set(NLE_DAT ${nle_SOURCE_DIR}/dat) @@ -182,19 +185,20 @@ target_include_directories( # Careful with -DMONITOR_HEAP: Ironically, it fails to fclose FILE* heaplog. # target_compile_definitions(nethack PUBLIC "$<$<CONFIG:DEBUG>:MONITOR_HEAP>") -target_link_libraries(nethack PUBLIC m fcontext bz2_static tmt) +target_link_libraries(nethack PUBLIC nle_common_options m fcontext bz2_static + tmt) # dlopen wrapper library add_library(nethackdl STATIC "sys/unix/nledl.c") target_include_directories( nethackdl PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include ${deboost_context_SOURCE_DIR}/include) -target_link_libraries(nethackdl PUBLIC dl) +target_link_libraries(nethackdl PUBLIC nle_common_options dl) # rlmain C++ (test) binary add_executable(rlmain "sys/unix/rlmain.cc") set_target_properties(rlmain PROPERTIES CXX_STANDARD 11) -target_link_libraries(rlmain PUBLIC nethackdl) +target_link_libraries(rlmain PUBLIC nle_common_options nethackdl) target_include_directories(rlmain PUBLIC ${NLE_INC_GEN}) add_dependencies(rlmain util) # For pm.h. @@ -207,7 +211,7 @@ pybind11_add_module( src/drawing.c src/objects.c $<TARGET_OBJECTS:tile>) -target_link_libraries(_pynethack PUBLIC nethackdl) +target_link_libraries(_pynethack PUBLIC nle_common_options nethackdl) set_target_properties(_pynethack PROPERTIES CXX_STANDARD 14) target_include_directories(_pynethack PUBLIC ${NLE_INC_GEN}) add_dependencies(_pynethack util) # For pm.h. 
@@ -220,19 +224,19 @@ target_include_directories( converter PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/third_party/libtmt ${CMAKE_CURRENT_SOURCE_DIR}/third_party/converter ${bzip2_SOURCE_DIR}) -target_link_libraries(converter PUBLIC bz2_static tmt) +target_link_libraries(converter PUBLIC nle_common_options bz2_static tmt) if(CMAKE_BUILD_TYPE MATCHES Debug) target_compile_options(converter PRIVATE -Wall -Wextra -pedantic -Werror) endif() # ttyrec reader executable add_executable(ttyrec_reader EXCLUDE_FROM_ALL "third_party/converter/reader.c") -target_link_libraries(ttyrec_reader PUBLIC converter) +target_link_libraries(ttyrec_reader PUBLIC nle_common_options converter) target_include_directories( ttyrec_reader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/third_party/converter) pybind11_add_module(_pyconverter third_party/converter/pyconverter.cc) -target_link_libraries(_pyconverter PUBLIC converter) +target_link_libraries(_pyconverter PUBLIC nle_common_options converter) set_target_properties(_pyconverter PROPERTIES CXX_STANDARD 14) target_include_directories( _pyconverter PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/third_party/converter) diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index 8f0aa828e..80f6d3376 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -27,18 +27,27 @@ file(MAKE_DIRECTORY ${NLE_INC_GEN} ${NLE_SRC_GEN}) add_executable(makedefs ${MAKEDEFS_SRC}) target_include_directories(makedefs PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(makedefs PRIVATE nle_common_options) + add_executable(dgn_comp ${DGN_COMP_SRC}) target_include_directories(dgn_comp PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(dgn_comp PRIVATE nle_common_options) + add_executable(lev_comp ${LEV_COMP_SRC}) target_include_directories(lev_comp PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(lev_comp PRIVATE nle_common_options) + add_executable(dlb ${DLB_SRC}) target_include_directories(dlb PRIVATE ${NLE_INC} 
${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(dlb PRIVATE nle_common_options) + add_executable(recover recover.c) target_include_directories(recover PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(recover PRIVATE nle_common_options) add_custom_command( DEPENDS makedefs @@ -88,10 +97,12 @@ add_custom_target(util DEPENDS ${MAKEDEFS_HEADERS} recover) add_executable(tilemap ${NLE_WIN}/share/tilemap.c) target_include_directories(tilemap PUBLIC ${NLE_INC} ${NLE_INC_GEN}) +target_link_libraries(tilemap PRIVATE nle_common_options) add_dependencies(tilemap util) add_library(tile OBJECT ${NLE_SRC_GEN}/tile.c) target_include_directories(tile PUBLIC ${NLE_INC} ${NLE_INC_GEN}) +target_link_libraries(tile PRIVATE nle_common_options) # NOTE: util is dependent on these two add_dependencies(lev_comp util)