diff --git a/ASAN.md b/ASAN.md new file mode 100644 index 000000000..e38a61e8e --- /dev/null +++ b/ASAN.md @@ -0,0 +1,61 @@ +# Address Sanitizer (ASAN) Guide + +This project supports Address Sanitizer (ASAN) to help detect memory corruption, +use-after-free, and buffer overflows in the C/C++ NetHack engine and its Python +extensions. + +## Enabling ASAN + +ASAN is integrated into the CMake build system and can be enabled by editing +`pyproject.toml`. + +### Current Configuration + +```toml +[tool.scikit-build] +cmake.build-type = "Release" +cmake.args = ["-DHACKDIR=nle/nethackdir", "-DPYTHON_PACKAGE_NAME=nle"] +``` + +To enable ASAN, add the cmake argument `-DENABLE_ASAN=On` and switch +`cmake.build-type` to `Debug`. + +## Running Tests with ASAN + +Because the Python interpreter itself is not built with ASAN, you must preload +the ASAN runtime library when running tests. + +```bash +LD_PRELOAD=$(gcc -print-file-name=libasan.so):$(gcc -print-file-name=libstdc++.so) \ +ASAN_OPTIONS=detect_leaks=0 \ +uv run pytest +``` + +_Note: Preloading `libstdc++.so` may be necessary on some platforms (like +aarch64 Linux) to avoid crashes when C++ exceptions are thrown._ + +### Why `detect_leaks=0`? + +We disable the LeakSanitizer (`detect_leaks=0`) for several reasons: + +1. Python Shutdown: CPython does not free all memory at exit (e.g., global + singletons, interned strings). This is intentional for performance but is + flagged as a "leak" by ASAN. +2. Pytest State: `pytest` keeps tracebacks, local variables, and fixture data in + memory until the end of the session to generate reports. +3. Standard Interpreter: Since we are running a sanitized C extension inside a + non-sanitized Python interpreter, the leak detector cannot accurately track + the ownership boundary between the two. + +Disabling leak detection still allows ASAN to catch critical memory corruption +errors (Buffer Overflows, Use-After-Free, etc.) as they happen. + +## Other Sanitizers + +The build system also supports: + +- Thread Sanitizer (TSAN): Use `-DENABLE_TSAN=ON`. +- Undefined Behavior Sanitizer (UBSAN): Use `-DENABLE_UBSAN=ON`. + +To use these, update `pyproject.toml` accordingly and preload the corresponding +library (e.g., `libtsan.so`). diff --git a/CMakeLists.txt b/CMakeLists.txt index 48a45e4b8..dbcfebead 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,24 +16,51 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) message("Debug build.") # Unclear if this is even necessary. `dsymutil rlmain -o rlmain.dSYM` seems to # have done the trick. - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") set(CMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym") - if(0) - # address sanitizer. - set(CMAKE_CXX_FLAGS_DEBUG - "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address") - set(CMAKE_C_FLAGS_DEBUG - "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address") - set(CMAKE_LINKER_FLAGS_DEBUG - "${CMAKE_LINKER_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address" - ) - endif() - if(MSVC) - add_compile_options(/W4) - else() - add_compile_options(-Wall) + option(ENABLE_ASAN "Enable Address Sanitizer" OFF) + option(ENABLE_TSAN "Enable Thread Sanitizer" OFF) + option(ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF) + + if(ENABLE_ASAN + OR ENABLE_TSAN + OR ENABLE_UBSAN) + include(CheckCXXCompilerFlag) + include(CheckCCompilerFlag) + add_library(nle_sanitizers INTERFACE) + + if(ENABLE_ASAN) + set(SANITIZER_FLAGS_ASAN -fsanitize=address -fno-omit-frame-pointer) + check_cxx_compiler_flag("${SANITIZER_FLAGS_ASAN}" COMPILER_SUPPORTS_ASAN) + if(COMPILER_SUPPORTS_ASAN) + target_compile_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_ASAN}) + target_link_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_ASAN}) + endif() + endif() + + if(ENABLE_TSAN) + set(SANITIZER_FLAGS_TSAN "-fsanitize=thread -fno-omit-frame-pointer") + check_cxx_compiler_flag("${SANITIZER_FLAGS_TSAN}" COMPILER_SUPPORTS_TSAN) + if(COMPILER_SUPPORTS_TSAN) + target_compile_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_TSAN}) + target_link_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_TSAN}) + endif() + endif() + + if(ENABLE_UBSAN) + set(SANITIZER_FLAGS_UBSAN "-fsanitize=undefined -fno-omit-frame-pointer") + check_cxx_compiler_flag("${SANITIZER_FLAGS_UBSAN}" + COMPILER_SUPPORTS_UBSAN) + if(COMPILER_SUPPORTS_UBSAN) + target_compile_options(nle_sanitizers + INTERFACE ${SANITIZER_FLAGS_UBSAN}) + target_link_options(nle_sanitizers INTERFACE ${SANITIZER_FLAGS_UBSAN}) + endif() + endif() + + link_libraries(nle_sanitizers) endif() + elseif(CMAKE_BUILD_TYPE MATCHES Release) message("Release build.") else() @@ -42,11 +69,6 @@ endif() message(STATUS "Building nle backend version: ${CMAKE_NLE_VERSION}") -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - -add_compile_options(-Wno-deprecated-non-prototype) -add_compile_options(-Wno-unused-variable) - set(HACKDIR "$ENV{HOME}/nethackdir.nle" CACHE STRING "Configuration files for nethack") @@ -57,6 +79,29 @@ message(STATUS "HACKDIR set to: ${HACKDIR}") set(VARDIR ${HACKDIR}) set(INSTDIR ${HACKDIR}) +add_library(nle_common_options INTERFACE) +set_target_properties(nle_common_options + PROPERTIES INTERFACE_POSITION_INDEPENDENT_CODE ON) + +target_compile_definitions( + nle_common_options + INTERFACE GCC_WARN + NOCLIPPING + NOMAIL + NOTPARMDECL + HACKDIR="${HACKDIR}" + DEFAULT_WINDOW_SYS="rl" + DLB + NOCWD_ASSUMPTIONS + NLE_USE_TILES + NOCWD_ASSUMPTIONS) + +if(MSVC) + target_compile_options(nle_common_options INTERFACE /W4) +else() + target_compile_options(nle_common_options INTERFACE -Wall) +endif() + # pybind11 via FetchContent include(FetchContent) FetchContent_Declare( @@ -73,16 +118,6 @@ FetchContent_Declare( GIT_HASH "259fc4103bad6bb484d5ff426ace56ac557107a4" EXCLUDE_FROM_ALL) FetchContent_MakeAvailable(deboost_context) -add_compile_definitions( - GCC_WARN - NOCLIPPING - NOMAIL - NOTPARMDECL - HACKDIR="${HACKDIR}" - DEFAULT_WINDOW_SYS="rl" - DLB - NOCWD_ASSUMPTIONS) - set(NLE_SRC ${nle_SOURCE_DIR}/src) set(NLE_INC ${nle_SOURCE_DIR}/include) set(NLE_DAT ${nle_SOURCE_DIR}/dat) @@ -155,19 +190,20 @@ target_include_directories( # Careful with -DMONITOR_HEAP: Ironically, it fails to fclose FILE* heaplog. # target_compile_definitions(nethack PUBLIC "$<$:MONITOR_HEAP>") -target_link_libraries(nethack PUBLIC m fcontext bz2_static tmt) +target_link_libraries(nethack PUBLIC nle_common_options m fcontext bz2_static + tmt) # dlopen wrapper library add_library(nethackdl STATIC "sys/unix/nledl.c") target_include_directories( nethackdl PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include ${deboost_context_SOURCE_DIR}/include) -target_link_libraries(nethackdl PUBLIC dl) +target_link_libraries(nethackdl PUBLIC nle_common_options dl) # rlmain C++ (test) binary add_executable(rlmain "sys/unix/rlmain.cc") set_target_properties(rlmain PROPERTIES CXX_STANDARD 11) -target_link_libraries(rlmain PUBLIC nethackdl) +target_link_libraries(rlmain PUBLIC nle_common_options nethackdl) target_include_directories(rlmain PUBLIC ${NLE_INC_GEN}) add_dependencies(rlmain util) # For pm.h. @@ -180,7 +216,7 @@ pybind11_add_module( src/drawing.c src/objects.c $) -target_link_libraries(_pynethack PUBLIC nethackdl) +target_link_libraries(_pynethack PUBLIC nle_common_options nethackdl) set_target_properties(_pynethack PROPERTIES CXX_STANDARD 14) target_include_directories(_pynethack PUBLIC ${NLE_INC_GEN} ${NLE_WIN}/share) # add_dependencies(_pynethack util tile) # For pm.h. @@ -193,19 +229,19 @@ target_include_directories( converter PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/third_party/libtmt ${CMAKE_CURRENT_SOURCE_DIR}/third_party/converter ${bzip2_SOURCE_DIR}) -target_link_libraries(converter PUBLIC bz2_static tmt) +target_link_libraries(converter PUBLIC nle_common_options bz2_static tmt) if(CMAKE_BUILD_TYPE MATCHES Debug) target_compile_options(converter PRIVATE -Wall -Wextra -pedantic -Werror) endif() # ttyrec reader executable add_executable(ttyrec_reader EXCLUDE_FROM_ALL "third_party/converter/reader.c") -target_link_libraries(ttyrec_reader PUBLIC converter) +target_link_libraries(ttyrec_reader PUBLIC nle_common_options converter) target_include_directories( ttyrec_reader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/third_party/converter) pybind11_add_module(_pyconverter third_party/converter/pyconverter.cc) -target_link_libraries(_pyconverter PUBLIC converter) +target_link_libraries(_pyconverter PUBLIC nle_common_options converter) set_target_properties(_pyconverter PROPERTIES CXX_STANDARD 14) target_include_directories( _pyconverter PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/third_party/converter) diff --git a/nle/tests/test_system.py b/nle/tests/test_system.py index 9fe5e60be..9cc1de4f4 100644 --- a/nle/tests/test_system.py +++ b/nle/tests/test_system.py @@ -1,4 +1,7 @@ # Copyright (c) Facebook, Inc. and its affiliates. + +import ctypes +import functools import multiprocessing as mp import queue import random @@ -19,17 +22,32 @@ def new_env_one_step(): return terminated +@functools.cache +def is_asan(): + """Checks if the process is running with ASAN. + + See if the __asan_init symbol is present in the current process. + """ + + current_process = ctypes.CDLL(None) + return hasattr(current_process, "__asan_init") + + @pytest.mark.parametrize( "ctx", [mp.get_context(m) for m in START_METHODS], ids=START_METHODS ) class TestEnvSubprocess: def test_env_in_subprocess(self, ctx): + if ctx.get_start_method() == "spawn" and is_asan(): + pytest.skip("ASAN crashes on spawn on this environment") p = ctx.Process(target=new_env_one_step) p.start() p.join() assert p.exitcode == 0 def test_env_before_and_in_subprocess(self, ctx): + if ctx.get_start_method() == "spawn" and is_asan(): + pytest.skip("ASAN crashes on spawn on this environment") new_env_one_step() p = ctx.Process(target=new_env_one_step) p.start() diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index 4f0194b67..a796ddb5d 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -27,18 +27,27 @@ file(MAKE_DIRECTORY ${NLE_INC_GEN} ${NLE_SRC_GEN}) add_executable(makedefs ${MAKEDEFS_SRC}) target_include_directories(makedefs PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(makedefs PRIVATE nle_common_options) + add_executable(dgn_comp ${DGN_COMP_SRC}) target_include_directories(dgn_comp PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(dgn_comp PRIVATE nle_common_options) + add_executable(lev_comp ${LEV_COMP_SRC}) target_include_directories(lev_comp PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(lev_comp PRIVATE nle_common_options) + add_executable(dlb ${DLB_SRC}) target_include_directories(dlb PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(dlb PRIVATE nle_common_options) + add_executable(recover recover.c) target_include_directories(recover PRIVATE ${NLE_INC} ${NLE_INC_GEN} ${NLE_UTIL_GEN}) +target_link_libraries(recover PRIVATE nle_common_options) add_custom_command( DEPENDS makedefs @@ -90,6 +99,7 @@ add_custom_target(util DEPENDS ${MAKEDEFS_HEADERS} recover) add_executable(tilemap ${NLE_WIN}/share/tilemap.c) target_include_directories(tilemap PUBLIC ${NLE_INC} ${NLE_INC_GEN}) +target_link_libraries(tilemap PRIVATE nle_common_options) add_dependencies(tilemap util) file(GLOB NETHACK_TILE_SRC ${NLE_WIN}/rl/tile2rgb.c ${NLE_WIN}/rl/nletiletxt.c @@ -97,7 +107,7 @@ file(GLOB NETHACK_TILE_SRC ${NLE_WIN}/rl/tile2rgb.c ${NLE_WIN}/rl/nletiletxt.c add_library(tile OBJECT ${NETHACK_TILE_SRC} ${NLE_SRC_GEN}/tile.c) target_include_directories(tile PUBLIC ${NLE_INC} ${NLE_INC_GEN} ${NLE_WIN}/share) -add_dependencies(tile generate_pm_h generate_tile_c) +add_dependencies(tile generate_pm_h generate_tile_c nle_common_options) # NOTE: util is dependent on these two add_dependencies(lev_comp util)