Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ CPMAddPackage("gh:simdjson/simdjson#v3.9.2")
find_package(RapidJSON REQUIRED)
find_library(TCLAP tclap)

option(GPUEVM_O2_DEBUG "Enable O2 debug for CUDA" ON)
option(GPUEVM_DEBUG_INTERPRETER "Printing from device during interpreter runtime" OFF)
option(GPUEVM_DEBUG_FUZZER "Print debug info from fuzzer during runtime" OFF)
option(GPUEVM_DEBUG_NVTX "Printing nvtx context pushing/popping during runtime" OFF)
Expand Down Expand Up @@ -103,10 +104,13 @@ function(configure_target_gpuevm target)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-g>)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xptxas -v>)

# TODO make this an option
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-dopt=on>)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-O2>)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xptxas -O2>)
# Recall that we have this a suboption of the debug build type bc we couldn't figure out how to capture
# all the lineinfo in a -g Release build
if(GPUEVM_O2_DEBUG)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-dopt=on>)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-O2>)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xptxas -O2>)
endif()

# And set the GPUEVM_DEBUG symbol
target_compile_definitions(${target} PRIVATE GPUEVM_DEBUG)
Expand Down
2 changes: 1 addition & 1 deletion src/common/fuzz_algos/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ set_target_properties(evm_fuzz_algos PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

target_include_directories(evm_fuzz_algos PUBLIC ${GPU_EVM_SOURCE_DIR}/src/common)

target_link_libraries(evm_fuzz_algos PUBLIC evm_ingest ${CUDA_LIBRARIES} libcuda.so cudart)
target_link_libraries(evm_fuzz_algos PUBLIC evm_ingest ${CUDA_LIBRARIES} libcuda.so cudart termcolor)
63 changes: 30 additions & 33 deletions src/common/fuzz_algos/fuzz_check_invariants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <iomanip>
#include <iostream>
#include <termcolor/termcolor.hpp>

__global__ void set_test_selectors_kernel(GpuArrayView<uint8_t> test_selectors_out, const __restrict__ uint8_t* invariant_selectors, const ExtFuzzFuncs fuzz_funcs, uint32_t n_tested_storages, uint32_t n_tot_runners) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
Expand Down Expand Up @@ -40,68 +41,64 @@ void set_test_selectors(GpuArrayView<uint8_t> test_selectors, const ExtFuzzFuncs
safeCudaFree(selector_buf);
}

__global__ void set_storage_indices_kernel(GpuArrayView<size_t> storage_indices, uint32_t n_tested_storages, uint32_t n_test_threads, size_t index_offset) {
__global__ void set_storage_indices_kernel(GpuArrayView<size_t> storage_indices, uint32_t n_tested_storages, uint32_t n_test_threads) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
// For tid < n_tested_storages, we're already done with the cyclical assignment
if (tid < n_tested_storages) return;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can set the index of storage_indices with an offset like n_tested_storages. For example, updating storage_indices by using storage_indices[n_tested_storages + tid];

// All evms above n_test_threads just get storage index 0, since it's guaranteed to exist
if (tid >= MAX_N_SEPARATE_CONTEXTS) return;
if (tid >= n_test_threads) {
storage_indices[tid] = 0;
} else {
storage_indices[tid] = (tid % n_tested_storages) + index_offset;
storage_indices[tid] = storage_indices[tid % n_tested_storages];
}
}

void set_storage_indices(GpuArrayView<size_t> storage_indices, uint32_t n_tested_storages, uint32_t n_test_threads, size_t index_offset) {
void cyclic_repeat_storage_indices(GpuArrayView<size_t> storage_indices, uint32_t n_tested_storages, uint32_t n_test_threads) {
auto [grid_size, block_size] = get_cuda_max_occupancy(storage_indices.size, (void*)set_storage_indices_kernel);
set_storage_indices_kernel<<<grid_size, block_size>>>(storage_indices, n_tested_storages, n_test_threads, index_offset);
set_storage_indices_kernel<<<grid_size, block_size>>>(storage_indices, n_tested_storages, n_test_threads);
safeStreamSync();
}

// TODO store this in the storage corpus itself
size_t first_unchecked_idx = 0;
void fuzz_check_invariants(FuzzerState& fuzz_state) {
gpuevm_nvtx_push("fuzz_check_invariants");
// TODO: Complete the implementation and remove limitations

// THE AMPERSAND IS NECESSARY WHEN USING AUTO; OTHERWISE YOU GET A BY-VALUE COPY!
auto& api_ctx = fuzz_active_ctx();

assert(fuzz_state.fuzz_funcs.n_invariant_funcs > 0);
assert(fuzz_state.fuzz_funcs.n_invariant_funcs <= MAX_N_SEPARATE_CONTEXTS);
assert(api_ctx.n_runners == MAX_N_SEPARATE_CONTEXTS);
assert(fuzz_state.fuzz_funcs.n_invariant_funcs > 0 && "No invariants to check");
assert(fuzz_state.fuzz_funcs.n_invariant_funcs <= MAX_N_SEPARATE_CONTEXTS && "Too many invariants to check (more than one per thread)");
assert(api_ctx.n_runners == MAX_N_SEPARATE_CONTEXTS && "Wrong number of interpreter threads, must use one per cuda thread");

// The actual number of storages we can test (since each storage needs to be tested against all invariants)
auto n_tested_storages = MAX_N_SEPARATE_CONTEXTS / fuzz_state.fuzz_funcs.n_invariant_funcs;
auto n_test_threads = n_tested_storages * fuzz_state.fuzz_funcs.n_invariant_funcs;

// TODO remove this requirement
// There must be enough storages actually recorded
assert(fuzz_state.storage_corpus.size >= n_tested_storages);

/* Plan for using/setting the checkedness on storages
1. Get the first storage index that is unchecked, (This should and can just be tracked in single counter, until we switch to having separate fuzzing and invariantchecking storage corpori)
2. Doublecheck that we actually have enough storages to run a full batch
3. Use the index as offset when we make the cyclic storage selection array
4. Mark the check storages from that index as checked
/* Storage selection for invariant checking
1. Check that there are n_tested_storages that are unchecked
2. Get the n_tested_storages first such indices
3. Use that array of indices to cyclically repeat
4. Mark those storages as checked
5. Move the storages into the ctx's gpu_map
*/

// 1. Get the first unchecked index
// TODO use cub::argindexinputiterator in the future?

// 2. Check that we have enough storages
assert(fuzz_state.storage_corpus.size > first_unchecked_idx + n_tested_storages);
// 1. Check that there are n_tested_storages that are unchecked
assert(fuzz_state.storage_corpus.num_unchecked_cases() >= n_tested_storages && "Not enough unchecked storages to check invariants on");

// 3. Use the unchecked_idx as (value)offset in storage selection array
// 2. Get the n_tested_storages first such indices
GpuArray<size_t> storage_indices(MAX_N_SEPARATE_CONTEXTS);
set_storage_indices(storage_indices.view(), n_tested_storages, n_test_threads, first_unchecked_idx);
auto small_storage_indices_view = storage_indices.view();
// We need to let the get_unchecked_indices only put n_tested_storages into the array
small_storage_indices_view.size = n_tested_storages;
fuzz_state.storage_corpus.get_unchecked_indices(small_storage_indices_view, n_tested_storages);
// 3. Use that array of indices to cyclically repeat
cyclic_repeat_storage_indices(storage_indices.view(), n_tested_storages, n_test_threads);

// 4. Set the checkedness of the storages to true
// TODO this should also probably be a member of the storage corpus
for (auto i = 0; i < n_tested_storages; i++) {
fuzz_state.storage_corpus.metadatas[i + first_unchecked_idx].checked_invariants = true;
}
fuzz_state.storage_corpus.mark_checked(small_storage_indices_view);

// Set the storages into the ctx
// 5. Set the storages into the ctx's gpu_map
fuzz_state.storage_corpus.scatter_kvs(api_ctx.storage, storage_indices.view());

// Now we group the selectors into contiguous groups to get maximal warp coherence
Expand Down Expand Up @@ -150,17 +147,17 @@ void fuzz_check_invariants(FuzzerState& fuzz_state) {

CUDA_SAFE_CALL(cudaDeviceSynchronize());

size_t n_broken_invariants = fuzz_state.fuzz_funcs.n_invariant_funcs - std::count(invariant_revert_mask.data, invariant_revert_mask.data + fuzz_state.fuzz_funcs.n_invariant_funcs, 0);

// If any reverts are new (i.e. set in this mask, but not previously set in the reverted_invariants_times), set to current time
fuzz_time_diff revert_time = timing::time_diff(fuzz_state.start_time, timing::current_time());
for (auto invariant_idx = 0; invariant_idx < fuzz_state.fuzz_funcs.n_invariant_funcs; invariant_idx++) {
if (invariant_revert_mask[invariant_idx] && fuzz_state.reverted_invariants_times[invariant_idx] == 0) {
std::cout << "[Broke inv " << invariant_idx << "]";
std::cout << termcolor::red << "[Broke inv " << invariant_idx << "| tot broken: " << n_broken_invariants << "]" << termcolor::reset;
fuzz_state.reverted_invariants_times[invariant_idx] = revert_time;
}
}
// TODO check what the actual memory consistency guarantees are for stream syncs vs device syncs and manual (i.e. not via kernels that are in streams) access to unified memory
CUDA_SAFE_CALL(cudaDeviceSynchronize());

first_unchecked_idx += n_tested_storages;
gpuevm_nvtx_pop();
}
104 changes: 99 additions & 5 deletions src/common/fuzz_algos/fuzz_cull.cpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,108 @@
#include "fuzz_cull.h"

#include <fuzz_dss/gpu_array_utils.h>

#include <iomanip>

/*
Culling the queue of fuzzing inputs.
Culling the inputs/storages that have been accumulated.

Calldata culling ideas:
Initial implementation reconstructs all edge coverage found by every input in the queue, and then finds a subset (not necessarily minimal, this is subset cover which is NP hard) of queue entries
that cover the same edges.

Storage culling ideas:
Remove all storages that have above M visits and below R ratio of votes/visits
TODO think about some sort of probabilistic thing to cull
TODO make it reach a target absolute # passing instead (histogram vote ratios, then cutoff)
*/

namespace fuzz_culling {
// Thresholds for storage culling: a storage must survive MIN_VISITS fuzz visits,
// after which its votes/visits ratio must clear MIN_VOTE_RATIO or it is culled.
constexpr int MIN_VISITS = 1024;
// Fixed: `1 / 1024` was integer division and evaluated to 0, so the ratio
// threshold never culled anything. Use float literals for the intended 1/1024.
constexpr float MIN_VOTE_RATIO = 1.0f / 1024.0f;

// One thread per storage entry: writes into passes_mask whether the storage
// survives culling (true = keep, false = cull). Expects a 1D launch with at
// least metadata.size threads; assumes passes_mask.size >= metadata.size.
// (Fixed: removed the dead `votes` local and corrected comments that claimed
// under-visited storages are unconditionally spared — they are not.)
__global__ void cull_storages_mark_kernel(const GpuArrayView<StorageCaseMetadata> metadata, GpuArrayView<bool> passes_mask, const GpuArrayView<size_t> storage_sizes, size_t max_storage_size) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= metadata.size) return;
    auto visits = metadata[tid].fuzz_case_metadata.visits;
    auto ratio = metadata[tid].fuzz_case_metadata.vote_ratio();
    auto storage_size = storage_sizes[tid];
    // TODO come up with good rules here
    if (visits < MIN_VISITS) {
        // Too few visits to judge vote quality yet: keep only storages at (or one
        // below) the corpus-wide maximum kv size; smaller unproven storages are culled.
        passes_mask[tid] = storage_size >= (max_storage_size - 1);
    } else {
        // Enough visits: it must have produced good results (ratio above threshold)
        passes_mask[tid] = (ratio >= MIN_VOTE_RATIO);
    }
}

// Returns the largest kv-size value in the corpus-wide size array.
// NOTE(review): thrust::max_element over raw pointers with thrust::device returns
// a pointer into device memory; dereferencing it on the host assumes the buffer
// is unified/managed memory — TODO confirm against GpuArrayView's allocator.
size_t max_storage_size(const GpuArrayView<size_t>& kv_sizes) {
    const size_t* first = kv_sizes.data;
    const size_t* last = first + kv_sizes.size;
    return *thrust::max_element(thrust::device, first, last);
}

// Fills passes_mask (true = keep) for every storage in the corpus by launching
// cull_storages_mark_kernel. Does not synchronize; callers sync before reading
// the mask.
void cull_storages_mark(FuzzStorageBuffer& storage_corpus, GpuArrayView<bool> passes_mask) {
    assert(storage_corpus.size <= passes_mask.size);
    // The corpus-wide maximum kv size is computed first so that max-size storages
    // can be exempted from culling inside the kernel.
    const auto biggest_kv_size = max_storage_size(storage_corpus.kv_sizes.view());

    auto [n_blocks, n_threads] = get_cuda_max_occupancy(storage_corpus.metadatas.size(), (void*)cull_storages_mark_kernel);
    cull_storages_mark_kernel<<<n_blocks, n_threads>>>(storage_corpus.metadatas.view(), passes_mask, storage_corpus.kv_sizes.view(), biggest_kv_size);
}

// Given a pass mask (true = keep), removes & compacts the storage corpus in place,
// then preserves the culled storages that are ancestors of surviving ones by
// pushing them into the separate ancestor corpus.
// TODO think about whether ancestor handling should be done in a GC-like way instead
void cull_storages_filter(FuzzerState& fuzz_state, GpuArrayView<bool> passes_mask) {
    // filter() compacts storage_corpus and returns the batch of removed storages

    auto culled_storages = fuzz_state.storage_corpus.filter(passes_mask);
    // Mask over the culled batch: which removed storages are ancestors of storages
    // still alive in the (already-compacted) corpus — presumably; verify ordering
    // assumptions of get_ancestor_mask against its implementation.
    GpuArray<bool> ancestor_mask = fuzz_state.storage_corpus.get_ancestor_mask(culled_storages);
    // NB ignoring filter()'s return value here; we only want culled_storages
    // narrowed down to the ancestors worth keeping
    culled_storages.filter(ancestor_mask);
    // TODO make sure push_batch actually checks hashes
    fuzz_state.ancestor_corpus.push_batch(culled_storages);
}

}

// Entry point for one culling pass: marks each storage as keep/cull on the GPU,
// and — only if at least 32 storages would survive — compacts the corpus while
// preserving ancestors, then prints a short stats line ("P" = nothing failed,
// "N" = cull skipped to avoid emptying the corpus).
void fuzz_cull(FuzzerState& fuzz_state) {
    // NOTE(review): leftover debug print — likely meant to be removed once the
    // GPUEVM_DEBUG-gated message below was added; confirm and delete.
    std::cout << "hello world I am culling!" << std::endl;
#ifdef GPUEVM_DEBUG
    std::cout << "Culling from corpus size " << fuzz_state.corpus.size() << std::endl;
#endif
    gpuevm_nvtx_push("fuzz_cull");

    // Corpus size before any compaction, for the stats printout below
    long long num_passes_before = fuzz_state.storage_corpus.size;

    // TODO cache the various tempbuffers here
    GpuArray<bool> passes_mask(fuzz_state.storage_corpus.size);

    // We get back a mask that has true if the storage passed, and false if it should be culled (with no regard for ancestor-keeping)
    fuzz_culling::cull_storages_mark(fuzz_state.storage_corpus, passes_mask.view());
    // NOTE(review): stream sync followed by device sync looks redundant — one
    // should suffice before the host reads the mask; confirm intent.
    safeStreamSync();
    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    // Don't allow removing all states, so check that enough are kept before actually culling
    if (device_array_count<bool>(passes_mask, true) >= 32) {
        // Compacts the corpus; ancestors of survivors are moved to ancestor_corpus
        fuzz_culling::cull_storages_filter(fuzz_state, passes_mask.view());
        safeStreamSync();

        CUDA_SAFE_CALL(cudaDeviceSynchronize());
        // Count and print some stats about the passes_mask (the mask itself is
        // unchanged by the filter, so this recount equals the pre-filter count)
        long long num_passes_after = device_array_count<bool>(passes_mask, true);
        long long failed = num_passes_before - num_passes_after;
        if (failed > 0) {
            std::cout << "[Cull: " << num_passes_after << " pass, " << failed << " failed | ";
            std::cout << "Rate " << std::fixed << std::setprecision(1) << 100 * failed / (float)num_passes_before << "%]";
        } else {
            std::cout << "P";
        }
    } else {
        std::cout << "N";
    }

    gpuevm_nvtx_pop();
}
14 changes: 14 additions & 0 deletions src/common/fuzz_algos/fuzz_dedup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,20 @@ namespace fuzz_dedup {
return marked_inputs;
}

// One thread per fuzz case: if the case is marked interesting, atomically add one
// vote to the metadata of the storage it started from (storage_indices maps
// case index -> source-storage index). Atomics are required because many cases
// can share the same source storage.
// NOTE(review): the cast treats `votes` as a 64-bit unsigned counter — TODO
// confirm the declared type of fuzz_case_metadata.votes.
__global__ void add_votes_to_storages_kernel(const GpuArrayView<bool> interesting_mask, const GpuArrayView<size_t> storage_indices, GpuArrayView<StorageCaseMetadata> metadata) {
    const int case_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (case_idx >= interesting_mask.size) return;
    if (!interesting_mask.data[case_idx]) return;
    auto* vote_counter = &(metadata[storage_indices[case_idx]].fuzz_case_metadata.votes);
    atomicAdd((unsigned long long*)vote_counter, (unsigned long long)1);
}

// Host-side launcher for add_votes_to_storages_kernel: one thread per entry of
// interesting_mask. Does not synchronize after the launch; callers that read
// metadata on the host must sync first.
void add_votes_to_storages(const GpuArrayView<bool> interesting_mask, const GpuArrayView<size_t> storage_indices, GpuArrayView<StorageCaseMetadata> metadata) {
    gpuevm_nvtx_push("add_votes_to_storages");
    auto [n_blocks, n_threads] = get_cuda_max_occupancy(interesting_mask.size, (void*)add_votes_to_storages_kernel);
    add_votes_to_storages_kernel<<<n_blocks, n_threads>>>(interesting_mask, storage_indices, metadata);
    gpuevm_nvtx_pop();
}

__global__ void get_revert_mask_kernel(uint8_t* const bytecode, const GpuArrayView<evm_tracer_t> tracers_in, GpuArrayView<bool> reverting_mask) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid >= tracers_in.size) return;
Expand Down
3 changes: 2 additions & 1 deletion src/common/fuzz_algos/fuzz_dedup.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ namespace fuzz_dedup {
FuzzCaseInputs filter_inputs_by_mask(const FuzzCaseInputs& run_inputs, const GpuArrayView<bool>& mask);

std::pair<std::optional<GpuArrayView<bool>>, std::optional<FuzzCaseInputs>> get_reverting_data(uint8_t* const bytecode, evm_tracer_t* tracer_buf, const FuzzCaseInputs& run_inputs);
void add_votes_to_storages(const GpuArrayView<bool> interesting_mask, const GpuArrayView<size_t> storage_indices, GpuArrayView<StorageCaseMetadata> metadata);
void get_revert_mask(uint8_t* const bytecode, const GpuArrayView<evm_tracer_t> tracers_in, GpuArrayView<bool> reverting_mask);
void compute_revert_discard_mask(const GpuArrayView<bool> reverting_mask, const GpuArrayView<evm_tracer_t> tracers_in, GpuArrayView<bool> revert_discard_mask);

Expand All @@ -22,4 +23,4 @@ namespace fuzz_dedup {
void inplace_bool_or_gpuarrays(const GpuArrayView<bool> a_inout, const GpuArrayView<bool> b_in);
void inplace_bool_not_gpuarrays(const GpuArrayView<bool> a_inout);
void inplace_bool_and_gpuarrays(const GpuArrayView<bool> a_inout, const GpuArrayView<bool> b_in);
}
}
12 changes: 10 additions & 2 deletions src/common/fuzz_algos/fuzz_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ FuzzerState::FuzzerState(const ExtGpuEvmFuzzSetup& setup, const FuzzStorageCase&
corpus(EvmFlatAbiTypes(fuzz_funcs.stateful_funcs[0].evm_flat_abi).calldata_size()),
storage_corpus(initial_storage),
storage_hashes_seen(max_corpus_size_pow_2 + 5),
ancestor_corpus(),
storage_dep_map(max_corpus_size_pow_2 + 5),
reverting_exit_datas_map(max_corpus_size_pow_2 + 5),
reverting_corpus(EvmFlatAbiTypes(fuzz_funcs.stateful_funcs[0].evm_flat_abi).calldata_size()),
Expand Down Expand Up @@ -120,10 +121,12 @@ std::tuple<FuzzCaseInputs, std::optional<FuzzCaseInputs>, std::optional<FuzzStor
gpuevm_nvtx_push("run_get_interesting");

// Set the storage cases to use
storage_corpus.scatter_kvs(fuzz_active_ctx().storage, storage_indices);
// Also mark these storage scatters as being real visits in the metadata
storage_corpus.scatter_kvs(fuzz_active_ctx().storage, storage_indices, true);

// Setup gpuevm with those calldatas
fuzz_set_calldatas(inputs);

// Put the current distmap into interpreter memory
// This can do resizing under the hood
dist_map.sync_to_ctx(fuzz_active_ctx());
Expand Down Expand Up @@ -182,7 +185,7 @@ std::tuple<FuzzCaseInputs, std::optional<FuzzCaseInputs>, std::optional<FuzzStor
auto n_interesting_storages = device_array_count(GpuArrayView(scratch_storage_hashes_interesting_mask), true);
gpuevm_nvtx_pop();

// Immediately collect the interesting storages
// Immediately collect the interesting storages (NB here interesting storage means that is has new storagehash, and is not discarded)
std::optional<FuzzStorageCaseBatch> interesting_storages;
if (n_interesting_storages > 0) {
interesting_storages = FuzzStorageCaseBatch(run_ctx, scratch_storage_hashes_interesting_mask);
Expand Down Expand Up @@ -261,6 +264,11 @@ std::tuple<FuzzCaseInputs, std::optional<FuzzCaseInputs>, std::optional<FuzzStor
return {fuzz_dedup::concat_fuzz_case_inputs(interesting_inputs, preserve_add_back_inputs), opt_reverting_inputs, interesting_storages};
}
}

// For all the interesting inputs, we want to add votes to the storages they started with
// So we use the general interestingness mask and add votes to those storages
fuzz_dedup::add_votes_to_storages(interesting_mask, input_storage_indices, storage_corpus.metadatas.view());

gpuevm_nvtx_pop();
return {interesting_inputs, opt_reverting_inputs, interesting_storages};
}
Expand Down
Loading