-
Notifications
You must be signed in to change notification settings - Fork 0
Storage culling implementation #40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,108 @@ | ||
| #include "fuzz_cull.h" | ||
|
|
||
| #include <fuzz_dss/gpu_array_utils.h> | ||
|
|
||
| #include <iomanip> | ||
|
|
||
| /* | ||
| Culling the queue of fuzzing inputs. | ||
| Culling the inputs/storages that have been accumulated. | ||
|
|
||
| Calldata culling ideas: | ||
| Initial implementation reconstructs all edge coverage found by every input in the queue, and then finds a subset (not necessarily minimal, this is subset cover which is NP hard) of queue entries | ||
| that cover the same edges. | ||
|
|
||
| Storage culling ideas: | ||
| Remove all storages that have above M visits and below R ratio of votes/visits | ||
| TODO think about some sort of probabilistic thing to cull | ||
| TODO make it reach a target absolute # passing instead (histogram vote ratios, then cutoff) | ||
| */ | ||
|
|
||
| namespace fuzz_culling { | ||
// Thresholds for storage culling
constexpr int MIN_VISITS = 1024;
// NB: must be a floating-point expression — the original `1 / 1024` was integer
// division and evaluated to 0, which made the ratio threshold accept every storage.
constexpr float MIN_VOTE_RATIO = 1.0f / 1024.0f;
|
|
||
// From the metadata array, mark into passes_mask whether each storage passes (true = keep, false = cull).
// One thread per storage; expects a 1D launch with at least metadata.size threads.
__global__ void cull_storages_mark_kernel(const GpuArrayView<StorageCaseMetadata> metadata, GpuArrayView<bool> passes_mask, const GpuArrayView<size_t> storage_sizes, size_t max_storage_size) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= metadata.size) return;
    auto visits = metadata[tid].fuzz_case_metadata.visits;
    auto ratio = metadata[tid].fuzz_case_metadata.vote_ratio();
    auto storage_size = storage_sizes[tid];
    // TODO come up with good rules here
    if (visits < MIN_VISITS) {
        // Too few visits to judge by vote ratio: keep only (near-)max-size storages.
        // NOTE(review): the original comment said "automatically spared", but the code
        // conditions on size — confirm which behavior is intended.
        passes_mask[tid] = storage_size >= (max_storage_size - 1);
    } else {
        // Having already had MIN_VISITS changes, it better have generated good results
        passes_mask[tid] = (ratio >= MIN_VOTE_RATIO);
    }
}
|
|
||
// Returns the largest value in kv_sizes (a device-resident array); 0 for an empty array.
// Synchronous: performs a device reduction plus a device->host copy of the single result.
size_t max_storage_size(const GpuArrayView<size_t>& kv_sizes) {
    // max_element on an empty range returns the end iterator; dereferencing it is UB.
    if (kv_sizes.size == 0) return 0;
    // Wrap the raw device pointer in a thrust::device_ptr so that the host-side
    // dereference below performs a proper device-to-host copy. Dereferencing the raw
    // device pointer that max_element would return for raw-pointer iterators is invalid
    // on the host.
    auto first = thrust::device_pointer_cast(kv_sizes.data);
    auto max_it = thrust::max_element(thrust::device, first, first + kv_sizes.size);
    return *max_it;
}
|
|
||
// Marks into passes_mask which storages of the corpus survive culling (true = keep).
// Asynchronous: the caller must synchronize before reading passes_mask.
void cull_storages_mark(FuzzStorageBuffer& storage_corpus, GpuArrayView<bool> passes_mask) {
    assert(storage_corpus.size <= passes_mask.size);
    // We calculate the max storage kv size, and then don't allow any max-size storages to be culled
    auto max_size = max_storage_size(storage_corpus.kv_sizes.view());

    auto [grid_size, block_size] = get_cuda_max_occupancy(storage_corpus.metadatas.size(), (void*)cull_storages_mark_kernel);
    cull_storages_mark_kernel<<<grid_size, block_size>>>(storage_corpus.metadatas.view(), passes_mask, storage_corpus.kv_sizes.view(), max_size);
    // Surface launch-configuration errors immediately; execution errors show up at the
    // caller's synchronization point.
    CUDA_SAFE_CALL(cudaGetLastError());
}
|
|
||
// Given a pass mask, removes & compacts the storage corpus. Ancestors of surviving
// storages are preserved by moving them into the ancestor corpus.
// TODO think about whether ancestor handling should be done in a GC-like way instead
void cull_storages_filter(FuzzerState& fuzz_state, GpuArrayView<bool> passes_mask) {
    // Compacting the corpus hands back the storages that were removed
    auto removed = fuzz_state.storage_corpus.filter(passes_mask);
    // Of the removed storages, keep only those that are ancestors
    GpuArray<bool> is_ancestor = fuzz_state.storage_corpus.get_ancestor_mask(removed);
    // NB: the return value (non-ancestors) is intentionally dropped here
    removed.filter(is_ancestor);
    // TODO make sure push_batch actually checks hashes
    fuzz_state.ancestor_corpus.push_batch(removed);
}
|
|
||
| } | ||
|
|
||
// Culls low-value storages from the storage corpus: marks each storage as pass/cull,
// then — only if enough storages would survive — compacts the corpus, preserving
// ancestors of culled entries, and prints a short stats summary.
void fuzz_cull(FuzzerState& fuzz_state) {
#ifdef GPUEVM_DEBUG
    std::cout << "Culling from corpus size " << fuzz_state.corpus.size() << std::endl;
#endif
    gpuevm_nvtx_push("fuzz_cull");

    // Never cull below this many surviving storages.
    constexpr long long MIN_SURVIVING_STORAGES = 32;

    long long num_total = fuzz_state.storage_corpus.size;

    // TODO cache the various tempbuffers here
    GpuArray<bool> passes_mask(fuzz_state.storage_corpus.size);

    // We get back a mask that has true if the storage passed, and false if it should be
    // culled (with no regard for ancestor-keeping)
    fuzz_culling::cull_storages_mark(fuzz_state.storage_corpus, passes_mask.view());
    safeStreamSync();
    CUDA_SAFE_CALL(cudaDeviceSynchronize());

    // Count once: passes_mask is not modified below, so this pre-cull count is also the
    // post-cull pass count (the original code recomputed it with a second device reduction).
    long long num_passes = device_array_count<bool>(passes_mask, true);

    // Don't allow removing all states, so check that enough are kept before actually culling
    if (num_passes >= MIN_SURVIVING_STORAGES) {
        // We take care of ancestors here
        fuzz_culling::cull_storages_filter(fuzz_state, passes_mask.view());
        safeStreamSync();
        CUDA_SAFE_CALL(cudaDeviceSynchronize());
        // Print some stats about the cull
        long long failed = num_total - num_passes;
        if (failed > 0) {
            std::cout << "[Cull: " << num_passes << " pass, " << failed << " failed | ";
            std::cout << "Rate " << std::fixed << std::setprecision(1) << 100 * failed / (float)num_total << "%]";
        } else {
            std::cout << "P";
        }
    } else {
        std::cout << "N";
    }

    gpuevm_nvtx_pop();
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,6 +106,20 @@ namespace fuzz_dedup { | |
| return marked_inputs; | ||
| } | ||
|
|
||
// For each input marked interesting, atomically adds one vote to the metadata of the
// storage that input ran against (via storage_indices).
// One thread per input; expects a 1D launch with at least interesting_mask.size threads.
__global__ void add_votes_to_storages_kernel(const GpuArrayView<bool> interesting_mask, const GpuArrayView<size_t> storage_indices, GpuArrayView<StorageCaseMetadata> metadata) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= interesting_mask.size) return;
    if (interesting_mask[tid]) {
        // Multiple inputs may map to the same storage, so the increment must be atomic.
        // NOTE(review): the cast assumes votes is a 64-bit unsigned field — confirm its
        // declared type matches unsigned long long.
        atomicAdd((unsigned long long*)&(metadata[storage_indices[tid]].fuzz_case_metadata.votes), (unsigned long long)1);
    }
}
|
|
||
// Host wrapper: launches add_votes_to_storages_kernel over all inputs.
void add_votes_to_storages(const GpuArrayView<bool> interesting_mask, const GpuArrayView<size_t> storage_indices, GpuArrayView<StorageCaseMetadata> metadata) {
    gpuevm_nvtx_push("add_votes_to_storages");
    // The kernel reads storage_indices[tid] for every tid < interesting_mask.size, so the
    // index array must be at least as large as the mask (assert requested in review).
    assert(storage_indices.size >= interesting_mask.size);
    auto [grid_size, block_size] = get_cuda_max_occupancy(interesting_mask.size, (void*)add_votes_to_storages_kernel);
    add_votes_to_storages_kernel<<<grid_size, block_size>>>(interesting_mask, storage_indices, metadata);
    // Surface launch-configuration errors immediately instead of at some later sync point
    CUDA_SAFE_CALL(cudaGetLastError());
    gpuevm_nvtx_pop();
}
|
|
||
| __global__ void get_revert_mask_kernel(uint8_t* const bytecode, const GpuArrayView<evm_tracer_t> tracers_in, GpuArrayView<bool> reverting_mask) { | ||
| int tid = blockIdx.x * blockDim.x + threadIdx.x; | ||
| if (tid >= tracers_in.size) return; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we can set the index of storage_indices with an offset like n_tested_storages. For example, updating storage_indices by using storage_indices[n_tested_storages + tid];