diff --git a/netkat/BUILD.bazel b/netkat/BUILD.bazel
index 9121fef..9c15abc 100644
--- a/netkat/BUILD.bazel
+++ b/netkat/BUILD.bazel
@@ -397,6 +397,16 @@ cc_test(
     ],
 )
 
+cc_binary(
+    name = "paged_stable_vector_benchmark",
+    testonly = True,
+    srcs = ["paged_stable_vector_benchmark.cc"],
+    deps = [
+        ":paged_stable_vector",
+        "@com_google_benchmark//:benchmark_main",
+    ],
+)
+
 cc_test(
     name = "packet_transformer_test",
     srcs = ["packet_transformer_test.cc"],
diff --git a/netkat/packet_set.h b/netkat/packet_set.h
index f11088f..8b24d51 100644
--- a/netkat/packet_set.h
+++ b/netkat/packet_set.h
@@ -343,10 +343,15 @@ class PacketSetManager {
 
   [[nodiscard]] std::string ToString(const DecisionNode& node) const;
 
-  // The page size of the `nodes_` vector: 64 MiB or ~ 67 MB.
-  // Chosen large enough to reduce the cost of dynamic allocation, and small
-  // enough to avoid excessive memory overhead.
-  static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode);
+  // The page size of the `nodes_` vector: 512 nodes (12 KiB if nodes are 24 B).
+  // Chosen large enough to amortize the cost of dynamic allocation over
+  // hundreds of nodes, and small enough that pages stay below the malloc
+  // mmap/trim thresholds (typically 128 KiB): this way, short-lived managers
+  // recycle pages through the allocator's freelists instead of paying an
+  // mmap/munmap syscall pair per manager. A power of two so that indexing
+  // into the vector -- which is on the hot path of nearly every operation --
+  // compiles to shifts and masks rather than multiply sequences.
+  static constexpr size_t kPageSize = size_t{1} << 9;
 
   // The decision nodes forming the BDD-style DAG representation of packet sets.
   // `PacketSetHandle::node_index_` indexes into this vector.
diff --git a/netkat/packet_set_benchmark.cc b/netkat/packet_set_benchmark.cc
index 7de2fd0..7685713 100644
--- a/netkat/packet_set_benchmark.cc
+++ b/netkat/packet_set_benchmark.cc
@@ -12,7 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <cstdint>
 #include <string>
+#include <utility>
 
 #include "absl/strings/str_cat.h"
 #include "benchmark/benchmark.h"
@@ -120,4 +122,91 @@ void BM_ReCompileOverlappingPredicate(benchmark::State& state) {
 }
 BENCHMARK(BM_ReCompileOverlappingPredicate);
 
+// -- Large-scale benchmarks ---------------------------------------------------
+//
+// The benchmarks above build BDDs of only tens of nodes, so they cannot detect
+// effects that only manifest at scale (node arena performance, unique-table
+// pressure, algorithmic complexity of the set operations). The benchmarks
+// below operate on sets of pseudo-random members of a 16^5 ~= 1M element
+// space, encoded over 5 hex-digit fields. Random sets have incompressible
+// BDDs, so node counts scale with set size, mimicking large real-world NetKAT
+// models.
+
+constexpr int kNumDigits = 5;
+
+// The `i`-th pseudo-random member of the space, under the given `seed`.
+// Distinct `i` mostly yield distinct members; collisions just shrink the set.
+//
+// `seed` and `i` are packed into disjoint halves of a 64-bit word before
+// mixing, so different seeds yield unrelated sequences. Merely adding them
+// would give `Member(i, s + 1) == Member(i + 1, s)`: the sets for adjacent
+// seeds over the same index range would then differ in at most two members.
+uint32_t Member(uint32_t i, uint32_t seed) {
+  // SplitMix64 output function: a bijective 64-bit mixer.
+  uint64_t state = ((uint64_t{seed} << 32) | i) + 0x9E3779B97F4A7C15ULL;
+  state = (state ^ (state >> 30)) * 0xBF58476D1CE4E5B9ULL;
+  state = (state ^ (state >> 27)) * 0x94D049BB133111EBULL;
+  state ^= state >> 31;
+  return static_cast<uint32_t>(state) & ((1u << (4 * kNumDigits)) - 1);
+}
+
+// Matches exactly the packets whose digit fields encode `member`.
+PredicateProto MemberPredicate(uint32_t member) {
+  PredicateProto pred = MatchProto("f0", member & 15);
+  for (int d = 1; d < kNumDigits; ++d) {
+    pred = AndProto(std::move(pred),
+                    MatchProto(absl::StrCat("f", d), (member >> (4 * d)) & 15));
+  }
+  return pred;
+}
+
+// A balanced Or-tree over members [lo, hi) -- balanced to keep proto/compile
+// recursion depth logarithmic.
+PredicateProto RandomSetPredicate(uint32_t lo, uint32_t hi, uint32_t seed) {
+  if (hi - lo == 1) return MemberPredicate(Member(lo, seed));
+  uint32_t mid = lo + (hi - lo) / 2;  // Requires lo < hi.
+  return OrProto(RandomSetPredicate(lo, mid, seed),
+                 RandomSetPredicate(mid, hi, seed));
+}
+
+// Benchmarks first-time compilation of a large random set, dominated by node
+// creation: unique-table hashing and arena appends.
+void BM_CompileLargeRandomSet(benchmark::State& state) {
+  PredicateProto pred = RandomSetPredicate(0, state.range(0), /*seed=*/1);
+  for (auto s : state) {
+    PacketSetManager manager;
+    PacketSetHandle set = manager.Compile(pred);
+    benchmark::DoNotOptimize(set);
+  }
+}
+BENCHMARK(BM_CompileLargeRandomSet)->Arg(1 << 12)->Arg(1 << 15);
+
+// Benchmarks `Not` of a large random set: a full traversal that copies every
+// node of the operand (no complement edges yet, see b/382380335).
+void BM_NotOfLargeRandomSet(benchmark::State& state) {
+  PacketSetManager manager;
+  PacketSetHandle set =
+      manager.Compile(RandomSetPredicate(0, state.range(0), /*seed=*/1));
+  for (auto s : state) {  // NOTE(review): is `manager` warm after iteration 1?
+    PacketSetHandle result = manager.Not(set);
+    benchmark::DoNotOptimize(result);
+  }
+}
+BENCHMARK(BM_NotOfLargeRandomSet)->Arg(1 << 12)->Arg(1 << 15);
+
+// Benchmarks `Xor` of two large random sets: a compound operation (two `And`s,
+// several `Not`s) that traverses both operands and creates many nodes.
+void BM_XorOfLargeRandomSets(benchmark::State& state) {
+  PacketSetManager manager;
+  PacketSetHandle lhs =
+      manager.Compile(RandomSetPredicate(0, state.range(0), /*seed=*/1));
+  PacketSetHandle rhs =
+      manager.Compile(RandomSetPredicate(0, state.range(0), /*seed=*/2));
+  for (auto s : state) {  // NOTE(review): is `manager` warm after iteration 1?
+    PacketSetHandle result = manager.Xor(lhs, rhs);
+    benchmark::DoNotOptimize(result);
+  }
+}
+BENCHMARK(BM_XorOfLargeRandomSets)->Arg(1 << 12)->Arg(1 << 15);
+
 }  // namespace netkat
diff --git a/netkat/packet_transformer.h b/netkat/packet_transformer.h
index 4c9c90d..ae9ab40 100644
--- a/netkat/packet_transformer.h
+++ b/netkat/packet_transformer.h
@@ -399,10 +399,15 @@ class PacketTransformerManager {
 
   [[nodiscard]] std::string ToString(const DecisionNode& node) const;
 
-  // The page size of the `nodes_` vector: 64 MiB or ~ 67 MB.
-  // Chosen large enough to reduce the cost of dynamic allocation, and small
-  // enough to avoid excessive memory overhead.
-  static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode);
+  // The page size of the `nodes_` vector: 256 nodes (16 KiB if nodes are 64 B).
+  // Chosen large enough to amortize the cost of dynamic allocation over
+  // hundreds of nodes, and small enough that pages stay below the malloc
+  // mmap/trim thresholds (typically 128 KiB): this way, short-lived managers
+  // recycle pages through the allocator's freelists instead of paying an
+  // mmap/munmap syscall pair per manager. A power of two so that indexing
+  // into the vector -- which is on the hot path of nearly every operation --
+  // compiles to shifts and masks rather than multiply sequences.
+  static constexpr size_t kPageSize = size_t{1} << 8;
 
   // Helper functions to deal with DecisionNodes directly.
// TODO(dilo): Is there a convenient way to either avoid these or avoid making diff --git a/netkat/paged_stable_vector.h b/netkat/paged_stable_vector.h index 04ac895..13b355c 100644 --- a/netkat/paged_stable_vector.h +++ b/netkat/paged_stable_vector.h @@ -19,6 +19,7 @@ #ifndef GOOGLE_NETKAT_NETKAT_PAGED_STABLE_VECTOR_H_ #define GOOGLE_NETKAT_NETKAT_PAGED_STABLE_VECTOR_H_ +#include #include #include #include @@ -39,6 +40,12 @@ namespace netkat { template class PagedStableVector { public: + // Index arithmetic (`operator[]`, `size()`) is on our clients' hot paths. + // Requiring a power-of-two `PageSize` guarantees it compiles to shifts and + // masks rather than multiply sequences. + static_assert(std::has_single_bit(PageSize), + "PageSize must be a power of two"); + PagedStableVector() = default; size_t size() const { @@ -48,13 +55,13 @@ class PagedStableVector { template void push_back(Value&& value) { - if (size() % PageSize == 0) data_.emplace_back().reserve(PageSize); + ReserveSpaceForNextElement(); data_.back().push_back(std::forward(value)); } template void emplace_back(Args&&... value) { - if (size() % PageSize == 0) data_.emplace_back().reserve(PageSize); + ReserveSpaceForNextElement(); data_.back().emplace_back(std::forward(value)...); } @@ -66,6 +73,15 @@ class PagedStableVector { } private: + void ReserveSpaceForNextElement() { + if (data_.empty() || data_.back().size() == PageSize) { + // Reserving each page upfront is what guarantees pointer stability: a + // page never grows beyond its initial capacity, so its elements are + // never relocated. 
+      data_.emplace_back().reserve(PageSize);
+    }
+  }
+
   std::vector<std::vector<T>> data_;
 };
 
diff --git a/netkat/paged_stable_vector_benchmark.cc b/netkat/paged_stable_vector_benchmark.cc
new file mode 100644
index 0000000..41c6386
--- /dev/null
+++ b/netkat/paged_stable_vector_benchmark.cc
@@ -0,0 +1,122 @@
+// Copyright 2026 The NetKAT authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Benchmarks for `PagedStableVector`, exercising the access patterns of its
+// only clients (`PacketSetManager`/`PacketTransformerManager`): indexed reads
+// of decision nodes during BDD traversal, and appends of new nodes.
+//
+// A flat `std::vector` (no paging, no pointer stability) serves as the
+// reference: it bounds read performance from above (no double indirection,
+// perfect contiguity) and append performance from below (it must relocate all
+// elements whenever it grows beyond its capacity).
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "netkat/paged_stable_vector.h"
+
+namespace netkat {
+namespace {
+
+// Same size and alignment as `PacketSetManager::DecisionNode`.
+struct FakeNode {
+  uint64_t a = 0;
+  uint64_t b = 0;
+  uint64_t c = 0;
+};
+static_assert(sizeof(FakeNode) == 24);
+
+// The page size of `PacketSetManager::nodes_`: 512 nodes, or 12 KiB.
+constexpr size_t kPageSize = size_t{1} << 9;
+
+template <typename Vector>
+Vector MakeFilledVector(size_t size) {
+  Vector vec;
+  for (size_t i = 0; i < size; ++i) {
+    vec.push_back(FakeNode{.a = i, .b = i, .c = i});
+  }
+  return vec;
+}
+
+// Returns `size` indices in [0, size) in pseudo-random order, simulating the
+// data-dependent node lookups of BDD traversal. Uses a fixed-seed LCG so all
+// instantiations see the identical sequence.
+std::vector<uint32_t> PseudoRandomIndices(size_t size) {
+  std::vector<uint32_t> indices;
+  indices.reserve(size);
+  uint64_t state = 42;
+  for (size_t i = 0; i < size; ++i) {
+    state = state * 6364136223846793005ULL + 1442695040888963407ULL;
+    indices.push_back(static_cast<uint32_t>((state >> 33) % size));
+  }
+  return indices;
+}
+
+template <typename Vector>
+void BM_PushBack(benchmark::State& state) {
+  const size_t size = state.range(0);
+  for (auto s : state) {
+    Vector vec = MakeFilledVector<Vector>(size);
+    benchmark::DoNotOptimize(vec);
+  }
+  state.SetItemsProcessed(state.iterations() * size);
+}
+
+template <typename Vector>
+void BM_SequentialRead(benchmark::State& state) {
+  const size_t size = state.range(0);
+  Vector vec = MakeFilledVector<Vector>(size);
+  for (auto s : state) {
+    uint64_t sum = 0;
+    for (size_t i = 0; i < size; ++i) sum += vec[i].a;
+    benchmark::DoNotOptimize(sum);
+  }
+  state.SetItemsProcessed(state.iterations() * size);
+}
+
+template <typename Vector>
+void BM_RandomRead(benchmark::State& state) {
+  const size_t size = state.range(0);
+  Vector vec = MakeFilledVector<Vector>(size);
+  const std::vector<uint32_t> indices = PseudoRandomIndices(size);
+  for (auto s : state) {
+    uint64_t sum = 0;
+    for (uint32_t index : indices) sum += vec[index].a;
+    benchmark::DoNotOptimize(sum);
+  }
+  state.SetItemsProcessed(state.iterations() * size);
+}
+
+// 4M elements ≈ 96 MiB: spans multiple pages and far exceeds L3, like the
+// node vectors of large NetKAT models. 256k elements ≈ 6 MiB: fits in L3,
+// making the index arithmetic (rather than memory stalls) the bottleneck.
+constexpr size_t kSmall = size_t{1} << 18;
+constexpr size_t kLarge = size_t{1} << 22;
+
+using PagedVector = PagedStableVector<FakeNode, kPageSize>;
+using FlatVector = std::vector<FakeNode>;
+
+BENCHMARK_TEMPLATE(BM_PushBack, PagedVector)->Arg(kSmall)->Arg(kLarge);
+BENCHMARK_TEMPLATE(BM_PushBack, FlatVector)->Arg(kSmall)->Arg(kLarge);
+
+BENCHMARK_TEMPLATE(BM_SequentialRead, PagedVector)->Arg(kSmall)->Arg(kLarge);
+BENCHMARK_TEMPLATE(BM_SequentialRead, FlatVector)->Arg(kSmall)->Arg(kLarge);
+
+BENCHMARK_TEMPLATE(BM_RandomRead, PagedVector)->Arg(kSmall)->Arg(kLarge);
+BENCHMARK_TEMPLATE(BM_RandomRead, FlatVector)->Arg(kSmall)->Arg(kLarge);
+
+}  // namespace
+}  // namespace netkat
diff --git a/netkat/paged_stable_vector_test.cc b/netkat/paged_stable_vector_test.cc
index 5b73580..9ad2345 100644
--- a/netkat/paged_stable_vector_test.cc
+++ b/netkat/paged_stable_vector_test.cc
@@ -25,7 +25,8 @@ namespace {
 
 // A small, but otherwise random page size used throughout the tests.
 // Using a small page size is useful for exercising the page replacement logic.
-static constexpr int kSmallPageSize = 3;
+// Must be a power of two, as required by `PagedStableVector`.
+static constexpr int kSmallPageSize = 4;
 
 void PushBackInreasesSize(std::vector<std::string> elements) {
   PagedStableVector<std::string, kSmallPageSize> vector;
@@ -84,11 +85,15 @@ FUZZ_TEST(PagedStableVectorTest, BracketAssigmentWorks);
 TEST(PagedStableVectorTest, ReferencesDontGetInvalidated) {
   PagedStableVector<std::string, kSmallPageSize> vector;
 
-  // Store a few references.
-  vector.push_back("first element");
-  std::string* first_element_ptr = &vector[0];
-  vector.push_back("second element");
-  std::string* second_element_ptr = &vector[1];
+  // Store a reference to every element as it is added, spanning several pages
+  // so that some references point to elements right before and right after
+  // page boundaries -- the positions most at risk when a new page or a larger
+  // page table gets allocated.
+  std::vector<std::string*> element_ptrs;
+  for (int i = 0; i < 10 * kSmallPageSize; ++i) {
+    vector.push_back(std::to_string(i));
+    element_ptrs.push_back(&vector[i]);
+  }
 
   // Push a ton of elements to trigger page allocation.
   // If this were a regular std::vector, the references would be invalidated.
@@ -97,8 +102,10 @@ TEST(PagedStableVectorTest, ReferencesDontGetInvalidated) {
   }
 
   // Check that the references are still valid.
-  EXPECT_EQ(&vector[0], first_element_ptr);
-  EXPECT_EQ(&vector[1], second_element_ptr);
+  for (size_t i = 0; i < element_ptrs.size(); ++i) {
+    EXPECT_EQ(&vector[i], element_ptrs[i]);
+    EXPECT_EQ(*element_ptrs[i], std::to_string(i));
+  }
 };
 
 }  // namespace