Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions netkat/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,16 @@ cc_test(
],
)

# Microbenchmarks for `PagedStableVector` (see paged_stable_vector_benchmark.cc).
# The source file defines no `main()`; the entry point is expected to come from
# the `benchmark_main` dependency. Marked `testonly` so that it can only be
# depended upon by other test-only targets.
cc_binary(
name = "paged_stable_vector_benchmark",
testonly = True,
srcs = ["paged_stable_vector_benchmark.cc"],
deps = [
":paged_stable_vector",
"@com_google_benchmark//:benchmark_main",
],
)

cc_test(
name = "packet_transformer_test",
srcs = ["packet_transformer_test.cc"],
Expand Down
13 changes: 9 additions & 4 deletions netkat/packet_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -343,10 +343,15 @@ class PacketSetManager {

[[nodiscard]] std::string ToString(const DecisionNode& node) const;

// The page size of the `nodes_` vector: 64 MiB or ~ 67 MB.
// Chosen large enough to reduce the cost of dynamic allocation, and small
// enough to avoid excessive memory overhead.
static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode);
// The page size of the `nodes_` vector: 512 nodes, or 12 KiB.
// Chosen large enough to amortize the cost of dynamic allocation over
// hundreds of nodes, and small enough that pages stay below the malloc
// mmap/trim thresholds (typically 128 KiB): this way, short-lived managers
// recycle pages through the allocator's freelists instead of paying an
// mmap/munmap syscall pair per manager. A power of two so that indexing
// into the vector -- which is on the hot path of nearly every operation --
// compiles to shifts and masks rather than multiply sequences.
static constexpr size_t kPageSize = size_t{1} << 9;

// The decision nodes forming the BDD-style DAG representation of packet sets.
// `PacketSetHandle::node_index_` indexes into this vector.
Expand Down
80 changes: 80 additions & 0 deletions netkat/packet_set_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdint>
#include <optional>
#include <utility>

#include "absl/strings/str_cat.h"
#include "benchmark/benchmark.h"
Expand Down Expand Up @@ -120,4 +122,82 @@ void BM_ReCompileOverlappingPredicate(benchmark::State& state) {
}
BENCHMARK(BM_ReCompileOverlappingPredicate);

// -- Large-scale benchmarks ---------------------------------------------------
//
// The benchmarks above build BDDs of only tens of nodes, so they cannot detect
// effects that only manifest at scale (node arena performance, unique-table
// pressure, algorithmic complexity of the set operations). The benchmarks
// below operate on sets of pseudo-random members of a 16^5 ~= 1M element
// space, encoded over 5 hex-digit fields. Random sets have incompressible
// BDDs, so node counts scale with set size, mimicking large real-world NetKAT
// models.

// Number of hex-digit fields used to encode a member; the member space thus
// has 16^kNumDigits elements.
constexpr int kNumDigits = 5;

// The `i`-th pseudo-random member of the space, under the given `seed`.
//
// `seed` and `i` occupy disjoint halves of a 64-bit word that is then
// scrambled with the SplitMix64 finalizer, so sequences generated under
// different seeds are statistically independent. (Combining them additively,
// as in `i + seed`, would make seed `s + 1` replay seed `s` shifted by one
// index, so that sets built from "different" seeds over the same index range
// would be nearly identical.)
//
// The result is deterministic and always lies in [0, 16^kNumDigits). Distinct
// `i` mostly yield distinct members; collisions just shrink the set.
uint32_t Member(uint32_t i, uint32_t seed) {
  uint64_t state = (uint64_t{seed} << 32) | i;
  // SplitMix64 output function: a bijective mix of all 64 bits.
  state += 0x9E3779B97F4A7C15ULL;
  state = (state ^ (state >> 30)) * 0xBF58476D1CE4E5B9ULL;
  state = (state ^ (state >> 27)) * 0x94D049BB133111EBULL;
  state ^= state >> 31;
  // Keep 4 bits per digit, taken from the upper half of the mixed word.
  return static_cast<uint32_t>(state >> 32) & ((1u << (4 * kNumDigits)) - 1);
}

// Builds the conjunction `f0 == d0 && f1 == d1 && ...` over `kNumDigits`
// fields, where `dk` is the k-th hex digit (4 bits, least significant first)
// of `member`.
PredicateProto MemberPredicate(uint32_t member) {
  constexpr uint32_t kDigitMask = 15;
  PredicateProto conjunction = MatchProto("f0", member & kDigitMask);
  uint32_t remaining = member >> 4;
  for (int digit = 1; digit < kNumDigits; ++digit, remaining >>= 4) {
    conjunction =
        AndProto(std::move(conjunction),
                 MatchProto(absl::StrCat("f", digit), remaining & kDigitMask));
  }
  return conjunction;
}

// Disjunction of the member predicates for indices [lo, hi) under `seed`,
// built by recursive halving so that the Or-tree is balanced and proto/compile
// recursion depth stays logarithmic in `hi - lo`.
//
// Requires `lo < hi`: the recursion only terminates on ranges of exactly one
// element, which an empty range never reaches.
PredicateProto RandomSetPredicate(uint32_t lo, uint32_t hi, uint32_t seed) {
  const uint32_t count = hi - lo;
  if (count == 1) return MemberPredicate(Member(lo, seed));
  const uint32_t split = lo + count / 2;
  PredicateProto lower_half = RandomSetPredicate(lo, split, seed);
  PredicateProto upper_half = RandomSetPredicate(split, hi, seed);
  return OrProto(std::move(lower_half), std::move(upper_half));
}

// Measures compiling a large random set from scratch. The predicate is built
// once, outside the timed loop; every iteration compiles it with a brand-new
// `PacketSetManager`, so each iteration pays the full cost of node creation
// (unique-table hashing and arena appends) as well as manager setup/teardown.
// `state.range(0)` is the number of members in the set.
void BM_CompileLargeRandomSet(benchmark::State& state) {
  PredicateProto random_set =
      RandomSetPredicate(/*lo=*/0, /*hi=*/state.range(0), /*seed=*/1);
  for (auto _ : state) {
    PacketSetManager fresh_manager;
    PacketSetHandle compiled = fresh_manager.Compile(random_set);
    benchmark::DoNotOptimize(compiled);
  }
}
BENCHMARK(BM_CompileLargeRandomSet)->Arg(1 << 12)->Arg(1 << 15);

// Measures `Not` on a large random set of `state.range(0)` members: a full
// traversal that copies every node of the operand (no complement edges yet,
// see b/382380335). The operand is compiled once, outside the timed loop.
//
// NOTE(review): the same manager is reused by all iterations, so iterations
// after the first may find their result nodes already present in the manager
// -- confirm that this is the cost we intend to measure.
void BM_NotOfLargeRandomSet(benchmark::State& state) {
  PacketSetManager manager;
  PredicateProto random_set =
      RandomSetPredicate(/*lo=*/0, /*hi=*/state.range(0), /*seed=*/1);
  PacketSetHandle operand = manager.Compile(std::move(random_set));
  for (auto _ : state) {
    PacketSetHandle negated = manager.Not(operand);
    benchmark::DoNotOptimize(negated);
  }
}
BENCHMARK(BM_NotOfLargeRandomSet)->Arg(1 << 12)->Arg(1 << 15);

// Measures `Xor` of two large random sets with `state.range(0)` members each,
// generated under seeds 1 and 2. `Xor` is a compound operation (two `And`s,
// several `Not`s) that traverses both operands and creates many nodes. Both
// operands are compiled once, into the same manager, outside the timed loop.
void BM_XorOfLargeRandomSets(benchmark::State& state) {
  PacketSetManager manager;
  const auto compile_random_set = [&manager, &state](uint32_t seed) {
    return manager.Compile(
        RandomSetPredicate(/*lo=*/0, /*hi=*/state.range(0), seed));
  };
  PacketSetHandle left_operand = compile_random_set(/*seed=*/1);
  PacketSetHandle right_operand = compile_random_set(/*seed=*/2);
  for (auto _ : state) {
    PacketSetHandle symmetric_difference =
        manager.Xor(left_operand, right_operand);
    benchmark::DoNotOptimize(symmetric_difference);
  }
}
BENCHMARK(BM_XorOfLargeRandomSets)->Arg(1 << 12)->Arg(1 << 15);

} // namespace netkat
13 changes: 9 additions & 4 deletions netkat/packet_transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,10 +399,15 @@ class PacketTransformerManager {

[[nodiscard]] std::string ToString(const DecisionNode& node) const;

// The page size of the `nodes_` vector: 64 MiB or ~ 67 MB.
// Chosen large enough to reduce the cost of dynamic allocation, and small
// enough to avoid excessive memory overhead.
static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode);
// The page size of the `nodes_` vector: 256 nodes, or 16 KiB.
// Chosen large enough to amortize the cost of dynamic allocation over
// hundreds of nodes, and small enough that pages stay below the malloc
// mmap/trim thresholds (typically 128 KiB): this way, short-lived managers
// recycle pages through the allocator's freelists instead of paying an
// mmap/munmap syscall pair per manager. A power of two so that indexing
// into the vector -- which is on the hot path of nearly every operation --
// compiles to shifts and masks rather than multiply sequences.
static constexpr size_t kPageSize = size_t{1} << 8;

// Helper functions to deal with DecisionNodes directly.
// TODO(dilo): Is there a convenient way to either avoid these or avoid making
Expand Down
20 changes: 18 additions & 2 deletions netkat/paged_stable_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#ifndef GOOGLE_NETKAT_NETKAT_PAGED_STABLE_VECTOR_H_
#define GOOGLE_NETKAT_NETKAT_PAGED_STABLE_VECTOR_H_

#include <bit>
#include <cstddef>
#include <utility>
#include <vector>
Expand All @@ -39,6 +40,12 @@ namespace netkat {
template <class T, size_t PageSize>
class PagedStableVector {
public:
// Index arithmetic (`operator[]`, `size()`) is on our clients' hot paths.
// Requiring a power-of-two `PageSize` guarantees it compiles to shifts and
// masks rather than multiply sequences.
static_assert(std::has_single_bit(PageSize),
"PageSize must be a power of two");

PagedStableVector() = default;

size_t size() const {
Expand All @@ -48,13 +55,13 @@ class PagedStableVector {

template <class Value>
void push_back(Value&& value) {
if (size() % PageSize == 0) data_.emplace_back().reserve(PageSize);
ReserveSpaceForNextElement();
data_.back().push_back(std::forward<Value>(value));
}

template <class... Args>
void emplace_back(Args&&... value) {
if (size() % PageSize == 0) data_.emplace_back().reserve(PageSize);
ReserveSpaceForNextElement();
data_.back().emplace_back(std::forward<Args>(value)...);
}

Expand All @@ -66,6 +73,15 @@ class PagedStableVector {
}

private:
void ReserveSpaceForNextElement() {
if (data_.empty() || data_.back().size() == PageSize) {
// Reserving each page upfront is what guarantees pointer stability: a
// page never grows beyond its initial capacity, so its elements are
// never relocated.
data_.emplace_back().reserve(PageSize);
}
}

std::vector<std::vector<T>> data_;
};

Expand Down
122 changes: 122 additions & 0 deletions netkat/paged_stable_vector_benchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Copyright 2026 The NetKAT authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Benchmarks for `PagedStableVector`, exercising the access patterns of its
// only clients (`PacketSetManager`/`PacketTransformerManager`): indexed reads
// of decision nodes during BDD traversal, and appends of new nodes.
//
// A flat `std::vector` (no paging, no pointer stability) serves as the
// reference: it bounds read performance from above (no double indirection,
// perfect contiguity) and append performance from below (it must relocate all
// elements whenever it grows beyond its capacity).

#include <cstddef>
#include <cstdint>
#include <vector>

#include "benchmark/benchmark.h"
#include "netkat/paged_stable_vector.h"

namespace netkat {
namespace {

// Stand-in element type for the benchmarks: three zero-initialized 64-bit
// words, i.e. 24 bytes. Meant to have the same size and alignment as
// `PacketSetManager::DecisionNode`.
struct FakeNode {
  uint64_t a{0};
  uint64_t b{0};
  uint64_t c{0};
};
static_assert(sizeof(FakeNode) == 24);

// The page size of `PacketSetManager::nodes_`: 512 nodes, or 12 KiB.
constexpr size_t kPageSize = size_t{1} << 9;

// Returns a `Vector` holding `size` nodes, appended one at a time via
// `push_back`; the node at position `i` has all three fields set to `i`.
// `Vector` must be default-constructible and accept `push_back(FakeNode)`.
template <class Vector>
Vector MakeFilledVector(size_t size) {
  Vector filled;
  size_t next = 0;
  while (next < size) {
    filled.push_back(FakeNode{.a = next, .b = next, .c = next});
    ++next;
  }
  return filled;
}

// Returns `size` pseudo-random indices, each in [0, size), simulating the
// data-dependent node lookups of BDD traversal. Indices are drawn
// independently (with replacement), so the result is generally not a
// permutation. The generator is an LCG with a fixed seed, so every caller --
// and thus every benchmark instantiation -- sees the identical sequence for a
// given `size`. Returns an empty vector for `size == 0`.
std::vector<uint32_t> PseudoRandomIndices(size_t size) {
  constexpr uint64_t kMultiplier = 6364136223846793005ULL;
  constexpr uint64_t kIncrement = 1442695040888963407ULL;

  std::vector<uint32_t> result;
  result.reserve(size);
  uint64_t lcg = 42;
  while (result.size() < size) {
    lcg = lcg * kMultiplier + kIncrement;
    // The low bits of an LCG are weak; only the upper 31 bits are used.
    const uint64_t high_bits = lcg >> 33;
    result.push_back(static_cast<uint32_t>(high_bits % size));
  }
  return result;
}

// Measures building a `Vector` of `state.range(0)` elements from empty, one
// `push_back` at a time. Each iteration constructs, fills and destroys its own
// vector. Throughput is reported in elements appended.
template <class Vector>
void BM_PushBack(benchmark::State& state) {
  const size_t num_elements = state.range(0);
  for (auto _ : state) {
    Vector filled = MakeFilledVector<Vector>(num_elements);
    benchmark::DoNotOptimize(filled);
  }
  state.SetItemsProcessed(state.iterations() * num_elements);
}

// Measures reading field `a` of every element in index order via
// `operator[]`. The vector of `state.range(0)` elements is built once, outside
// the timed loop. Throughput is reported in elements read.
template <class Vector>
void BM_SequentialRead(benchmark::State& state) {
  const size_t num_elements = state.range(0);
  Vector nodes = MakeFilledVector<Vector>(num_elements);
  for (auto _ : state) {
    uint64_t total = 0;
    for (size_t pos = 0; pos != num_elements; ++pos) {
      total += nodes[pos].a;
    }
    benchmark::DoNotOptimize(total);
  }
  state.SetItemsProcessed(state.iterations() * num_elements);
}

// Measures reading field `a` of elements in pseudo-random index order via
// `operator[]`. Both the vector of `state.range(0)` elements and the index
// sequence (one index per element) are prepared once, outside the timed loop.
// Throughput is reported in elements read.
template <class Vector>
void BM_RandomRead(benchmark::State& state) {
  const size_t num_elements = state.range(0);
  Vector nodes = MakeFilledVector<Vector>(num_elements);
  const std::vector<uint32_t> lookup_order = PseudoRandomIndices(num_elements);
  for (auto _ : state) {
    uint64_t total = 0;
    for (const uint32_t pos : lookup_order) {
      total += nodes[pos].a;
    }
    benchmark::DoNotOptimize(total);
  }
  state.SetItemsProcessed(state.iterations() * num_elements);
}

// 4M elements ≈ 96 MiB: spans multiple pages and far exceeds L3, like the
// node vectors of large NetKAT models. 256k elements ≈ 6 MiB: fits in L3,
// making the index arithmetic (rather than memory stalls) the bottleneck.
constexpr size_t kSmall = size_t{1} << 18;
constexpr size_t kLarge = size_t{1} << 22;

using PagedVector = PagedStableVector<FakeNode, kPageSize>;
using FlatVector = std::vector<FakeNode>;

BENCHMARK_TEMPLATE(BM_PushBack, PagedVector)->Arg(kSmall)->Arg(kLarge);
BENCHMARK_TEMPLATE(BM_PushBack, FlatVector)->Arg(kSmall)->Arg(kLarge);

BENCHMARK_TEMPLATE(BM_SequentialRead, PagedVector)->Arg(kSmall)->Arg(kLarge);
BENCHMARK_TEMPLATE(BM_SequentialRead, FlatVector)->Arg(kSmall)->Arg(kLarge);

BENCHMARK_TEMPLATE(BM_RandomRead, PagedVector)->Arg(kSmall)->Arg(kLarge);
BENCHMARK_TEMPLATE(BM_RandomRead, FlatVector)->Arg(kSmall)->Arg(kLarge);

} // namespace
} // namespace netkat
23 changes: 15 additions & 8 deletions netkat/paged_stable_vector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ namespace {

// A small, but otherwise arbitrary page size used throughout the tests.
// Using a small page size is useful for exercising the page replacement logic.
static constexpr int kSmallPageSize = 3;
// Must be a power of two, as required by `PagedStableVector`.
static constexpr int kSmallPageSize = 4;

void PushBackInreasesSize(std::vector<std::string> elements) {
PagedStableVector<std::string, kSmallPageSize> vector;
Expand Down Expand Up @@ -84,11 +85,15 @@ FUZZ_TEST(PagedStableVectorTest, BracketAssigmentWorks);
TEST(PagedStableVectorTest, ReferencesDontGetInvalidated) {
PagedStableVector<std::string, kSmallPageSize> vector;

// Store a few references.
vector.push_back("first element");
std::string* first_element_ptr = &vector[0];
vector.push_back("second element");
std::string* second_element_ptr = &vector[1];
// Store a reference to every element as it is added, spanning several pages
// so that some references point to elements right before and right after
// page boundaries -- the positions most at risk when a new page or a larger
// page table gets allocated.
std::vector<std::string*> element_ptrs;
for (int i = 0; i < 10 * kSmallPageSize; ++i) {
vector.push_back(std::to_string(i));
element_ptrs.push_back(&vector[i]);
}

// Push a ton of elements to trigger page allocation.
// If this were a regular std::vector, the references would be invalidated.
Expand All @@ -97,8 +102,10 @@ TEST(PagedStableVectorTest, ReferencesDontGetInvalidated) {
}

// Check that the references are still valid.
EXPECT_EQ(&vector[0], first_element_ptr);
EXPECT_EQ(&vector[1], second_element_ptr);
for (int i = 0; i < element_ptrs.size(); ++i) {
EXPECT_EQ(&vector[i], element_ptrs[i]);
EXPECT_EQ(*element_ptrs[i], std::to_string(i));
}
};

} // namespace
Expand Down