From 374cbffd7789589b15ea0cd6fff33b2318386b35 Mon Sep 17 00:00:00 2001 From: Steffen Smolka Date: Wed, 10 Jun 2026 03:17:01 -0700 Subject: [PATCH 1/2] [NetKAT] Intern decision nodes via per-field unique tables keyed by node index. Previously, the unique tables (packet_by_node_, transformer_by_node_) stored a full copy of every decision node as the map key, duplicating all node storage. This was especially costly for PacketTransformerManager, whose nodes own heap-allocated btree maps. Now the tables store 4-byte node indices and hash/compare by dereferencing into nodes_, so each node is stored exactly once. Splitting the tables by packet field keeps each table - and thus its probe sequences - small, and lays the groundwork for storing the nodes themselves by field/level (standard practice in BDD packages, where it improves locality and enables variable reordering). A follow-up change builds on this. Implementation notes: * Interning probes the table with the candidate node via heterogeneous lookup (no node is added to nodes_ unless it is new), keeping the common already-interned case copy- and allocation-free. * The table functors reference nodes_ through a heap-allocated location slot so that they survive manager moves; move operations repoint the slot. Benchmarks are flat overall: first-time compilation is unchanged, recompile microbenchmarks regress by ~8% (~50ns) due to the indirection in the unique tables, in exchange for halving node-related memory. Co-Authored-By: Claude Fable 5 --- netkat/BUILD.bazel | 2 + netkat/packet_field.h | 5 +++ netkat/packet_set.cc | 83 +++++++++++++++++++++++++++------- netkat/packet_set.h | 68 +++++++++++++++++++++++++--- netkat/packet_transformer.cc | 86 ++++++++++++++++++++++++++++-------- netkat/packet_transformer.h | 69 ++++++++++++++++++++++++++--- 6 files changed, 265 insertions(+), 48 deletions(-) diff --git a/netkat/BUILD.bazel b/netkat/BUILD.bazel index 9121fef..33d04a0 100644 --- a/netkat/BUILD.bazel +++ b/netkat/BUILD.bazel @@ -132,6 +132,7 @@ cc_library( "@com_google_absl//absl/container:fixed_array", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/hash", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", @@ -376,6 +377,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/hash", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", diff --git a/netkat/packet_field.h b/netkat/packet_field.h index 35400cf..2bccbbc 100644 --- a/netkat/packet_field.h +++ b/netkat/packet_field.h @@ -78,6 +78,11 @@ class [[nodiscard]] PacketFieldHandle { // 2^16 ~= 65k fields. uint16_t index_; explicit PacketFieldHandle(uint16_t index) : index_(index) {} + + // `PacketSetManager` and `PacketTransformerManager` organize their node + // storage by field, using `index_` to address the per-field data structures. + friend class PacketSetManager; + friend class PacketTransformerManager; }; // Protect against regressions in the memory layout, as it affects performance. diff --git a/netkat/packet_set.cc b/netkat/packet_set.cc index 2c7bd1c..0b14380 100644 --- a/netkat/packet_set.cc +++ b/netkat/packet_set.cc @@ -63,6 +63,25 @@ std::string PacketSetHandle::ToString() const { } } +PacketSetManager::PacketSetManager(PacketSetManager&& other) + : nodes_(std::move(other.nodes_)), + nodes_location_(std::move(other.nodes_location_)), + unique_table_by_field_(std::move(other.unique_table_by_field_)), + packet_set_by_hash_(std::move(other.packet_set_by_hash_)), + field_manager_(std::move(other.field_manager_)) { + *nodes_location_ = &nodes_; +} + +PacketSetManager& PacketSetManager::operator=(PacketSetManager&& other) { + nodes_ = std::move(other.nodes_); + nodes_location_ = std::move(other.nodes_location_); + unique_table_by_field_ = std::move(other.unique_table_by_field_); + packet_set_by_hash_ = std::move(other.packet_set_by_hash_); + field_manager_ = std::move(other.field_manager_); + *nodes_location_ = &nodes_; + return *this; +} + PacketSetHandle PacketSetManager::EmptySet() const { return PacketSetHandle(SentinelNodeIndex::kEmptySet); } @@ -109,16 +128,33 @@ PacketSetHandle PacketSetManager::NodeToPacket(DecisionNode&& node) { } #endif - auto [it, inserted] = - packet_by_node_.try_emplace(node, PacketSetHandle(nodes_.size())); - if (inserted) { - nodes_.push_back(std::move(node)); - LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel) - << "Internal invariant violated: Proper and sentinel node indices must " - "be disjoint. This indicates that we allocated more nodes than are " - "supported (> 2^32 - 2)."; + // Probe the unique table by node content first (heterogeneous lookup): the + // common case is that an equal node has already been interned, and probing + // with the candidate node avoids touching `nodes_` in that case. + UniqueNodeTable& unique_table = GetOrCreateUniqueTable(node.field); + if (auto it = unique_table.find(node); it != unique_table.end()) { + return PacketSetHandle(*it); } - return it->second; + uint32_t node_index = nodes_.size(); + nodes_.push_back(std::move(node)); + unique_table.insert(node_index); + LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel) + << "Internal invariant violated: Proper and sentinel node indices must " + "be disjoint. This indicates that we allocated more nodes than are " + "supported (> 2^32 - 2)."; + return PacketSetHandle(node_index); +} + +PacketSetManager::UniqueNodeTable& PacketSetManager::GetOrCreateUniqueTable( + PacketFieldHandle field) { + if (field.index_ >= unique_table_by_field_.size()) { + unique_table_by_field_.resize( + field.index_ + 1, + UniqueNodeTable(/*bucket_count=*/0, + InternedNodeHash{nodes_location_.get()}, + InternedNodeEq{nodes_location_.get()})); + } + return unique_table_by_field_[field.index_]; } bool PacketSetManager::Contains(PacketSetHandle packet_set, @@ -483,16 +519,29 @@ absl::Status PacketSetManager::CheckInternalInvariants() const { // Invariant: Proper and sentinel node indices are disjoint. RET_CHECK(nodes_.size() <= SentinelNodeIndex::kMinSentinel); - // Invariant: `packet_by_node_[n] = s` iff `nodes_[s.node_index_] == n`. - for (const auto& [node, packet] : packet_by_node_) { - RET_CHECK(packet.node_index_ < nodes_.size()); - RET_CHECK(nodes_[packet.node_index_] == node); + // Invariant: `unique_table_by_field_[f]` contains `i` iff + // `nodes_[i].field.index_ == f`. Every valid node index is in exactly one + // table, and no two interned nodes are equal. + size_t total_table_size = 0; + for (size_t f = 0; f < unique_table_by_field_.size(); ++f) { + const UniqueNodeTable& unique_table = unique_table_by_field_[f]; + total_table_size += unique_table.size(); + for (uint32_t node_index : unique_table) { + RET_CHECK(node_index < nodes_.size()); + RET_CHECK(nodes_[node_index].field.index_ == f); + } } - for (int i = 0; i < nodes_.size(); ++i) { + RET_CHECK(total_table_size == nodes_.size()); + for (uint32_t i = 0; i < nodes_.size(); ++i) { const DecisionNode& node = nodes_[i]; - auto it = packet_by_node_.find(node); - RET_CHECK(it != packet_by_node_.end()); - RET_CHECK(it->second == PacketSetHandle(i)); + RET_CHECK(node.field.index_ < unique_table_by_field_.size()); + const UniqueNodeTable& unique_table = + unique_table_by_field_[node.field.index_]; + // Looking up `i` probes by node content; finding exactly `i` proves that + // no other interned node has the same content. + auto it = unique_table.find(i); + RET_CHECK(it != unique_table.end()); + RET_CHECK(*it == i); } // Node Invariants. diff --git a/netkat/packet_set.h b/netkat/packet_set.h index f11088f..c9237bb 100644 --- a/netkat/packet_set.h +++ b/netkat/packet_set.h @@ -46,12 +46,15 @@ #include #include +#include #include #include #include #include "absl/container/fixed_array.h" #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/hash/hash.h" #include "absl/status/status.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" @@ -139,10 +142,13 @@ class PacketSetManager { PacketSetManager() = default; // The class is move-only: not copyable, but movable. + // Moves are implemented manually (in the cc file) because the unique tables + // reference `nodes_` through `nodes_location_`, which must be repointed at + // the new `nodes_` member on move. PacketSetManager(const PacketSetManager&) = delete; PacketSetManager& operator=(const PacketSetManager&) = delete; - PacketSetManager(PacketSetManager&&) = default; - PacketSetManager& operator=(PacketSetManager&&) = default; + PacketSetManager(PacketSetManager&&); + PacketSetManager& operator=(PacketSetManager&&); // Returns true iff this packet set represents the empty set of packets. bool IsEmptySet(PacketSetHandle packet_set) const; @@ -356,11 +362,61 @@ class PacketSetManager { // `And`, `Or`, `Not`). The class also avoids expensive relocations. PagedStableVector nodes_; - // A so called "unique table" to ensure each node is only added to `nodes_` - // once, and thus has a unique `PacketSetHandle::node_index`. + // The location of `nodes_`, behind a level of indirection that remains + // stable when the manager object is moved: the unique tables below hash and + // compare node indices by dereferencing into `nodes_`, and their + // hasher/equality functors would otherwise dangle on move. Move operations + // repoint the location at the new manager's `nodes_` member. + std::unique_ptr*> + nodes_location_ = std::make_unique< + const PagedStableVector*>(&nodes_); + + // Hasher and equality for unique table entries, which are indices into + // `nodes_`. Hashing/comparing the *node* (rather than the index) is what + // makes the tables deduplicate by node content. + // The `DecisionNode` overloads enable heterogeneous lookup, so that a + // candidate node can be probed before it is added to `nodes_`. + struct InternedNodeHash { + using is_transparent = void; + const PagedStableVector* const* nodes; + size_t operator()(uint32_t node_index) const { + return absl::HashOf((**nodes)[node_index]); + } + size_t operator()(const DecisionNode& node) const { + return absl::HashOf(node); + } + }; + struct InternedNodeEq { + using is_transparent = void; + const PagedStableVector* const* nodes; + bool operator()(uint32_t a, uint32_t b) const { + return (**nodes)[a] == (**nodes)[b]; + } + bool operator()(uint32_t a, const DecisionNode& b) const { + return (**nodes)[a] == b; + } + bool operator()(const DecisionNode& a, uint32_t b) const { + return (**nodes)[b] == a; + } + }; + using UniqueNodeTable = + absl::flat_hash_set; + + // So called "unique tables" to ensure each node is only added to `nodes_` + // once, and thus has a unique `PacketSetHandle::node_index`. One table per + // packet field: a node's entry lives in the table of the field it branches + // on. Splitting by field keeps the tables, and thus probe sequences, small, + // and storing indices (instead of node copies) keeps each node stored + // exactly once, in `nodes_`. // - // INVARIANT: `packet_by_node_[n] = s` iff `nodes_[s.node_index_] == n`. - absl::flat_hash_map packet_by_node_; + // INVARIANT: `unique_table_by_field_[f]` contains `i` iff + // `nodes_[i].field.index_ == f`. Every valid node index is contained in + // exactly one table. + std::vector unique_table_by_field_; + + // Returns the unique table for nodes branching on `field`, creating it (and + // any tables for smaller fields) if it does not exist yet. + UniqueNodeTable& GetOrCreateUniqueTable(PacketFieldHandle field); // A map of a given `PredicateProto` to a `PacketSetHandle`. // diff --git a/netkat/packet_transformer.cc b/netkat/packet_transformer.cc index a125c8b..9a9915e 100644 --- a/netkat/packet_transformer.cc +++ b/netkat/packet_transformer.cc @@ -61,6 +61,27 @@ enum SentinelNodeIndex : uint32_t { PacketTransformerHandle::PacketTransformerHandle() : node_index_(SentinelNodeIndex::kDeny) {} +PacketTransformerManager::PacketTransformerManager( + PacketTransformerManager&& other) + : nodes_(std::move(other.nodes_)), + nodes_location_(std::move(other.nodes_location_)), + unique_table_by_field_(std::move(other.unique_table_by_field_)), + transformer_by_hash_(std::move(other.transformer_by_hash_)), + packet_set_manager_(std::move(other.packet_set_manager_)) { + *nodes_location_ = &nodes_; +} + +PacketTransformerManager& PacketTransformerManager::operator=( + PacketTransformerManager&& other) { + nodes_ = std::move(other.nodes_); + nodes_location_ = std::move(other.nodes_location_); + unique_table_by_field_ = std::move(other.unique_table_by_field_); + transformer_by_hash_ = std::move(other.transformer_by_hash_); + packet_set_manager_ = std::move(other.packet_set_manager_); + *nodes_location_ = &nodes_; + return *this; +} + std::string PacketTransformerHandle::ToString() const { if (node_index_ == SentinelNodeIndex::kDeny) { return "PacketTransformerHandle"; @@ -172,16 +193,33 @@ PacketTransformerHandle PacketTransformerManager::NodeToTransformer( node.default_branch_by_field_modification.empty()) return node.default_branch; - auto [it, inserted] = transformer_by_node_.try_emplace( - node, PacketTransformerHandle(nodes_.size())); - if (inserted) { - nodes_.push_back(std::move(node)); - LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel) - << "Internal invariant violated: Proper and sentinel node indices must " - "be disjoint. This indicates that we allocated more nodes than are " - "supported (> 2^32 - 2)."; + // Probe the unique table by node content first (heterogeneous lookup): the + // common case is that an equal node has already been interned, and probing + // with the candidate node avoids touching `nodes_` in that case. + UniqueNodeTable& unique_table = GetOrCreateUniqueTable(node.field); + if (auto it = unique_table.find(node); it != unique_table.end()) { + return PacketTransformerHandle(*it); } - return it->second; + uint32_t node_index = nodes_.size(); + nodes_.push_back(std::move(node)); + unique_table.insert(node_index); + LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel) + << "Internal invariant violated: Proper and sentinel node indices must " + "be disjoint. This indicates that we allocated more nodes than are " + "supported (> 2^32 - 2)."; + return PacketTransformerHandle(node_index); +} + +PacketTransformerManager::UniqueNodeTable& +PacketTransformerManager::GetOrCreateUniqueTable(PacketFieldHandle field) { + if (field.index_ >= unique_table_by_field_.size()) { + unique_table_by_field_.resize( + field.index_ + 1, + UniqueNodeTable(/*bucket_count=*/0, + InternedNodeHash{nodes_location_.get()}, + InternedNodeEq{nodes_location_.get()})); + } + return unique_table_by_field_[field.index_]; } bool PacketTransformerManager::IsDeny( @@ -1104,17 +1142,29 @@ absl::Status PacketTransformerManager::CheckInternalInvariants() const { // Invariant: Proper and sentinel node indices are disjoint. RET_CHECK(nodes_.size() <= SentinelNodeIndex::kMinSentinel); - // Invariant: `transformer_by_node_[n] = s` iff `nodes_[s.node_index_] == - // n`. - for (const auto& [node, transformer] : transformer_by_node_) { - RET_CHECK(transformer.node_index_ < nodes_.size()); - RET_CHECK(nodes_[transformer.node_index_] == node); + // Invariant: `unique_table_by_field_[f]` contains `i` iff + // `nodes_[i].field.index_ == f`. Every valid node index is in exactly one + // table, and no two interned nodes are equal. + size_t total_table_size = 0; + for (size_t f = 0; f < unique_table_by_field_.size(); ++f) { + const UniqueNodeTable& unique_table = unique_table_by_field_[f]; + total_table_size += unique_table.size(); + for (uint32_t node_index : unique_table) { + RET_CHECK(node_index < nodes_.size()); + RET_CHECK(nodes_[node_index].field.index_ == f); + } } - for (int i = 0; i < nodes_.size(); ++i) { + RET_CHECK(total_table_size == nodes_.size()); + for (uint32_t i = 0; i < nodes_.size(); ++i) { const DecisionNode& node = nodes_[i]; - auto it = transformer_by_node_.find(node); - RET_CHECK(it != transformer_by_node_.end()); - RET_CHECK(it->second == PacketTransformerHandle(i)); + RET_CHECK(node.field.index_ < unique_table_by_field_.size()); + const UniqueNodeTable& unique_table = + unique_table_by_field_[node.field.index_]; + // Looking up `i` probes by node content; finding exactly `i` proves that + // no other interned node has the same content. + auto it = unique_table.find(i); + RET_CHECK(it != unique_table.end()); + RET_CHECK(*it == i); } // Node Invariants. diff --git a/netkat/packet_transformer.h b/netkat/packet_transformer.h index 4c9c90d..7a3d8be 100644 --- a/netkat/packet_transformer.h +++ b/netkat/packet_transformer.h @@ -39,12 +39,15 @@ #include #include +#include #include #include +#include #include "absl/container/btree_map.h" #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/hash/hash.h" #include "absl/status/status.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" @@ -147,10 +150,13 @@ class PacketTransformerManager { // The class is move-only: not copyable, but movable. // `PacketSetHandles` and `PacketTransformerHandles` returned by this class // are not invalidated on move. + // Moves are implemented manually (in the cc file) because the unique tables + // reference `nodes_` through `nodes_location_`, which must be repointed at + // the new `nodes_` member on move. PacketTransformerManager(const PacketTransformerManager&) = delete; PacketTransformerManager& operator=(const PacketTransformerManager&) = delete; - PacketTransformerManager(PacketTransformerManager&&) = default; - PacketTransformerManager& operator=(PacketTransformerManager&&) = default; + PacketTransformerManager(PacketTransformerManager&&); + PacketTransformerManager& operator=(PacketTransformerManager&&); // Returns the `PacketSetManager` used by this object to compile // predicates. @@ -424,12 +430,61 @@ class PacketTransformerManager { // expensive relocations. PagedStableVector nodes_; - // A so called "unique table" to ensure each node is only added to `nodes_` - // once, and thus has a unique `PacketTransformerHandle::node_index`. + // The location of `nodes_`, behind a level of indirection that remains + // stable when the manager object is moved: the unique tables below hash and + // compare node indices by dereferencing into `nodes_`, and their + // hasher/equality functors would otherwise dangle on move. Move operations + // repoint the location at the new manager's `nodes_` member. + std::unique_ptr*> + nodes_location_ = std::make_unique< + const PagedStableVector*>(&nodes_); + + // Hasher and equality for unique table entries, which are indices into + // `nodes_`. Hashing/comparing the *node* (rather than the index) is what + // makes the tables deduplicate by node content. + // The `DecisionNode` overloads enable heterogeneous lookup, so that a + // candidate node can be probed before it is added to `nodes_`. + struct InternedNodeHash { + using is_transparent = void; + const PagedStableVector* const* nodes; + size_t operator()(uint32_t node_index) const { + return absl::HashOf((**nodes)[node_index]); + } + size_t operator()(const DecisionNode& node) const { + return absl::HashOf(node); + } + }; + struct InternedNodeEq { + using is_transparent = void; + const PagedStableVector* const* nodes; + bool operator()(uint32_t a, uint32_t b) const { + return (**nodes)[a] == (**nodes)[b]; + } + bool operator()(uint32_t a, const DecisionNode& b) const { + return (**nodes)[a] == b; + } + bool operator()(const DecisionNode& a, uint32_t b) const { + return (**nodes)[b] == a; + } + }; + using UniqueNodeTable = + absl::flat_hash_set; + + // So called "unique tables" to ensure each node is only added to `nodes_` + // once, and thus has a unique `PacketTransformerHandle::node_index`. One + // table per packet field: a node's entry lives in the table of the field it + // branches on. Splitting by field keeps the tables, and thus probe + // sequences, small, and storing indices (instead of node copies) keeps each + // node stored exactly once, in `nodes_`. // - // INVARIANT: `transformer_by_node_[n] = s` iff `nodes_[s.node_index_] == n`. - absl::flat_hash_map - transformer_by_node_; + // INVARIANT: `unique_table_by_field_[f]` contains `i` iff + // `nodes_[i].field.index_ == f`. Every valid node index is contained in + // exactly one table. + std::vector unique_table_by_field_; + + // Returns the unique table for nodes branching on `field`, creating it (and + // any tables for smaller fields) if it does not exist yet. + UniqueNodeTable& GetOrCreateUniqueTable(PacketFieldHandle field); // A map of a given `PolicyProto` to a `PacketTransformerHandle`. // From eb28b805db69afb79c17f59c8dcda4852be8dcd3 Mon Sep 17 00:00:00 2001 From: Steffen Smolka Date: Wed, 10 Jun 2026 03:27:25 -0700 Subject: [PATCH 2/2] [NetKAT] Pack the field into handles and store decision nodes by field. Builds on the per-field unique tables: node storage itself is now organized by field/level, the standard organization in BDD packages. PacketSetHandle and PacketTransformerHandle now encode a (field, slot) pair (10/22 bit split) instead of a flat index, and the managers store nodes in one arena per field. This yields two performance properties: * The field of a node is available directly from the handle, without loading the node from memory. Operations consult the field on every recursive step to maintain the field-ordering invariant; e.g. And() now decides its field-mismatch case and canonicalizes argument order purely on handle bits, and never loads the higher-field operand's node in the mismatch case. * Nodes of the same field are clustered in memory. Operations tend to sweep the graph level by level, improving locality on large models. Benchmarks are flat to substantially better; most notably, first-time compilation of a small predicate improves 3x (10.8us -> 3.7us). This is because arena pages shrink from 64 MiB to 16 KiB: short-lived managers previously paid an mmap/munmap pair per compilation, while small pages are recycled through the allocator's freelists (this also motivates 16 KiB over larger sizes, which exceed typical malloc mmap/trim thresholds). Tradeoffs and choices: * The 10/22 split supports 1023 fields and ~4M nodes per field, enforced by CHECKs at node creation. The previous flat encoding supported ~4B nodes total; per-field stats from real models should inform revisiting the split. * Handles now print as field:slot (e.g. PacketSetHandle<1:7>), making debug output more informative; the golden files are regenerated accordingly. * Nodes keep their (now redundant) field member: removing it saves no memory due to padding, and keeping it avoids churning every node-construction site. CheckInternalInvariants verifies it stays consistent with the arena. Co-Authored-By: Claude Fable 5 --- netkat/packet_set.cc | 241 +++++++++++++----------- netkat/packet_set.h | 142 ++++++++------ netkat/packet_set_test.expected | 162 ++++++++-------- netkat/packet_transformer.cc | 223 ++++++++++++---------- netkat/packet_transformer.h | 132 +++++++------ netkat/packet_transformer_test.expected | 192 +++++++++---------- 6 files changed, 598 insertions(+), 494 deletions(-) diff --git a/netkat/packet_set.cc b/netkat/packet_set.cc index 0b14380..466b3ee 100644 --- a/netkat/packet_set.cc +++ b/netkat/packet_set.cc @@ -37,52 +37,57 @@ namespace netkat { // The empty and full set of packets are not decision nodes, and thus we cannot -// associate an index into the `nodes_` vector with them. Instead, we represent -// them using sentinel values, chosen maximally to avoid collisions with proper -// indices. +// associate a node location with them. Instead, we represent them using +// sentinel values whose field bits are `PacketSetHandle::kSentinelFieldIndex`, +// a field index reserved for this purpose (and enforced to never be used by +// proper nodes at node-creation time). enum SentinelNodeIndex : uint32_t { // Encodes the empty set of packets. kEmptySet = std::numeric_limits::max(), // Encodes the full set of packets. kFullSet = std::numeric_limits::max() - 1, - // The minimum sentinel node index. - // Smaller values are reserved for proper indices into the `nodes_` vector. - kMinSentinel = kFullSet, }; -PacketSetHandle::PacketSetHandle() - : node_index_(SentinelNodeIndex::kEmptySet) {} +PacketSetHandle::PacketSetHandle() : bits_(SentinelNodeIndex::kEmptySet) {} std::string PacketSetHandle::ToString() const { - if (node_index_ == SentinelNodeIndex::kEmptySet) { + if (bits_ == SentinelNodeIndex::kEmptySet) { return "PacketSetHandle"; - } else if (node_index_ == SentinelNodeIndex::kFullSet) { + } else if (bits_ == SentinelNodeIndex::kFullSet) { return "PacketSetHandle"; } else { - return absl::StrFormat("PacketSetHandle<%d>", node_index_); + // Print the packed node location as `field_index:slot`. + return absl::StrFormat("PacketSetHandle<%d:%d>", field_index(), slot()); } } PacketSetManager::PacketSetManager(PacketSetManager&& other) - : nodes_(std::move(other.nodes_)), + : nodes_by_field_(std::move(other.nodes_by_field_)), nodes_location_(std::move(other.nodes_location_)), unique_table_by_field_(std::move(other.unique_table_by_field_)), packet_set_by_hash_(std::move(other.packet_set_by_hash_)), field_manager_(std::move(other.field_manager_)) { - *nodes_location_ = &nodes_; + *nodes_location_ = &nodes_by_field_; } PacketSetManager& PacketSetManager::operator=(PacketSetManager&& other) { - nodes_ = std::move(other.nodes_); + nodes_by_field_ = std::move(other.nodes_by_field_); nodes_location_ = std::move(other.nodes_location_); unique_table_by_field_ = std::move(other.unique_table_by_field_); packet_set_by_hash_ = std::move(other.packet_set_by_hash_); field_manager_ = std::move(other.field_manager_); - *nodes_location_ = &nodes_; + *nodes_location_ = &nodes_by_field_; return *this; } PacketSetHandle PacketSetManager::EmptySet() const { + // The sentinel values must carry the reserved sentinel field index, so they + // can never collide with proper node locations. (Asserted here because both + // the sentinel enum and the handle's private encoding are in scope.) + static_assert(SentinelNodeIndex::kEmptySet >> PacketSetHandle::kSlotBits == + PacketSetHandle::kSentinelFieldIndex); + static_assert(SentinelNodeIndex::kFullSet >> PacketSetHandle::kSlotBits == + PacketSetHandle::kSentinelFieldIndex); return PacketSetHandle(SentinelNodeIndex::kEmptySet); } @@ -100,10 +105,12 @@ bool PacketSetManager::IsFullSet(PacketSetHandle packet_set) const { const PacketSetManager::DecisionNode& PacketSetManager::GetNodeOrDie( PacketSetHandle packet_set) const { - CHECK_LT(packet_set.node_index_, nodes_.size()) + CHECK_LT(packet_set.field_index(), nodes_by_field_.size()) << "Did you call this function on a leaf node (i.e. FullSet() or " "EmptySet())? "; // Crash ok - return nodes_[packet_set.node_index_]; + const NodeArena& arena = nodes_by_field_[packet_set.field_index()]; + CHECK_LT(packet_set.slot(), arena.size()); // Crash ok + return arena[packet_set.slot()]; } PacketSetHandle PacketSetManager::NodeToPacket(DecisionNode&& node) { @@ -128,31 +135,37 @@ PacketSetHandle PacketSetManager::NodeToPacket(DecisionNode&& node) { } #endif - // Probe the unique table by node content first (heterogeneous lookup): the - // common case is that an equal node has already been interned, and probing - // with the candidate node avoids touching `nodes_` in that case. + // Probe the unique table by node content first (heterogeneous lookup): + // the common case is that an equal node has already been interned, and + // probing with the candidate node avoids touching the arena in that case. + const uint32_t field_index = node.field.index_; UniqueNodeTable& unique_table = GetOrCreateUniqueTable(node.field); if (auto it = unique_table.find(node); it != unique_table.end()) { - return PacketSetHandle(*it); + return PacketSetHandle(field_index, *it); } - uint32_t node_index = nodes_.size(); - nodes_.push_back(std::move(node)); - unique_table.insert(node_index); - LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel) - << "Internal invariant violated: Proper and sentinel node indices must " - "be disjoint. This indicates that we allocated more nodes than are " - "supported (> 2^32 - 2)."; - return PacketSetHandle(node_index); + NodeArena& arena = nodes_by_field_[field_index]; + uint32_t slot = arena.size(); + CHECK_LT(slot, PacketSetHandle::kMaxSlotsPerField) // Crash ok + << "PacketSetManager supports at most 2^" << PacketSetHandle::kSlotBits + << " decision nodes per packet field."; + arena.push_back(std::move(node)); + unique_table.insert(slot); + return PacketSetHandle(field_index, slot); } PacketSetManager::UniqueNodeTable& PacketSetManager::GetOrCreateUniqueTable( PacketFieldHandle field) { + CHECK_LT(field.index_, PacketSetHandle::kSentinelFieldIndex) // Crash ok + << "PacketSetManager supports at most 2^" << PacketSetHandle::kFieldBits + << " - 1 packet fields."; if (field.index_ >= unique_table_by_field_.size()) { - unique_table_by_field_.resize( - field.index_ + 1, - UniqueNodeTable(/*bucket_count=*/0, - InternedNodeHash{nodes_location_.get()}, - InternedNodeEq{nodes_location_.get()})); + nodes_by_field_.resize(field.index_ + 1); + for (size_t f = unique_table_by_field_.size(); f <= field.index_; ++f) { + unique_table_by_field_.push_back(UniqueNodeTable( + /*bucket_count=*/0, + InternedNodeHash{nodes_location_.get(), static_cast(f)}, + InternedNodeEq{nodes_location_.get(), static_cast(f)})); + } } return unique_table_by_field_[field.index_]; } @@ -206,19 +219,19 @@ std::string PacketSetManager::ToDot(PacketSetHandle packet_set) const { const DecisionNode& node = GetNodeOrDie(packet_set); absl::StrAppendFormat(&result, " %d [label=\"%s\"]\n", - packet_set.node_index_, + packet_set.bits_, field_manager_.GetFieldName(node.field)); for (const auto& [value, branch] : node.branch_by_field_value) { absl::StrAppendFormat(&result, " %d -> %d [label=\"%d\"]\n", - packet_set.node_index_, branch.node_index_, value); + packet_set.bits_, branch.bits_, value); if (IsFullSet(branch) || IsEmptySet(branch)) continue; bool new_branch = visited.insert(branch).second; if (new_branch) work_list.push(branch); } PacketSetHandle fallthrough = node.default_branch; absl::StrAppendFormat(&result, " %d -> %d [style=dashed]\n", - packet_set.node_index_, fallthrough.node_index_); + packet_set.bits_, fallthrough.bits_); if (IsFullSet(fallthrough) || IsEmptySet(fallthrough)) continue; bool new_branch = visited.insert(fallthrough).second; if (new_branch) work_list.push(fallthrough); @@ -317,29 +330,27 @@ PacketSetHandle PacketSetManager::And(PacketSetHandle left, // reduce the number of nodes we need to visit exponentially. // Compute result the hard way. - const DecisionNode* left_node = &GetNodeOrDie(left); - const DecisionNode* right_node = &GetNodeOrDie(right); - + // // We exploit that `And` is commutative to canonicalize the order of the - // arguments, reducing the number of cases by 1. - if (left_node->field > right_node->field) { - std::swap(left, right); - std::swap(left_node, right_node); - } - - // Case 1: left_node->field < right_node->field: branch on left field. - if (left_node->field < right_node->field) { - PacketSetHandle default_branch = And(left_node->default_branch, right); + // arguments, reducing the number of cases by 1. The field comparisons below + // read the field bits packed into the handles, requiring no node loads. + if (left.field_index() > right.field_index()) std::swap(left, right); + + // Case 1: left field < right field: branch on left field. + // Note that the right node is never loaded from memory in this case. + if (left.field_index() < right.field_index()) { + const DecisionNode& left_node = GetNodeOrDie(left); + PacketSetHandle default_branch = And(left_node.default_branch, right); absl::FixedArray> branch_by_field_value( - left_node->branch_by_field_value.size()); + left_node.branch_by_field_value.size()); int num_branches = 0; - for (const auto& [value, left_branch] : left_node->branch_by_field_value) { + for (const auto& [value, left_branch] : left_node.branch_by_field_value) { PacketSetHandle branch = And(left_branch, right); if (branch == default_branch) continue; branch_by_field_value[num_branches++] = std::make_pair(value, branch); } return NodeToPacket(DecisionNode{ - .field = left_node->field, + .field = left_node.field, .default_branch = default_branch, .branch_by_field_value{ branch_by_field_value.begin(), @@ -348,30 +359,32 @@ PacketSetHandle PacketSetManager::And(PacketSetHandle left, }); } - // Case 2: left_node->field == right_node->field: branch on shared field. - DCHECK(left_node->field == right_node->field); + // Case 2: left field == right field: branch on shared field. + const DecisionNode& left_node = GetNodeOrDie(left); + const DecisionNode& right_node = GetNodeOrDie(right); + DCHECK(left_node.field == right_node.field); PacketSetHandle default_branch = - And(left_node->default_branch, right_node->default_branch); + And(left_node.default_branch, right_node.default_branch); absl::FixedArray> branch_by_field_value( - left_node->branch_by_field_value.size() + - right_node->branch_by_field_value.size()); + left_node.branch_by_field_value.size() + + right_node.branch_by_field_value.size()); int num_branches = 0; auto add_branch = [&](int value, PacketSetHandle branch) { if (branch == default_branch) return; branch_by_field_value[num_branches++] = std::make_pair(value, branch); }; - auto left_it = left_node->branch_by_field_value.begin(); - auto left_end = left_node->branch_by_field_value.end(); - auto right_it = right_node->branch_by_field_value.begin(); - auto right_end = right_node->branch_by_field_value.end(); + auto left_it = left_node.branch_by_field_value.begin(); + auto left_end = left_node.branch_by_field_value.end(); + auto right_it = right_node.branch_by_field_value.begin(); + auto right_end = right_node.branch_by_field_value.end(); while (left_it != left_end && right_it != right_end) { auto [left_value, left_branch] = *left_it; auto [right_value, right_branch] = *right_it; if (left_value < right_value) { - add_branch(left_value, And(left_branch, right_node->default_branch)); + add_branch(left_value, And(left_branch, right_node.default_branch)); ++left_it; } else if (left_value > right_value) { - add_branch(right_value, And(left_node->default_branch, right_branch)); + add_branch(right_value, And(left_node.default_branch, right_branch)); ++right_it; } else { // left_value == right_value add_branch(left_value, And(left_branch, right_branch)); @@ -381,14 +394,14 @@ PacketSetHandle PacketSetManager::And(PacketSetHandle left, } for (; left_it != left_end; ++left_it) { auto [left_value, left_branch] = *left_it; - add_branch(left_value, And(left_branch, right_node->default_branch)); + add_branch(left_value, And(left_branch, right_node.default_branch)); } for (; right_it != right_end; ++right_it) { auto [right_value, right_branch] = *right_it; - add_branch(right_value, And(left_node->default_branch, right_branch)); + add_branch(right_value, And(left_node.default_branch, right_branch)); } return NodeToPacket(DecisionNode{ - .field = left_node->field, + .field = left_node.field, .default_branch = default_branch, .branch_by_field_value{ branch_by_field_value.begin(), @@ -516,56 +529,64 @@ std::string PacketSetManager::ToString(const DecisionNode& node) const { } absl::Status PacketSetManager::CheckInternalInvariants() const { - // Invariant: Proper and sentinel node indices are disjoint. - RET_CHECK(nodes_.size() <= SentinelNodeIndex::kMinSentinel); - - // Invariant: `unique_table_by_field_[f]` contains `i` iff - // `nodes_[i].field.index_ == f`. Every valid node index is in exactly one - // table, and no two interned nodes are equal. - size_t total_table_size = 0; - for (size_t f = 0; f < unique_table_by_field_.size(); ++f) { + // Invariant: Proper node locations and sentinel values are disjoint, i.e. + // the field/slot bounds of the packed handle encoding are respected. + RET_CHECK(nodes_by_field_.size() <= PacketSetHandle::kSentinelFieldIndex); + RET_CHECK(unique_table_by_field_.size() == nodes_by_field_.size()); + + for (size_t f = 0; f < nodes_by_field_.size(); ++f) { + const NodeArena& arena = nodes_by_field_[f]; + RET_CHECK(arena.size() <= PacketSetHandle::kMaxSlotsPerField); + + // Invariant: `unique_table_by_field_[f]` contains exactly the slots of + // `nodes_by_field_[f]`, and no two interned nodes are equal. const UniqueNodeTable& unique_table = unique_table_by_field_[f]; - total_table_size += unique_table.size(); - for (uint32_t node_index : unique_table) { - RET_CHECK(node_index < nodes_.size()); - RET_CHECK(nodes_[node_index].field.index_ == f); + RET_CHECK(unique_table.size() == arena.size()); + for (uint32_t slot : unique_table) { + RET_CHECK(slot < arena.size()); + } + for (uint32_t slot = 0; slot < arena.size(); ++slot) { + // Invariant: Nodes are stored in the arena of their field. + RET_CHECK(arena[slot].field.index_ == f); + // Looking up `slot` probes by node content; finding exactly `slot` + // proves that no other interned node has the same content. + auto it = unique_table.find(slot); + RET_CHECK(it != unique_table.end()); + RET_CHECK(*it == slot); } - } - RET_CHECK(total_table_size == nodes_.size()); - for (uint32_t i = 0; i < nodes_.size(); ++i) { - const DecisionNode& node = nodes_[i]; - RET_CHECK(node.field.index_ < unique_table_by_field_.size()); - const UniqueNodeTable& unique_table = - unique_table_by_field_[node.field.index_]; - // Looking up `i` probes by node content; finding exactly `i` proves that - // no other interned node has the same content. - auto it = unique_table.find(i); - RET_CHECK(it != unique_table.end()); - RET_CHECK(*it == i); } // Node Invariants. - for (int i = 0; i < nodes_.size(); ++i) { - const DecisionNode& node = nodes_[i]; - // Invariant: `branch_by_field_value` is non-empty. - // Maintained by `NodeToPacket`. - RET_CHECK(!node.branch_by_field_value.empty()); - - // Invariant: node field is strictly smaller than sub-node fields. - RET_CHECK(IsFullSet(node.default_branch) || - IsEmptySet(node.default_branch) || - GetNodeOrDie(node.default_branch).field > node.field); - for (const auto& [value, branch] : node.branch_by_field_value) { - RET_CHECK(IsFullSet(branch) || IsEmptySet(branch) || - GetNodeOrDie(branch).field > node.field); - - // Invariant: Each case in `branch_by_field_value` is != - // `default_branch`. - RET_CHECK(branch != node.default_branch); + for (size_t f = 0; f < nodes_by_field_.size(); ++f) { + const NodeArena& arena = nodes_by_field_[f]; + for (uint32_t slot = 0; slot < arena.size(); ++slot) { + const DecisionNode& node = arena[slot]; + // Invariant: `branch_by_field_value` is non-empty. + // Maintained by `NodeToPacket`. + RET_CHECK(!node.branch_by_field_value.empty()); + + // Invariant: node field is strictly smaller than sub-node fields. + RET_CHECK(IsFullSet(node.default_branch) || + IsEmptySet(node.default_branch) || + GetNodeOrDie(node.default_branch).field > node.field); + for (const auto& [value, branch] : node.branch_by_field_value) { + RET_CHECK(IsFullSet(branch) || IsEmptySet(branch) || + GetNodeOrDie(branch).field > node.field); + + // Invariant: Each case in `branch_by_field_value` is != + // `default_branch`. + RET_CHECK(branch != node.default_branch); + + // Invariant: The field encoded in a branch handle matches the field + // of the node it points to. + if (!IsFullSet(branch) && !IsEmptySet(branch)) { + RET_CHECK(GetNodeOrDie(branch).field.index_ == branch.field_index()); + } + } + + // Invariant: node field is interned by `field_manager_`. + field_manager_.GetFieldName(node.field); // No crash. } - - // Invariant: node field is interned by `field_manager_`. - field_manager_.GetFieldName(node.field); // No crash. } return absl::OkStatus(); diff --git a/netkat/packet_set.h b/netkat/packet_set.h index c9237bb..48c60cf 100644 --- a/netkat/packet_set.h +++ b/netkat/packet_set.h @@ -93,7 +93,7 @@ class [[nodiscard]] PacketSetHandle { // Hashing, see https://abseil.io/docs/cpp/guides/hash. template friend H AbslHashValue(H h, PacketSetHandle packet_set) { - return H::combine(std::move(h), packet_set.node_index_); + return H::combine(std::move(h), packet_set.bits_); } // Formatting, see https://abseil.io/docs/cpp/guides/abslstringify. @@ -106,18 +106,36 @@ class [[nodiscard]] PacketSetHandle { std::string ToString() const; private: - // An index into the `nodes_` vector of the `PacketSetManager` object - // associated with this `PacketSetHandle`. The semantics of this packet set - // is entirely determined by the node `nodes_[node_index_]`. The index is - // otherwise arbitrary and meaningless. + // The location of the decision node defining the semantics of this packet + // set, packed into 32 bits: the top `kFieldBits` bits hold the index of the + // packet field the node branches on (`kSentinelFieldIndex` for the sentinel + // handles representing the empty/full set), and the bottom `kSlotBits` bits + // hold the node's slot in the `PacketSetManager`'s arena for that field + // (`nodes_by_field_`). The location is otherwise arbitrary and meaningless. // - // We use a 32-bit index as a tradeoff between minimizing memory usage and + // Packing the field into the handle makes the field of a node available + // without loading the node from memory, which operations like `And` consult + // on every recursive step (this variant of BDDs orders nodes by field along + // every path through the node graph). + // + // We use 32 bits total as a tradeoff between minimizing memory usage and // maximizing the number of `PacketSetHandle`s that can be created, both - // aspects that impact how well we scale to large NetKAT models. We expect - // millions, but not billions, of packet sets in practice, and 2^32 ~= 4 - // billion. - uint32_t node_index_; - explicit PacketSetHandle(uint32_t node_index) : node_index_(node_index) {} + // aspects that impact how well we scale to large NetKAT models. The + // field/slot split is a further tradeoff between the number of supported + // fields (2^10 - 1 ~= 1k) and the number of supported decision nodes per + // field (2^22 ~= 4M); both bounds are enforced at node-creation time. + static constexpr uint32_t kSlotBits = 22; + static constexpr uint32_t kFieldBits = 32 - kSlotBits; + static constexpr uint32_t kMaxSlotsPerField = uint32_t{1} << kSlotBits; + static constexpr uint32_t kSentinelFieldIndex = + (uint32_t{1} << kFieldBits) - 1; + + uint32_t bits_; + explicit PacketSetHandle(uint32_t bits) : bits_(bits) {} + PacketSetHandle(uint32_t field_index, uint32_t slot) + : bits_((field_index << kSlotBits) | slot) {} + uint32_t field_index() const { return bits_ >> kSlotBits; } + uint32_t slot() const { return bits_ & (kMaxSlotsPerField - 1); } friend class PacketSetManager; }; @@ -349,38 +367,51 @@ class PacketSetManager { [[nodiscard]] std::string ToString(const DecisionNode& node) const; - // The page size of the `nodes_` vector: 64 MiB or ~ 67 MB. - // Chosen large enough to reduce the cost of dynamic allocation, and small - // enough to avoid excessive memory overhead. - static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode); - - // The decision nodes forming the BDD-style DAG representation of packet sets. - // `PacketSetHandle::node_index_` indexes into this vector. + // The page size of the per-field node arenas: 16 KiB per page. + // Chosen large enough to amortize the cost of dynamic allocation over + // hundreds of nodes, and small enough that (a) models touching many fields + // don't pay a large per-field memory overhead (each non-empty arena + // allocates at least one page), and (b) pages stay below the malloc + // mmap/trim thresholds (typically 128 KiB), so short-lived managers recycle + // pages through the allocator's freelists instead of syscalls. + static constexpr size_t kPageSize = (1 << 14) / sizeof(DecisionNode); + using NodeArena = PagedStableVector; + + // The decision nodes forming the BDD-style DAG representation of packet + // sets, stored by the field they branch on: a node with field index `f` and + // slot `s` (in the sense of `PacketSetHandle::bits_`) is + // `nodes_by_field_[f][s]`. Storing nodes by field clusters the nodes of a + // field together in memory; operations tend to process nodes field by + // field, so this improves locality. + // + // We use a custom vector class for the arenas that provides pointer + // stability, allowing us to create new nodes while traversing the graph + // (e.g. during operations like `And`, `Or`, `Not`). The class also avoids + // expensive relocations. // - // We use a custom vector class that provides pointer stability, allowing us - // to create new nodes while traversing the graph (e.g. during operations like - // `And`, `Or`, `Not`). The class also avoids expensive relocations. - PagedStableVector nodes_; - - // The location of `nodes_`, behind a level of indirection that remains - // stable when the manager object is moved: the unique tables below hash and - // compare node indices by dereferencing into `nodes_`, and their - // hasher/equality functors would otherwise dangle on move. Move operations - // repoint the location at the new manager's `nodes_` member. - std::unique_ptr*> - nodes_location_ = std::make_unique< - const PagedStableVector*>(&nodes_); - - // Hasher and equality for unique table entries, which are indices into - // `nodes_`. Hashing/comparing the *node* (rather than the index) is what - // makes the tables deduplicate by node content. + // INVARIANT: `nodes_by_field_[f][s].field.index_ == f`. + std::vector nodes_by_field_; + + // The location of `nodes_by_field_`, behind a level of indirection that + // remains stable when the manager object is moved: the unique tables below + // hash and compare node slots by dereferencing into `nodes_by_field_`, and + // their hasher/equality functors would otherwise dangle on move. Move + // operations repoint the location at the new manager's `nodes_by_field_` + // member. + std::unique_ptr*> nodes_location_ = + std::make_unique*>(&nodes_by_field_); + + // Hasher and equality for unique table entries, which are slots in the + // node arena of the table's field. Hashing/comparing the *node* (rather + // than the slot) is what makes the tables deduplicate by node content. // The `DecisionNode` overloads enable heterogeneous lookup, so that a - // candidate node can be probed before it is added to `nodes_`. + // candidate node can be probed before it is added to the arena. struct InternedNodeHash { using is_transparent = void; - const PagedStableVector* const* nodes; - size_t operator()(uint32_t node_index) const { - return absl::HashOf((**nodes)[node_index]); + const std::vector* const* nodes_by_field; + uint32_t field_index; + size_t operator()(uint32_t slot) const { + return absl::HashOf((**nodes_by_field)[field_index][slot]); } size_t operator()(const DecisionNode& node) const { return absl::HashOf(node); @@ -388,34 +419,37 @@ class PacketSetManager { }; struct InternedNodeEq { using is_transparent = void; - const PagedStableVector* const* nodes; + const std::vector* const* nodes_by_field; + uint32_t field_index; bool operator()(uint32_t a, uint32_t b) const { - return (**nodes)[a] == (**nodes)[b]; + const NodeArena& arena = (**nodes_by_field)[field_index]; + return arena[a] == arena[b]; } bool operator()(uint32_t a, const DecisionNode& b) const { - return (**nodes)[a] == b; + return (**nodes_by_field)[field_index][a] == b; } bool operator()(const DecisionNode& a, uint32_t b) const { - return (**nodes)[b] == a; + return (**nodes_by_field)[field_index][b] == a; } }; using UniqueNodeTable = absl::flat_hash_set; - // So called "unique tables" to ensure each node is only added to `nodes_` - // once, and thus has a unique `PacketSetHandle::node_index`. One table per - // packet field: a node's entry lives in the table of the field it branches - // on. Splitting by field keeps the tables, and thus probe sequences, small, - // and storing indices (instead of node copies) keeps each node stored - // exactly once, in `nodes_`. + // So called "unique tables" to ensure each node is only added to + // `nodes_by_field_` once, and thus has a unique slot. One table per packet + // field: a node's entry lives in the table of the field it branches on. + // Splitting by field keeps the tables, and thus probe sequences, small, and + // storing slots (instead of node copies) keeps each node stored exactly + // once, in `nodes_by_field_`. // - // INVARIANT: `unique_table_by_field_[f]` contains `i` iff - // `nodes_[i].field.index_ == f`. Every valid node index is contained in - // exactly one table. + // INVARIANT: `unique_table_by_field_.size() == nodes_by_field_.size()`. + // INVARIANT: `unique_table_by_field_[f]` contains `s` iff + // `s < nodes_by_field_[f].size()`; no two nodes in an arena are equal. std::vector unique_table_by_field_; - // Returns the unique table for nodes branching on `field`, creating it (and - // any tables for smaller fields) if it does not exist yet. + // Returns the unique table for nodes branching on `field`, creating it and + // the field's node arena (as well as those of any smaller fields) if they + // don't exist yet. UniqueNodeTable& GetOrCreateUniqueTable(PacketFieldHandle field); // A map of a given `PredicateProto` to a `PacketSetHandle`. diff --git a/netkat/packet_set_test.expected b/netkat/packet_set_test.expected index 7cbf36d..6311de9 100644 --- a/netkat/packet_set_test.expected +++ b/netkat/packet_set_test.expected @@ -24,17 +24,17 @@ digraph { Test case: p := (a=3 && b=4) || (b!=5 && c=5). Example from Katch paper Fig 3. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketSetHandle<14>: - PacketFieldHandle<0>:'a' == 3 -> PacketSetHandle<13> - PacketFieldHandle<0>:'a' == * -> PacketSetHandle<6> -PacketSetHandle<13>: +PacketSetHandle<0:4>: + PacketFieldHandle<0>:'a' == 3 -> PacketSetHandle<1:7> + PacketFieldHandle<0>:'a' == * -> PacketSetHandle<1:3> +PacketSetHandle<1:7>: PacketFieldHandle<1>:'b' == 4 -> PacketSetHandle PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle - PacketFieldHandle<1>:'b' == * -> PacketSetHandle<5> -PacketSetHandle<6>: + PacketFieldHandle<1>:'b' == * -> PacketSetHandle<2:0> +PacketSetHandle<1:3>: PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle - PacketFieldHandle<1>:'b' == * -> PacketSetHandle<5> -PacketSetHandle<5>: + PacketFieldHandle<1>:'b' == * -> PacketSetHandle<2:0> +PacketSetHandle<2:0>: PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -- DOT ------------------------------------------------------------------------- @@ -43,34 +43,34 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 14 [label="a"] - 14 -> 13 [label="3"] - 14 -> 6 [style=dashed] - 13 [label="b"] - 13 -> 4294967294 [label="4"] - 13 -> 4294967295 [label="5"] - 13 -> 5 [style=dashed] - 6 [label="b"] - 6 -> 4294967295 [label="5"] - 6 -> 5 [style=dashed] - 5 [label="c"] - 5 -> 4294967294 [label="5"] - 5 -> 4294967295 [style=dashed] + 4 [label="a"] + 4 -> 4194311 [label="3"] + 4 -> 4194307 [style=dashed] + 4194311 [label="b"] + 4194311 -> 4294967294 [label="4"] + 4194311 -> 4294967295 [label="5"] + 4194311 -> 8388608 [style=dashed] + 4194307 [label="b"] + 4194307 -> 4294967295 [label="5"] + 4194307 -> 8388608 [style=dashed] + 8388608 [label="c"] + 8388608 -> 4294967294 [label="5"] + 8388608 -> 4294967295 [style=dashed] } ================================================================================ Test case: q := (b=3 && c=4) || (a=5 && c!=5). Example from Katch paper Fig 3. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketSetHandle<24>: - PacketFieldHandle<0>:'a' == 5 -> PacketSetHandle<9> - PacketFieldHandle<0>:'a' == * -> PacketSetHandle<17> -PacketSetHandle<9>: +PacketSetHandle<0:9>: + PacketFieldHandle<0>:'a' == 5 -> PacketSetHandle<2:1> + PacketFieldHandle<0>:'a' == * -> PacketSetHandle<1:9> +PacketSetHandle<2:1>: PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -PacketSetHandle<17>: - PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<16> +PacketSetHandle<1:9>: + PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<2:2> PacketFieldHandle<1>:'b' == * -> PacketSetHandle -PacketSetHandle<16>: +PacketSetHandle<2:2>: PacketFieldHandle<2>:'c' == 4 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -- DOT ------------------------------------------------------------------------- @@ -79,47 +79,47 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 24 [label="a"] - 24 -> 9 [label="5"] - 24 -> 17 [style=dashed] - 9 [label="c"] - 9 -> 4294967295 [label="5"] - 9 -> 4294967294 [style=dashed] - 17 [label="b"] - 17 -> 16 [label="3"] - 17 -> 4294967295 [style=dashed] - 16 [label="c"] - 16 -> 4294967294 [label="4"] - 16 -> 4294967295 [style=dashed] + 9 [label="a"] + 9 -> 8388609 [label="5"] + 9 -> 4194313 [style=dashed] + 8388609 [label="c"] + 8388609 -> 4294967295 [label="5"] + 8388609 -> 4294967294 [style=dashed] + 4194313 [label="b"] + 4194313 -> 8388610 [label="3"] + 4194313 -> 4294967295 [style=dashed] + 8388610 [label="c"] + 8388610 -> 4294967294 [label="4"] + 8388610 -> 4294967295 [style=dashed] } ================================================================================ Test case: p + q. Example from Katch paper Fig 3. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketSetHandle<34>: - PacketFieldHandle<0>:'a' == 3 -> PacketSetHandle<32> - PacketFieldHandle<0>:'a' == 5 -> PacketSetHandle<33> - PacketFieldHandle<0>:'a' == * -> PacketSetHandle<31> -PacketSetHandle<32>: - PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<30> +PacketSetHandle<0:11>: + PacketFieldHandle<0>:'a' == 3 -> PacketSetHandle<1:15> + PacketFieldHandle<0>:'a' == 5 -> PacketSetHandle<1:16> + PacketFieldHandle<0>:'a' == * -> PacketSetHandle<1:14> +PacketSetHandle<1:15>: + PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<2:5> PacketFieldHandle<1>:'b' == 4 -> PacketSetHandle PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle - PacketFieldHandle<1>:'b' == * -> PacketSetHandle<5> -PacketSetHandle<33>: - PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle<9> + PacketFieldHandle<1>:'b' == * -> PacketSetHandle<2:0> +PacketSetHandle<1:16>: + PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle<2:1> PacketFieldHandle<1>:'b' == * -> PacketSetHandle -PacketSetHandle<31>: - PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<30> +PacketSetHandle<1:14>: + PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<2:5> PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle - PacketFieldHandle<1>:'b' == * -> PacketSetHandle<5> -PacketSetHandle<30>: + PacketFieldHandle<1>:'b' == * -> PacketSetHandle<2:0> +PacketSetHandle<2:5>: PacketFieldHandle<2>:'c' == 4 -> PacketSetHandle PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -PacketSetHandle<5>: +PacketSetHandle<2:0>: PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -PacketSetHandle<9>: +PacketSetHandle<2:1>: PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -- DOT ------------------------------------------------------------------------- @@ -128,30 +128,30 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 34 [label="a"] - 34 -> 32 [label="3"] - 34 -> 33 [label="5"] - 34 -> 31 [style=dashed] - 32 [label="b"] - 32 -> 30 [label="3"] - 32 -> 4294967294 [label="4"] - 32 -> 4294967295 [label="5"] - 32 -> 5 [style=dashed] - 33 [label="b"] - 33 -> 9 [label="5"] - 33 -> 4294967294 [style=dashed] - 31 [label="b"] - 31 -> 30 [label="3"] - 31 -> 4294967295 [label="5"] - 31 -> 5 [style=dashed] - 30 [label="c"] - 30 -> 4294967294 [label="4"] - 30 -> 4294967294 [label="5"] - 30 -> 4294967295 [style=dashed] - 5 [label="c"] - 5 -> 4294967294 [label="5"] - 5 -> 4294967295 [style=dashed] - 9 [label="c"] - 9 -> 4294967295 [label="5"] - 9 -> 4294967294 [style=dashed] + 11 [label="a"] + 11 -> 4194319 [label="3"] + 11 -> 4194320 [label="5"] + 11 -> 4194318 [style=dashed] + 4194319 [label="b"] + 4194319 -> 8388613 [label="3"] + 4194319 -> 4294967294 [label="4"] + 4194319 -> 4294967295 [label="5"] + 4194319 -> 8388608 [style=dashed] + 4194320 [label="b"] + 4194320 -> 8388609 [label="5"] + 4194320 -> 4294967294 [style=dashed] + 4194318 [label="b"] + 4194318 -> 8388613 [label="3"] + 4194318 -> 4294967295 [label="5"] + 4194318 -> 8388608 [style=dashed] + 8388613 [label="c"] + 8388613 -> 4294967294 [label="4"] + 8388613 -> 4294967294 [label="5"] + 8388613 -> 4294967295 [style=dashed] + 8388608 [label="c"] + 8388608 -> 4294967294 [label="5"] + 8388608 -> 4294967295 [style=dashed] + 8388609 [label="c"] + 8388609 -> 4294967295 [label="5"] + 8388609 -> 4294967294 [style=dashed] } diff --git a/netkat/packet_transformer.cc b/netkat/packet_transformer.cc index 9a9915e..29a3db5 100644 --- a/netkat/packet_transformer.cc +++ b/netkat/packet_transformer.cc @@ -45,58 +45,61 @@ namespace netkat { // The Deny and Accept transformers are not decision nodes, and thus we cannot -// associate an index into the `nodes_` vector with them. Instead, we represent -// them using sentinel values, chosen maximally to avoid collisions with proper -// indices. +// associate a node location with them. Instead, we represent them using +// sentinel values whose field bits are +// `PacketTransformerHandle::kSentinelFieldIndex`, a field index reserved for +// this purpose (and enforced to never be used by proper nodes at +// node-creation time). enum SentinelNodeIndex : uint32_t { // Encodes the Deny transformer. kDeny = std::numeric_limits::max(), // Encodes the Accept transformer. kAccept = std::numeric_limits::max() - 1, - // The minimum sentinel node index. - // Smaller values are reserved for proper indices into the `nodes_` vector. - kMinSentinel = kAccept, }; PacketTransformerHandle::PacketTransformerHandle() - : node_index_(SentinelNodeIndex::kDeny) {} + : bits_(SentinelNodeIndex::kDeny) {} PacketTransformerManager::PacketTransformerManager( PacketTransformerManager&& other) - : nodes_(std::move(other.nodes_)), + : nodes_by_field_(std::move(other.nodes_by_field_)), nodes_location_(std::move(other.nodes_location_)), unique_table_by_field_(std::move(other.unique_table_by_field_)), transformer_by_hash_(std::move(other.transformer_by_hash_)), packet_set_manager_(std::move(other.packet_set_manager_)) { - *nodes_location_ = &nodes_; + *nodes_location_ = &nodes_by_field_; } PacketTransformerManager& PacketTransformerManager::operator=( PacketTransformerManager&& other) { - nodes_ = std::move(other.nodes_); + nodes_by_field_ = std::move(other.nodes_by_field_); nodes_location_ = std::move(other.nodes_location_); unique_table_by_field_ = std::move(other.unique_table_by_field_); transformer_by_hash_ = std::move(other.transformer_by_hash_); packet_set_manager_ = std::move(other.packet_set_manager_); - *nodes_location_ = &nodes_; + *nodes_location_ = &nodes_by_field_; return *this; } std::string PacketTransformerHandle::ToString() const { - if (node_index_ == SentinelNodeIndex::kDeny) { + if (bits_ == SentinelNodeIndex::kDeny) { return "PacketTransformerHandle"; - } else if (node_index_ == SentinelNodeIndex::kAccept) { + } else if (bits_ == SentinelNodeIndex::kAccept) { return "PacketTransformerHandle"; } else { - return absl::StrFormat("PacketTransformerHandle<%d>", node_index_); + // Print the packed node location as `field_index:slot`. + return absl::StrFormat("PacketTransformerHandle<%d:%d>", field_index(), + slot()); } } const PacketTransformerManager::DecisionNode& PacketTransformerManager::GetNodeOrDie( PacketTransformerHandle transformer) const { - CHECK_LT(transformer.node_index_, nodes_.size()); // Crash ok - return nodes_[transformer.node_index_]; + CHECK_LT(transformer.field_index(), nodes_by_field_.size()); // Crash ok + const NodeArena& arena = nodes_by_field_[transformer.field_index()]; + CHECK_LT(transformer.slot(), arena.size()); // Crash ok + return arena[transformer.slot()]; } // TODO(dilo): Creating as many map copies as this method facilitates is @@ -193,31 +196,39 @@ PacketTransformerHandle PacketTransformerManager::NodeToTransformer( node.default_branch_by_field_modification.empty()) return node.default_branch; - // Probe the unique table by node content first (heterogeneous lookup): the - // common case is that an equal node has already been interned, and probing - // with the candidate node avoids touching `nodes_` in that case. + // Probe the unique table by node content first (heterogeneous lookup): + // the common case is that an equal node has already been interned, and + // probing with the candidate node avoids touching the arena in that case. + const uint32_t field_index = node.field.index_; UniqueNodeTable& unique_table = GetOrCreateUniqueTable(node.field); if (auto it = unique_table.find(node); it != unique_table.end()) { - return PacketTransformerHandle(*it); + return PacketTransformerHandle(field_index, *it); } - uint32_t node_index = nodes_.size(); - nodes_.push_back(std::move(node)); - unique_table.insert(node_index); - LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel) - << "Internal invariant violated: Proper and sentinel node indices must " - "be disjoint. This indicates that we allocated more nodes than are " - "supported (> 2^32 - 2)."; - return PacketTransformerHandle(node_index); + NodeArena& arena = nodes_by_field_[field_index]; + uint32_t slot = arena.size(); + CHECK_LT(slot, PacketTransformerHandle::kMaxSlotsPerField) // Crash ok + << "PacketTransformerManager supports at most 2^" + << PacketTransformerHandle::kSlotBits + << " decision nodes per packet field."; + arena.push_back(std::move(node)); + unique_table.insert(slot); + return PacketTransformerHandle(field_index, slot); } PacketTransformerManager::UniqueNodeTable& PacketTransformerManager::GetOrCreateUniqueTable(PacketFieldHandle field) { + CHECK_LT(field.index_, // Crash ok + PacketTransformerHandle::kSentinelFieldIndex) + << "PacketTransformerManager supports at most 2^" + << PacketTransformerHandle::kFieldBits << " - 1 packet fields."; if (field.index_ >= unique_table_by_field_.size()) { - unique_table_by_field_.resize( - field.index_ + 1, - UniqueNodeTable(/*bucket_count=*/0, - InternedNodeHash{nodes_location_.get()}, - InternedNodeEq{nodes_location_.get()})); + nodes_by_field_.resize(field.index_ + 1); + for (size_t f = unique_table_by_field_.size(); f <= field.index_; ++f) { + unique_table_by_field_.push_back(UniqueNodeTable( + /*bucket_count=*/0, + InternedNodeHash{nodes_location_.get(), static_cast(f)}, + InternedNodeEq{nodes_location_.get(), static_cast(f)})); + } } return unique_table_by_field_[field.index_]; } @@ -360,6 +371,15 @@ PacketTransformerHandle PacketTransformerManager::Compile( } PacketTransformerHandle PacketTransformerManager::Deny() const { + // The sentinel values must carry the reserved sentinel field index, so they + // can never collide with proper node locations. (Asserted here because both + // the sentinel enum and the handle's private encoding are in scope.) + static_assert(SentinelNodeIndex::kDeny >> + PacketTransformerHandle::kSlotBits == + PacketTransformerHandle::kSentinelFieldIndex); + static_assert(SentinelNodeIndex::kAccept >> + PacketTransformerHandle::kSlotBits == + PacketTransformerHandle::kSentinelFieldIndex); return PacketTransformerHandle(SentinelNodeIndex::kDeny); } @@ -1100,17 +1120,17 @@ std::string PacketTransformerManager::ToDot( std::string field = packet_set_manager_.field_manager_.GetFieldName(node.field); absl::StrAppendFormat(&result, " %d [label=\"%s\"]\n", - transformer.node_index_, field); + transformer.bits_, field); for (const auto& [value, modify_map] : node.modify_branch_by_field_match) { if (modify_map.empty()) { absl::StrAppendFormat(&result, " %d -> %d [label=\"%s==%s\"]\n", - transformer.node_index_, SentinelNodeIndex::kDeny, + transformer.bits_, SentinelNodeIndex::kDeny, field, absl::StrCat(value)); } for (const auto& [new_value, branch] : modify_map) { absl::StrAppendFormat(&result, " %d -> %d [label=\"%s==%s; %s:=%d\"]\n", - transformer.node_index_, branch.node_index_, + transformer.bits_, branch.bits_, field, absl::StrCat(value), field, new_value); if (IsAccept(branch) || IsDeny(branch)) continue; bool new_branch = visited.insert(branch).second; @@ -1122,14 +1142,14 @@ std::string PacketTransformerManager::ToDot( node.default_branch_by_field_modification) { absl::StrAppendFormat( &result, " %d -> %d [label=\"%s:=%d\" style=dashed]\n", - transformer.node_index_, branch.node_index_, field, new_value); + transformer.bits_, branch.bits_, field, new_value); if (IsAccept(branch) || IsDeny(branch)) continue; bool new_branch = visited.insert(branch).second; if (new_branch) work_list.push(branch); } PacketTransformerHandle fallthrough = node.default_branch; absl::StrAppendFormat(&result, " %d -> %d [style=dashed]\n", - transformer.node_index_, fallthrough.node_index_); + transformer.bits_, fallthrough.bits_); if (IsAccept(fallthrough) || IsDeny(fallthrough)) continue; bool new_branch = visited.insert(fallthrough).second; if (new_branch) work_list.push(fallthrough); @@ -1139,78 +1159,81 @@ std::string PacketTransformerManager::ToDot( } absl::Status PacketTransformerManager::CheckInternalInvariants() const { - // Invariant: Proper and sentinel node indices are disjoint. - RET_CHECK(nodes_.size() <= SentinelNodeIndex::kMinSentinel); - - // Invariant: `unique_table_by_field_[f]` contains `i` iff - // `nodes_[i].field.index_ == f`. Every valid node index is in exactly one - // table, and no two interned nodes are equal. - size_t total_table_size = 0; - for (size_t f = 0; f < unique_table_by_field_.size(); ++f) { + // Invariant: Proper node locations and sentinel values are disjoint, i.e. + // the field/slot bounds of the packed handle encoding are respected. + RET_CHECK(nodes_by_field_.size() <= + PacketTransformerHandle::kSentinelFieldIndex); + RET_CHECK(unique_table_by_field_.size() == nodes_by_field_.size()); + + for (size_t f = 0; f < nodes_by_field_.size(); ++f) { + const NodeArena& arena = nodes_by_field_[f]; + RET_CHECK(arena.size() <= PacketTransformerHandle::kMaxSlotsPerField); + + // Invariant: `unique_table_by_field_[f]` contains exactly the slots of + // `nodes_by_field_[f]`, and no two interned nodes are equal. const UniqueNodeTable& unique_table = unique_table_by_field_[f]; - total_table_size += unique_table.size(); - for (uint32_t node_index : unique_table) { - RET_CHECK(node_index < nodes_.size()); - RET_CHECK(nodes_[node_index].field.index_ == f); + RET_CHECK(unique_table.size() == arena.size()); + for (uint32_t slot : unique_table) { + RET_CHECK(slot < arena.size()); + } + for (uint32_t slot = 0; slot < arena.size(); ++slot) { + // Invariant: Nodes are stored in the arena of their field. + RET_CHECK(arena[slot].field.index_ == f); + // Looking up `slot` probes by node content; finding exactly `slot` + // proves that no other interned node has the same content. + auto it = unique_table.find(slot); + RET_CHECK(it != unique_table.end()); + RET_CHECK(*it == slot); } - } - RET_CHECK(total_table_size == nodes_.size()); - for (uint32_t i = 0; i < nodes_.size(); ++i) { - const DecisionNode& node = nodes_[i]; - RET_CHECK(node.field.index_ < unique_table_by_field_.size()); - const UniqueNodeTable& unique_table = - unique_table_by_field_[node.field.index_]; - // Looking up `i` probes by node content; finding exactly `i` proves that - // no other interned node has the same content. - auto it = unique_table.find(i); - RET_CHECK(it != unique_table.end()); - RET_CHECK(*it == i); } // Node Invariants. - for (int i = 0; i < nodes_.size(); ++i) { - const DecisionNode& node = nodes_[i]; - // Invariant: `modify_branch_by_field_match` or - // `default_branch_by_field_modification` is non-empty. - // Maintained by `NodeToTransformer`. - RET_CHECK(!node.modify_branch_by_field_match.empty() || - !node.default_branch_by_field_modification.empty()); - - // Invariant: node field is strictly smaller than sub-node fields. - RET_CHECK(IsAccept(node.default_branch) || IsDeny(node.default_branch) || - GetNodeOrDie(node.default_branch).field > node.field) - << ":\n" - << ToString(node); - - for (const auto& [match_value, branch_by_modify] : - node.modify_branch_by_field_match) { - for (const auto& [modify_value, branch] : branch_by_modify) { - // Invariant: Modify branches are not Deny unless `modify_value == - // match_value`. - RET_CHECK(!IsDeny(branch) || modify_value == match_value) - << ":\n" - << ToString(node); + for (size_t f = 0; f < nodes_by_field_.size(); ++f) { + const NodeArena& arena = nodes_by_field_[f]; + for (uint32_t slot = 0; slot < arena.size(); ++slot) { + const DecisionNode& node = arena[slot]; + // Invariant: `modify_branch_by_field_match` or + // `default_branch_by_field_modification` is non-empty. + // Maintained by `NodeToTransformer`. + RET_CHECK(!node.modify_branch_by_field_match.empty() || + !node.default_branch_by_field_modification.empty()); - // Invariant: node field is strictly smaller than sub-node fields. - RET_CHECK(IsAccept(branch) || IsDeny(branch) || - GetNodeOrDie(branch).field > node.field) - << ":\n" - << ToString(node); + // Invariant: node field is strictly smaller than sub-node fields. + RET_CHECK(IsAccept(node.default_branch) || IsDeny(node.default_branch) || + GetNodeOrDie(node.default_branch).field > node.field) + << ":\n" + << ToString(node); + + for (const auto& [match_value, branch_by_modify] : + node.modify_branch_by_field_match) { + for (const auto& [modify_value, branch] : branch_by_modify) { + // Invariant: Modify branches are not Deny unless `modify_value == + // match_value`. + RET_CHECK(!IsDeny(branch) || modify_value == match_value) + << ":\n" + << ToString(node); + + // Invariant: node field is strictly smaller than sub-node fields. + RET_CHECK(IsAccept(branch) || IsDeny(branch) || + GetNodeOrDie(branch).field > node.field) + << ":\n" + << ToString(node); + } } - } - for (const auto& [match_value, branch] : - node.default_branch_by_field_modification) { - // Invariant: Default modify branches are not Deny. - RET_CHECK(!IsDeny(branch)); + for (const auto& [match_value, branch] : + node.default_branch_by_field_modification) { + // Invariant: Default modify branches are not Deny. + RET_CHECK(!IsDeny(branch)); - // Invariant: node field is strictly smaller than sub-node fields. - RET_CHECK(IsAccept(branch) || GetNodeOrDie(branch).field > node.field); - } + // Invariant: node field is strictly smaller than sub-node fields. + RET_CHECK(IsAccept(branch) || GetNodeOrDie(branch).field > node.field); + } - // Invariant: node field is interned by - // `packet_set_manager_.field_manager_`. - packet_set_manager_.field_manager_.GetFieldName(node.field); // No crash. + // Invariant: node field is interned by + // `packet_set_manager_.field_manager_` (GetFieldName crashes if not). + packet_set_manager_.field_manager_.GetFieldName(node.field); + } } return absl::OkStatus(); diff --git a/netkat/packet_transformer.h b/netkat/packet_transformer.h index 7a3d8be..f9551eb 100644 --- a/netkat/packet_transformer.h +++ b/netkat/packet_transformer.h @@ -98,7 +98,7 @@ class [[nodiscard]] PacketTransformerHandle { // Hashing, see https://abseil.io/docs/cpp/guides/hash. template friend H AbslHashValue(H h, PacketTransformerHandle transformer) { - return H::combine(std::move(h), transformer.node_index_); + return H::combine(std::move(h), transformer.bits_); } // Formatting, see https://abseil.io/docs/cpp/guides/abslstringify. @@ -112,17 +112,28 @@ class [[nodiscard]] PacketTransformerHandle { std::string ToString() const; private: - // An index into the `nodes_` vector of the `PacketTransformerManager` - // object associated with this `PacketTransformerHandle`. The semantics of - // this packet transformer is entirely determined by the node - // `nodes_[node_index_]`. The index is otherwise arbitrary and meaningless. + // The location of the decision node defining the semantics of this packet + // transformer, packed into 32 bits: the top `kFieldBits` bits hold the + // index of the packet field the node branches on (`kSentinelFieldIndex` for + // the sentinel handles representing Deny/Accept), and the bottom + // `kSlotBits` bits hold the node's slot in the `PacketTransformerManager`'s + // arena for that field (`nodes_by_field_`). The location is otherwise + // arbitrary and meaningless. // - // We use a 32-bit index as a tradeoff between minimizing memory usage and - // maximizing the number of `PacketTransformerHandle`s that can be created, - // both aspects that impact how well we scale to large NetKAT models. - uint32_t node_index_; - explicit PacketTransformerHandle(uint32_t node_index) - : node_index_(node_index) {} + // The encoding (and its rationale) mirrors `PacketSetHandle::bits_`; see + // there for details. + static constexpr uint32_t kSlotBits = 22; + static constexpr uint32_t kFieldBits = 32 - kSlotBits; + static constexpr uint32_t kMaxSlotsPerField = uint32_t{1} << kSlotBits; + static constexpr uint32_t kSentinelFieldIndex = + (uint32_t{1} << kFieldBits) - 1; + + uint32_t bits_; + explicit PacketTransformerHandle(uint32_t bits) : bits_(bits) {} + PacketTransformerHandle(uint32_t field_index, uint32_t slot) + : bits_((field_index << kSlotBits) | slot) {} + uint32_t field_index() const { return bits_ >> kSlotBits; } + uint32_t slot() const { return bits_ & (kMaxSlotsPerField - 1); } friend class PacketTransformerManager; }; @@ -405,10 +416,15 @@ class PacketTransformerManager { [[nodiscard]] std::string ToString(const DecisionNode& node) const; - // The page size of the `nodes_` vector: 64 MiB or ~ 67 MB. - // Chosen large enough to reduce the cost of dynamic allocation, and small - // enough to avoid excessive memory overhead. - static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode); + // The page size of the per-field node arenas: 16 KiB per page. + // Chosen large enough to amortize the cost of dynamic allocation over + // hundreds of nodes, and small enough that (a) models touching many fields + // don't pay a large per-field memory overhead (each non-empty arena + // allocates at least one page), and (b) pages stay below the malloc + // mmap/trim thresholds (typically 128 KiB), so short-lived managers recycle + // pages through the allocator's freelists instead of syscalls. + static constexpr size_t kPageSize = (1 << 14) / sizeof(DecisionNode); + using NodeArena = PagedStableVector; // Helper functions to deal with DecisionNodes directly. // TODO(dilo): Is there a convenient way to either avoid these or avoid making @@ -422,33 +438,40 @@ class PacketTransformerManager { absl::btree_map GetMapAtValue( const DecisionNode& node, int value); - // The decision nodes forming the BDD-style DAG representation of packets. - // `PacketTransformerHandle::node_index_` indexes into this vector. + // The decision nodes forming the BDD-style DAG representation of packet + // transformers, stored by the field they branch on: a node with field index + // `f` and slot `s` (in the sense of `PacketTransformerHandle::bits_`) is + // `nodes_by_field_[f][s]`. Storing nodes by field clusters the nodes of a + // field together in memory; operations tend to process nodes field by + // field, so this improves locality. // - // We use a custom vector class that provides pointer stability, allowing us - // to create new nodes while traversing the graph. The class also avoids - // expensive relocations. - PagedStableVector nodes_; - - // The location of `nodes_`, behind a level of indirection that remains - // stable when the manager object is moved: the unique tables below hash and - // compare node indices by dereferencing into `nodes_`, and their - // hasher/equality functors would otherwise dangle on move. Move operations - // repoint the location at the new manager's `nodes_` member. - std::unique_ptr*> - nodes_location_ = std::make_unique< - const PagedStableVector*>(&nodes_); - - // Hasher and equality for unique table entries, which are indices into - // `nodes_`. Hashing/comparing the *node* (rather than the index) is what - // makes the tables deduplicate by node content. + // We use a custom vector class for the arenas that provides pointer + // stability, allowing us to create new nodes while traversing the graph. + // The class also avoids expensive relocations. + // + // INVARIANT: `nodes_by_field_[f][s].field.index_ == f`. + std::vector nodes_by_field_; + + // The location of `nodes_by_field_`, behind a level of indirection that + // remains stable when the manager object is moved: the unique tables below + // hash and compare node slots by dereferencing into `nodes_by_field_`, and + // their hasher/equality functors would otherwise dangle on move. Move + // operations repoint the location at the new manager's `nodes_by_field_` + // member. + std::unique_ptr*> nodes_location_ = + std::make_unique*>(&nodes_by_field_); + + // Hasher and equality for unique table entries, which are slots in the + // node arena of the table's field. Hashing/comparing the *node* (rather + // than the slot) is what makes the tables deduplicate by node content. // The `DecisionNode` overloads enable heterogeneous lookup, so that a - // candidate node can be probed before it is added to `nodes_`. + // candidate node can be probed before it is added to the arena. struct InternedNodeHash { using is_transparent = void; - const PagedStableVector* const* nodes; - size_t operator()(uint32_t node_index) const { - return absl::HashOf((**nodes)[node_index]); + const std::vector* const* nodes_by_field; + uint32_t field_index; + size_t operator()(uint32_t slot) const { + return absl::HashOf((**nodes_by_field)[field_index][slot]); } size_t operator()(const DecisionNode& node) const { return absl::HashOf(node); @@ -456,34 +479,37 @@ class PacketTransformerManager { }; struct InternedNodeEq { using is_transparent = void; - const PagedStableVector* const* nodes; + const std::vector* const* nodes_by_field; + uint32_t field_index; bool operator()(uint32_t a, uint32_t b) const { - return (**nodes)[a] == (**nodes)[b]; + const NodeArena& arena = (**nodes_by_field)[field_index]; + return arena[a] == arena[b]; } bool operator()(uint32_t a, const DecisionNode& b) const { - return (**nodes)[a] == b; + return (**nodes_by_field)[field_index][a] == b; } bool operator()(const DecisionNode& a, uint32_t b) const { - return (**nodes)[b] == a; + return (**nodes_by_field)[field_index][b] == a; } }; using UniqueNodeTable = absl::flat_hash_set; - // So called "unique tables" to ensure each node is only added to `nodes_` - // once, and thus has a unique `PacketTransformerHandle::node_index`. One - // table per packet field: a node's entry lives in the table of the field it - // branches on. Splitting by field keeps the tables, and thus probe - // sequences, small, and storing indices (instead of node copies) keeps each - // node stored exactly once, in `nodes_`. + // So called "unique tables" to ensure each node is only added to + // `nodes_by_field_` once, and thus has a unique slot. One table per packet + // field: a node's entry lives in the table of the field it branches on. + // Splitting by field keeps the tables, and thus probe sequences, small, and + // storing slots (instead of node copies) keeps each node stored exactly + // once, in `nodes_by_field_`. // - // INVARIANT: `unique_table_by_field_[f]` contains `i` iff - // `nodes_[i].field.index_ == f`. Every valid node index is contained in - // exactly one table. + // INVARIANT: `unique_table_by_field_.size() == nodes_by_field_.size()`. + // INVARIANT: `unique_table_by_field_[f]` contains `s` iff + // `s < nodes_by_field_[f].size()`; no two nodes in an arena are equal. std::vector unique_table_by_field_; - // Returns the unique table for nodes branching on `field`, creating it (and - // any tables for smaller fields) if it does not exist yet. + // Returns the unique table for nodes branching on `field`, creating it and + // the field's node arena (as well as those of any smaller fields) if they + // don't exist yet. UniqueNodeTable& GetOrCreateUniqueTable(PacketFieldHandle field); // A map of a given `PolicyProto` to a `PacketTransformerHandle`. diff --git a/netkat/packet_transformer_test.expected b/netkat/packet_transformer_test.expected index 9f01d94..1f6a07b 100644 --- a/netkat/packet_transformer_test.expected +++ b/netkat/packet_transformer_test.expected @@ -24,7 +24,7 @@ digraph { Test case: p := (a!=5). Empty modify branch creates a deny path. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketTransformerHandle<0>: +PacketTransformerHandle<0:0>: PacketFieldHandle<0>:'a' == 5: PacketFieldHandle<0>:'a' == *: PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle @@ -42,22 +42,22 @@ digraph { Test case: p := (a=5 + b=2);(b:=1 + c=5). Example from Katch paper Fig 5. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketTransformerHandle<8>: +PacketTransformerHandle<0:3>: PacketFieldHandle<0>:'a' == 5: - PacketFieldHandle<0>:'a' := 5 -> PacketTransformerHandle<6> + PacketFieldHandle<0>:'a' := 5 -> PacketTransformerHandle<1:2> PacketFieldHandle<0>:'a' == *: - PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<7> -PacketTransformerHandle<6>: + PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<1:3> +PacketTransformerHandle<1:2>: PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle - PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<5> -PacketTransformerHandle<7>: + PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<2:0> +PacketTransformerHandle<1:3>: PacketFieldHandle<1>:'b' == 2: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle - PacketFieldHandle<1>:'b' := 2 -> PacketTransformerHandle<5> + PacketFieldHandle<1>:'b' := 2 -> PacketTransformerHandle<2:0> PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<5>: +PacketTransformerHandle<2:0>: PacketFieldHandle<2>:'c' == 5: PacketFieldHandle<2>:'c' := 5 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == *: @@ -68,50 +68,50 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 8 [label="a"] - 8 -> 6 [label="a==5; a:=5"] - 8 -> 7 [style=dashed] - 6 [label="b"] - 6 -> 4294967294 [label="b:=1" style=dashed] - 6 -> 5 [style=dashed] - 7 [label="b"] - 7 -> 4294967294 [label="b==2; b:=1"] - 7 -> 5 [label="b==2; b:=2"] - 7 -> 4294967295 [style=dashed] - 5 [label="c"] - 5 -> 4294967294 [label="c==5; c:=5"] - 5 -> 4294967295 [style=dashed] + 3 [label="a"] + 3 -> 4194306 [label="a==5; a:=5"] + 3 -> 4194307 [style=dashed] + 4194306 [label="b"] + 4194306 -> 4294967294 [label="b:=1" style=dashed] + 4194306 -> 8388608 [style=dashed] + 4194307 [label="b"] + 4194307 -> 4294967294 [label="b==2; b:=1"] + 4194307 -> 8388608 [label="b==2; b:=2"] + 4194307 -> 4294967295 [style=dashed] + 8388608 [label="c"] + 8388608 -> 4294967294 [label="c==5; c:=5"] + 8388608 -> 4294967295 [style=dashed] } ================================================================================ Test case: q := (b=1 + c:=4 + a:=5;b:=1). Example from Katch paper Fig 5. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketTransformerHandle<17>: +PacketTransformerHandle<0:7>: PacketFieldHandle<0>:'a' == 1: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<14> + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:5> PacketFieldHandle<0>:'a' == *: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<4> - PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<16> -PacketTransformerHandle<14>: + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:1> + PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<1:6> +PacketTransformerHandle<1:5>: PacketFieldHandle<1>:'b' == 1: - PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<13> + PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<2:2> PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle - PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<10> -PacketTransformerHandle<4>: + PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<2:1> +PacketTransformerHandle<1:1>: PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<16>: +PacketTransformerHandle<1:6>: PacketFieldHandle<1>:'b' == 1: - PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<13> + PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<2:2> PacketFieldHandle<1>:'b' == *: - PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<10> -PacketTransformerHandle<13>: + PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<2:1> +PacketTransformerHandle<2:2>: PacketFieldHandle<2>:'c' == *: PacketFieldHandle<2>:'c' := 4 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == * -> PacketTransformerHandle -PacketTransformerHandle<10>: +PacketTransformerHandle<2:1>: PacketFieldHandle<2>:'c' == *: PacketFieldHandle<2>:'c' := 4 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == * -> PacketTransformerHandle @@ -121,64 +121,64 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 17 [label="a"] - 17 -> 14 [label="a==1; a:=1"] - 17 -> 4 [label="a:=1" style=dashed] - 17 -> 16 [style=dashed] - 14 [label="b"] - 14 -> 13 [label="b==1; b:=1"] - 14 -> 4294967294 [label="b:=1" style=dashed] - 14 -> 10 [style=dashed] - 4 [label="b"] - 4 -> 4294967294 [label="b:=1" style=dashed] - 4 -> 4294967295 [style=dashed] - 16 [label="b"] - 16 -> 13 [label="b==1; b:=1"] - 16 -> 10 [style=dashed] - 13 [label="c"] - 13 -> 4294967294 [label="c:=4" style=dashed] - 13 -> 4294967294 [style=dashed] - 10 [label="c"] - 10 -> 4294967294 [label="c:=4" style=dashed] - 10 -> 4294967295 [style=dashed] + 7 [label="a"] + 7 -> 4194309 [label="a==1; a:=1"] + 7 -> 4194305 [label="a:=1" style=dashed] + 7 -> 4194310 [style=dashed] + 4194309 [label="b"] + 4194309 -> 8388610 [label="b==1; b:=1"] + 4194309 -> 4294967294 [label="b:=1" style=dashed] + 4194309 -> 8388609 [style=dashed] + 4194305 [label="b"] + 4194305 -> 4294967294 [label="b:=1" style=dashed] + 4194305 -> 4294967295 [style=dashed] + 4194310 [label="b"] + 4194310 -> 8388610 [label="b==1; b:=1"] + 4194310 -> 8388609 [style=dashed] + 8388610 [label="c"] + 8388610 -> 4294967294 [label="c:=4" style=dashed] + 8388610 -> 4294967294 [style=dashed] + 8388609 [label="c"] + 8388609 -> 4294967294 [label="c:=4" style=dashed] + 8388609 -> 4294967295 [style=dashed] } ================================================================================ Test case: p;q. Example from Katch paper Fig 5. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketTransformerHandle<22>: +PacketTransformerHandle<0:8>: PacketFieldHandle<0>:'a' == 1: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<19> + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:7> PacketFieldHandle<0>:'a' == 5: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<4> - PacketFieldHandle<0>:'a' := 5 -> PacketTransformerHandle<21> + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:1> + PacketFieldHandle<0>:'a' := 5 -> PacketTransformerHandle<1:9> PacketFieldHandle<0>:'a' == *: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<20> - PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<19> -PacketTransformerHandle<19>: + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:8> + PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<1:7> +PacketTransformerHandle<1:7>: PacketFieldHandle<1>:'b' == 2: - PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<13> - PacketFieldHandle<1>:'b' := 2 -> PacketTransformerHandle<18> + PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<2:2> + PacketFieldHandle<1>:'b' := 2 -> PacketTransformerHandle<2:3> PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<4>: +PacketTransformerHandle<1:1>: PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<21>: +PacketTransformerHandle<1:9>: PacketFieldHandle<1>:'b' == *: - PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<13> - PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<18> -PacketTransformerHandle<20>: + PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<2:2> + PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<2:3> +PacketTransformerHandle<1:8>: PacketFieldHandle<1>:'b' == 2: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<13>: +PacketTransformerHandle<2:2>: PacketFieldHandle<2>:'c' == *: PacketFieldHandle<2>:'c' := 4 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == * -> PacketTransformerHandle -PacketTransformerHandle<18>: +PacketTransformerHandle<2:3>: PacketFieldHandle<2>:'c' == 5: PacketFieldHandle<2>:'c' := 4 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == *: @@ -189,29 +189,29 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 22 [label="a"] - 22 -> 19 [label="a==1; a:=1"] - 22 -> 4 [label="a==5; a:=1"] - 22 -> 21 [label="a==5; a:=5"] - 22 -> 20 [label="a:=1" style=dashed] - 22 -> 19 [style=dashed] - 19 [label="b"] - 19 -> 13 [label="b==2; b:=1"] - 19 -> 18 [label="b==2; b:=2"] - 19 -> 4294967295 [style=dashed] - 4 [label="b"] - 4 -> 4294967294 [label="b:=1" style=dashed] - 4 -> 4294967295 [style=dashed] - 21 [label="b"] - 21 -> 13 [label="b:=1" style=dashed] - 21 -> 18 [style=dashed] - 20 [label="b"] - 20 -> 4294967294 [label="b==2; b:=1"] - 20 -> 4294967295 [style=dashed] - 13 [label="c"] - 13 -> 4294967294 [label="c:=4" style=dashed] - 13 -> 4294967294 [style=dashed] - 18 [label="c"] - 18 -> 4294967294 [label="c==5; c:=4"] - 18 -> 4294967295 [style=dashed] + 8 [label="a"] + 8 -> 4194311 [label="a==1; a:=1"] + 8 -> 4194305 [label="a==5; a:=1"] + 8 -> 4194313 [label="a==5; a:=5"] + 8 -> 4194312 [label="a:=1" style=dashed] + 8 -> 4194311 [style=dashed] + 4194311 [label="b"] + 4194311 -> 8388610 [label="b==2; b:=1"] + 4194311 -> 8388611 [label="b==2; b:=2"] + 4194311 -> 4294967295 [style=dashed] + 4194305 [label="b"] + 4194305 -> 4294967294 [label="b:=1" style=dashed] + 4194305 -> 4294967295 [style=dashed] + 4194313 [label="b"] + 4194313 -> 8388610 [label="b:=1" style=dashed] + 4194313 -> 8388611 [style=dashed] + 4194312 [label="b"] + 4194312 -> 4294967294 [label="b==2; b:=1"] + 4194312 -> 4294967295 [style=dashed] + 8388610 [label="c"] + 8388610 -> 4294967294 [label="c:=4" style=dashed] + 8388610 -> 4294967294 [style=dashed] + 8388611 [label="c"] + 8388611 -> 4294967294 [label="c==5; c:=4"] + 8388611 -> 4294967295 [style=dashed] }