diff --git a/netkat/BUILD.bazel b/netkat/BUILD.bazel index 9121fef..33d04a0 100644 --- a/netkat/BUILD.bazel +++ b/netkat/BUILD.bazel @@ -132,6 +132,7 @@ cc_library( "@com_google_absl//absl/container:fixed_array", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/hash", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", @@ -376,6 +377,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/hash", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", diff --git a/netkat/packet_field.h b/netkat/packet_field.h index 35400cf..2bccbbc 100644 --- a/netkat/packet_field.h +++ b/netkat/packet_field.h @@ -78,6 +78,11 @@ class [[nodiscard]] PacketFieldHandle { // 2^16 ~= 65k fields. uint16_t index_; explicit PacketFieldHandle(uint16_t index) : index_(index) {} + + // `PacketSetManager` and `PacketTransformerManager` organize their node + // storage by field, using `index_` to address the per-field data structures. + friend class PacketSetManager; + friend class PacketTransformerManager; }; // Protect against regressions in the memory layout, as it affects performance. diff --git a/netkat/packet_set.cc b/netkat/packet_set.cc index 2c7bd1c..466b3ee 100644 --- a/netkat/packet_set.cc +++ b/netkat/packet_set.cc @@ -37,33 +37,57 @@ namespace netkat { // The empty and full set of packets are not decision nodes, and thus we cannot -// associate an index into the `nodes_` vector with them. Instead, we represent -// them using sentinel values, chosen maximally to avoid collisions with proper -// indices. +// associate a node location with them. Instead, we represent them using +// sentinel values whose field bits are `PacketSetHandle::kSentinelFieldIndex`, +// a field index reserved for this purpose (and enforced to never be used by +// proper nodes at node-creation time). enum SentinelNodeIndex : uint32_t { // Encodes the empty set of packets. kEmptySet = std::numeric_limits::max(), // Encodes the full set of packets. kFullSet = std::numeric_limits::max() - 1, - // The minimum sentinel node index. - // Smaller values are reserved for proper indices into the `nodes_` vector. - kMinSentinel = kFullSet, }; -PacketSetHandle::PacketSetHandle() - : node_index_(SentinelNodeIndex::kEmptySet) {} +PacketSetHandle::PacketSetHandle() : bits_(SentinelNodeIndex::kEmptySet) {} std::string PacketSetHandle::ToString() const { - if (node_index_ == SentinelNodeIndex::kEmptySet) { + if (bits_ == SentinelNodeIndex::kEmptySet) { return "PacketSetHandle"; - } else if (node_index_ == SentinelNodeIndex::kFullSet) { + } else if (bits_ == SentinelNodeIndex::kFullSet) { return "PacketSetHandle"; } else { - return absl::StrFormat("PacketSetHandle<%d>", node_index_); + // Print the packed node location as `field_index:slot`. + return absl::StrFormat("PacketSetHandle<%d:%d>", field_index(), slot()); } } +PacketSetManager::PacketSetManager(PacketSetManager&& other) + : nodes_by_field_(std::move(other.nodes_by_field_)), + nodes_location_(std::move(other.nodes_location_)), + unique_table_by_field_(std::move(other.unique_table_by_field_)), + packet_set_by_hash_(std::move(other.packet_set_by_hash_)), + field_manager_(std::move(other.field_manager_)) { + *nodes_location_ = &nodes_by_field_; +} + +PacketSetManager& PacketSetManager::operator=(PacketSetManager&& other) { + nodes_by_field_ = std::move(other.nodes_by_field_); + nodes_location_ = std::move(other.nodes_location_); + unique_table_by_field_ = std::move(other.unique_table_by_field_); + packet_set_by_hash_ = std::move(other.packet_set_by_hash_); + field_manager_ = std::move(other.field_manager_); + *nodes_location_ = &nodes_by_field_; + return *this; +} + PacketSetHandle PacketSetManager::EmptySet() const { + // The sentinel values must carry the reserved sentinel field index, so they + // can never collide with proper node locations. (Asserted here because both + // the sentinel enum and the handle's private encoding are in scope.) + static_assert(SentinelNodeIndex::kEmptySet >> PacketSetHandle::kSlotBits == + PacketSetHandle::kSentinelFieldIndex); + static_assert(SentinelNodeIndex::kFullSet >> PacketSetHandle::kSlotBits == + PacketSetHandle::kSentinelFieldIndex); return PacketSetHandle(SentinelNodeIndex::kEmptySet); } @@ -81,10 +105,12 @@ bool PacketSetManager::IsFullSet(PacketSetHandle packet_set) const { const PacketSetManager::DecisionNode& PacketSetManager::GetNodeOrDie( PacketSetHandle packet_set) const { - CHECK_LT(packet_set.node_index_, nodes_.size()) + CHECK_LT(packet_set.field_index(), nodes_by_field_.size()) << "Did you call this function on a leaf node (i.e. FullSet() or " "EmptySet())? "; // Crash ok - return nodes_[packet_set.node_index_]; + const NodeArena& arena = nodes_by_field_[packet_set.field_index()]; + CHECK_LT(packet_set.slot(), arena.size()); // Crash ok + return arena[packet_set.slot()]; } PacketSetHandle PacketSetManager::NodeToPacket(DecisionNode&& node) { @@ -109,16 +135,39 @@ PacketSetHandle PacketSetManager::NodeToPacket(DecisionNode&& node) { } #endif - auto [it, inserted] = - packet_by_node_.try_emplace(node, PacketSetHandle(nodes_.size())); - if (inserted) { - nodes_.push_back(std::move(node)); - LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel) - << "Internal invariant violated: Proper and sentinel node indices must " - "be disjoint. This indicates that we allocated more nodes than are " - "supported (> 2^32 - 2)."; + // Probe the unique table by node content first (heterogeneous lookup): + // the common case is that an equal node has already been interned, and + // probing with the candidate node avoids touching the arena in that case. + const uint32_t field_index = node.field.index_; + UniqueNodeTable& unique_table = GetOrCreateUniqueTable(node.field); + if (auto it = unique_table.find(node); it != unique_table.end()) { + return PacketSetHandle(field_index, *it); + } + NodeArena& arena = nodes_by_field_[field_index]; + uint32_t slot = arena.size(); + CHECK_LT(slot, PacketSetHandle::kMaxSlotsPerField) // Crash ok + << "PacketSetManager supports at most 2^" << PacketSetHandle::kSlotBits + << " decision nodes per packet field."; + arena.push_back(std::move(node)); + unique_table.insert(slot); + return PacketSetHandle(field_index, slot); +} + +PacketSetManager::UniqueNodeTable& PacketSetManager::GetOrCreateUniqueTable( + PacketFieldHandle field) { + CHECK_LT(field.index_, PacketSetHandle::kSentinelFieldIndex) // Crash ok + << "PacketSetManager supports at most 2^" << PacketSetHandle::kFieldBits + << " - 1 packet fields."; + if (field.index_ >= unique_table_by_field_.size()) { + nodes_by_field_.resize(field.index_ + 1); + for (size_t f = unique_table_by_field_.size(); f <= field.index_; ++f) { + unique_table_by_field_.push_back(UniqueNodeTable( + /*bucket_count=*/0, + InternedNodeHash{nodes_location_.get(), static_cast(f)}, + InternedNodeEq{nodes_location_.get(), static_cast(f)})); + } } - return it->second; + return unique_table_by_field_[field.index_]; } bool PacketSetManager::Contains(PacketSetHandle packet_set, @@ -170,19 +219,19 @@ std::string PacketSetManager::ToDot(PacketSetHandle packet_set) const { const DecisionNode& node = GetNodeOrDie(packet_set); absl::StrAppendFormat(&result, " %d [label=\"%s\"]\n", - packet_set.node_index_, + packet_set.bits_, field_manager_.GetFieldName(node.field)); for (const auto& [value, branch] : node.branch_by_field_value) { absl::StrAppendFormat(&result, " %d -> %d [label=\"%d\"]\n", - packet_set.node_index_, branch.node_index_, value); + packet_set.bits_, branch.bits_, value); if (IsFullSet(branch) || IsEmptySet(branch)) continue; bool new_branch = visited.insert(branch).second; if (new_branch) work_list.push(branch); } PacketSetHandle fallthrough = node.default_branch; absl::StrAppendFormat(&result, " %d -> %d [style=dashed]\n", - packet_set.node_index_, fallthrough.node_index_); + packet_set.bits_, fallthrough.bits_); if (IsFullSet(fallthrough) || IsEmptySet(fallthrough)) continue; bool new_branch = visited.insert(fallthrough).second; if (new_branch) work_list.push(fallthrough); @@ -281,29 +330,27 @@ PacketSetHandle PacketSetManager::And(PacketSetHandle left, // reduce the number of nodes we need to visit exponentially. // Compute result the hard way. - const DecisionNode* left_node = &GetNodeOrDie(left); - const DecisionNode* right_node = &GetNodeOrDie(right); - + // // We exploit that `And` is commutative to canonicalize the order of the - // arguments, reducing the number of cases by 1. - if (left_node->field > right_node->field) { - std::swap(left, right); - std::swap(left_node, right_node); - } - - // Case 1: left_node->field < right_node->field: branch on left field. - if (left_node->field < right_node->field) { - PacketSetHandle default_branch = And(left_node->default_branch, right); + // arguments, reducing the number of cases by 1. The field comparisons below + // read the field bits packed into the handles, requiring no node loads. + if (left.field_index() > right.field_index()) std::swap(left, right); + + // Case 1: left field < right field: branch on left field. + // Note that the right node is never loaded from memory in this case. + if (left.field_index() < right.field_index()) { + const DecisionNode& left_node = GetNodeOrDie(left); + PacketSetHandle default_branch = And(left_node.default_branch, right); absl::FixedArray> branch_by_field_value( - left_node->branch_by_field_value.size()); + left_node.branch_by_field_value.size()); int num_branches = 0; - for (const auto& [value, left_branch] : left_node->branch_by_field_value) { + for (const auto& [value, left_branch] : left_node.branch_by_field_value) { PacketSetHandle branch = And(left_branch, right); if (branch == default_branch) continue; branch_by_field_value[num_branches++] = std::make_pair(value, branch); } return NodeToPacket(DecisionNode{ - .field = left_node->field, + .field = left_node.field, .default_branch = default_branch, .branch_by_field_value{ branch_by_field_value.begin(), @@ -312,30 +359,32 @@ PacketSetHandle PacketSetManager::And(PacketSetHandle left, }); } - // Case 2: left_node->field == right_node->field: branch on shared field. - DCHECK(left_node->field == right_node->field); + // Case 2: left field == right field: branch on shared field. + const DecisionNode& left_node = GetNodeOrDie(left); + const DecisionNode& right_node = GetNodeOrDie(right); + DCHECK(left_node.field == right_node.field); PacketSetHandle default_branch = - And(left_node->default_branch, right_node->default_branch); + And(left_node.default_branch, right_node.default_branch); absl::FixedArray> branch_by_field_value( - left_node->branch_by_field_value.size() + - right_node->branch_by_field_value.size()); + left_node.branch_by_field_value.size() + + right_node.branch_by_field_value.size()); int num_branches = 0; auto add_branch = [&](int value, PacketSetHandle branch) { if (branch == default_branch) return; branch_by_field_value[num_branches++] = std::make_pair(value, branch); }; - auto left_it = left_node->branch_by_field_value.begin(); - auto left_end = left_node->branch_by_field_value.end(); - auto right_it = right_node->branch_by_field_value.begin(); - auto right_end = right_node->branch_by_field_value.end(); + auto left_it = left_node.branch_by_field_value.begin(); + auto left_end = left_node.branch_by_field_value.end(); + auto right_it = right_node.branch_by_field_value.begin(); + auto right_end = right_node.branch_by_field_value.end(); while (left_it != left_end && right_it != right_end) { auto [left_value, left_branch] = *left_it; auto [right_value, right_branch] = *right_it; if (left_value < right_value) { - add_branch(left_value, And(left_branch, right_node->default_branch)); + add_branch(left_value, And(left_branch, right_node.default_branch)); ++left_it; } else if (left_value > right_value) { - add_branch(right_value, And(left_node->default_branch, right_branch)); + add_branch(right_value, And(left_node.default_branch, right_branch)); ++right_it; } else { // left_value == right_value add_branch(left_value, And(left_branch, right_branch)); @@ -345,14 +394,14 @@ PacketSetHandle PacketSetManager::And(PacketSetHandle left, } for (; left_it != left_end; ++left_it) { auto [left_value, left_branch] = *left_it; - add_branch(left_value, And(left_branch, right_node->default_branch)); + add_branch(left_value, And(left_branch, right_node.default_branch)); } for (; right_it != right_end; ++right_it) { auto [right_value, right_branch] = *right_it; - add_branch(right_value, And(left_node->default_branch, right_branch)); + add_branch(right_value, And(left_node.default_branch, right_branch)); } return NodeToPacket(DecisionNode{ - .field = left_node->field, + .field = left_node.field, .default_branch = default_branch, .branch_by_field_value{ branch_by_field_value.begin(), @@ -480,43 +529,64 @@ std::string PacketSetManager::ToString(const DecisionNode& node) const { } absl::Status PacketSetManager::CheckInternalInvariants() const { - // Invariant: Proper and sentinel node indices are disjoint. - RET_CHECK(nodes_.size() <= SentinelNodeIndex::kMinSentinel); - - // Invariant: `packet_by_node_[n] = s` iff `nodes_[s.node_index_] == n`. - for (const auto& [node, packet] : packet_by_node_) { - RET_CHECK(packet.node_index_ < nodes_.size()); - RET_CHECK(nodes_[packet.node_index_] == node); - } - for (int i = 0; i < nodes_.size(); ++i) { - const DecisionNode& node = nodes_[i]; - auto it = packet_by_node_.find(node); - RET_CHECK(it != packet_by_node_.end()); - RET_CHECK(it->second == PacketSetHandle(i)); + // Invariant: Proper node locations and sentinel values are disjoint, i.e. + // the field/slot bounds of the packed handle encoding are respected. + RET_CHECK(nodes_by_field_.size() <= PacketSetHandle::kSentinelFieldIndex); + RET_CHECK(unique_table_by_field_.size() == nodes_by_field_.size()); + + for (size_t f = 0; f < nodes_by_field_.size(); ++f) { + const NodeArena& arena = nodes_by_field_[f]; + RET_CHECK(arena.size() <= PacketSetHandle::kMaxSlotsPerField); + + // Invariant: `unique_table_by_field_[f]` contains exactly the slots of + // `nodes_by_field_[f]`, and no two interned nodes are equal. + const UniqueNodeTable& unique_table = unique_table_by_field_[f]; + RET_CHECK(unique_table.size() == arena.size()); + for (uint32_t slot : unique_table) { + RET_CHECK(slot < arena.size()); + } + for (uint32_t slot = 0; slot < arena.size(); ++slot) { + // Invariant: Nodes are stored in the arena of their field. + RET_CHECK(arena[slot].field.index_ == f); + // Looking up `slot` probes by node content; finding exactly `slot` + // proves that no other interned node has the same content. + auto it = unique_table.find(slot); + RET_CHECK(it != unique_table.end()); + RET_CHECK(*it == slot); + } } // Node Invariants. - for (int i = 0; i < nodes_.size(); ++i) { - const DecisionNode& node = nodes_[i]; - // Invariant: `branch_by_field_value` is non-empty. - // Maintained by `NodeToPacket`. - RET_CHECK(!node.branch_by_field_value.empty()); - - // Invariant: node field is strictly smaller than sub-node fields. - RET_CHECK(IsFullSet(node.default_branch) || - IsEmptySet(node.default_branch) || - GetNodeOrDie(node.default_branch).field > node.field); - for (const auto& [value, branch] : node.branch_by_field_value) { - RET_CHECK(IsFullSet(branch) || IsEmptySet(branch) || - GetNodeOrDie(branch).field > node.field); - - // Invariant: Each case in `branch_by_field_value` is != - // `default_branch`. - RET_CHECK(branch != node.default_branch); + for (size_t f = 0; f < nodes_by_field_.size(); ++f) { + const NodeArena& arena = nodes_by_field_[f]; + for (uint32_t slot = 0; slot < arena.size(); ++slot) { + const DecisionNode& node = arena[slot]; + // Invariant: `branch_by_field_value` is non-empty. + // Maintained by `NodeToPacket`. + RET_CHECK(!node.branch_by_field_value.empty()); + + // Invariant: node field is strictly smaller than sub-node fields. + RET_CHECK(IsFullSet(node.default_branch) || + IsEmptySet(node.default_branch) || + GetNodeOrDie(node.default_branch).field > node.field); + for (const auto& [value, branch] : node.branch_by_field_value) { + RET_CHECK(IsFullSet(branch) || IsEmptySet(branch) || + GetNodeOrDie(branch).field > node.field); + + // Invariant: Each case in `branch_by_field_value` is != + // `default_branch`. + RET_CHECK(branch != node.default_branch); + + // Invariant: The field encoded in a branch handle matches the field + // of the node it points to. + if (!IsFullSet(branch) && !IsEmptySet(branch)) { + RET_CHECK(GetNodeOrDie(branch).field.index_ == branch.field_index()); + } + } + + // Invariant: node field is interned by `field_manager_`. + field_manager_.GetFieldName(node.field); // No crash. } - - // Invariant: node field is interned by `field_manager_`. - field_manager_.GetFieldName(node.field); // No crash. } return absl::OkStatus(); diff --git a/netkat/packet_set.h b/netkat/packet_set.h index f11088f..48c60cf 100644 --- a/netkat/packet_set.h +++ b/netkat/packet_set.h @@ -46,12 +46,15 @@ #include #include +#include #include #include #include #include "absl/container/fixed_array.h" #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/hash/hash.h" #include "absl/status/status.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" @@ -90,7 +93,7 @@ class [[nodiscard]] PacketSetHandle { // Hashing, see https://abseil.io/docs/cpp/guides/hash. template friend H AbslHashValue(H h, PacketSetHandle packet_set) { - return H::combine(std::move(h), packet_set.node_index_); + return H::combine(std::move(h), packet_set.bits_); } // Formatting, see https://abseil.io/docs/cpp/guides/abslstringify. @@ -103,18 +106,36 @@ class [[nodiscard]] PacketSetHandle { std::string ToString() const; private: - // An index into the `nodes_` vector of the `PacketSetManager` object - // associated with this `PacketSetHandle`. The semantics of this packet set - // is entirely determined by the node `nodes_[node_index_]`. The index is - // otherwise arbitrary and meaningless. + // The location of the decision node defining the semantics of this packet + // set, packed into 32 bits: the top `kFieldBits` bits hold the index of the + // packet field the node branches on (`kSentinelFieldIndex` for the sentinel + // handles representing the empty/full set), and the bottom `kSlotBits` bits + // hold the node's slot in the `PacketSetManager`'s arena for that field + // (`nodes_by_field_`). The location is otherwise arbitrary and meaningless. // - // We use a 32-bit index as a tradeoff between minimizing memory usage and + // Packing the field into the handle makes the field of a node available + // without loading the node from memory, which operations like `And` consult + // on every recursive step (this variant of BDDs orders nodes by field along + // every path through the node graph). + // + // We use 32 bits total as a tradeoff between minimizing memory usage and // maximizing the number of `PacketSetHandle`s that can be created, both - // aspects that impact how well we scale to large NetKAT models. We expect - // millions, but not billions, of packet sets in practice, and 2^32 ~= 4 - // billion. - uint32_t node_index_; - explicit PacketSetHandle(uint32_t node_index) : node_index_(node_index) {} + // aspects that impact how well we scale to large NetKAT models. The + // field/slot split is a further tradeoff between the number of supported + // fields (2^10 - 1 ~= 1k) and the number of supported decision nodes per + // field (2^22 ~= 4M); both bounds are enforced at node-creation time. + static constexpr uint32_t kSlotBits = 22; + static constexpr uint32_t kFieldBits = 32 - kSlotBits; + static constexpr uint32_t kMaxSlotsPerField = uint32_t{1} << kSlotBits; + static constexpr uint32_t kSentinelFieldIndex = + (uint32_t{1} << kFieldBits) - 1; + + uint32_t bits_; + explicit PacketSetHandle(uint32_t bits) : bits_(bits) {} + PacketSetHandle(uint32_t field_index, uint32_t slot) + : bits_((field_index << kSlotBits) | slot) {} + uint32_t field_index() const { return bits_ >> kSlotBits; } + uint32_t slot() const { return bits_ & (kMaxSlotsPerField - 1); } friend class PacketSetManager; }; @@ -139,10 +160,13 @@ class PacketSetManager { PacketSetManager() = default; // The class is move-only: not copyable, but movable. + // Moves are implemented manually (in the cc file) because the unique tables + // reference `nodes_` through `nodes_location_`, which must be repointed at + // the new `nodes_` member on move. PacketSetManager(const PacketSetManager&) = delete; PacketSetManager& operator=(const PacketSetManager&) = delete; - PacketSetManager(PacketSetManager&&) = default; - PacketSetManager& operator=(PacketSetManager&&) = default; + PacketSetManager(PacketSetManager&&); + PacketSetManager& operator=(PacketSetManager&&); // Returns true iff this packet set represents the empty set of packets. bool IsEmptySet(PacketSetHandle packet_set) const; @@ -343,24 +367,90 @@ class PacketSetManager { [[nodiscard]] std::string ToString(const DecisionNode& node) const; - // The page size of the `nodes_` vector: 64 MiB or ~ 67 MB. - // Chosen large enough to reduce the cost of dynamic allocation, and small - // enough to avoid excessive memory overhead. - static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode); - - // The decision nodes forming the BDD-style DAG representation of packet sets. - // `PacketSetHandle::node_index_` indexes into this vector. + // The page size of the per-field node arenas: 16 KiB per page. + // Chosen large enough to amortize the cost of dynamic allocation over + // hundreds of nodes, and small enough that (a) models touching many fields + // don't pay a large per-field memory overhead (each non-empty arena + // allocates at least one page), and (b) pages stay below the malloc + // mmap/trim thresholds (typically 128 KiB), so short-lived managers recycle + // pages through the allocator's freelists instead of syscalls. + static constexpr size_t kPageSize = (1 << 14) / sizeof(DecisionNode); + using NodeArena = PagedStableVector; + + // The decision nodes forming the BDD-style DAG representation of packet + // sets, stored by the field they branch on: a node with field index `f` and + // slot `s` (in the sense of `PacketSetHandle::bits_`) is + // `nodes_by_field_[f][s]`. Storing nodes by field clusters the nodes of a + // field together in memory; operations tend to process nodes field by + // field, so this improves locality. // - // We use a custom vector class that provides pointer stability, allowing us - // to create new nodes while traversing the graph (e.g. during operations like - // `And`, `Or`, `Not`). The class also avoids expensive relocations. - PagedStableVector nodes_; - - // A so called "unique table" to ensure each node is only added to `nodes_` - // once, and thus has a unique `PacketSetHandle::node_index`. + // We use a custom vector class for the arenas that provides pointer + // stability, allowing us to create new nodes while traversing the graph + // (e.g. during operations like `And`, `Or`, `Not`). The class also avoids + // expensive relocations. + // + // INVARIANT: `nodes_by_field_[f][s].field.index_ == f`. + std::vector nodes_by_field_; + + // The location of `nodes_by_field_`, behind a level of indirection that + // remains stable when the manager object is moved: the unique tables below + // hash and compare node slots by dereferencing into `nodes_by_field_`, and + // their hasher/equality functors would otherwise dangle on move. Move + // operations repoint the location at the new manager's `nodes_by_field_` + // member. + std::unique_ptr*> nodes_location_ = + std::make_unique*>(&nodes_by_field_); + + // Hasher and equality for unique table entries, which are slots in the + // node arena of the table's field. Hashing/comparing the *node* (rather + // than the slot) is what makes the tables deduplicate by node content. + // The `DecisionNode` overloads enable heterogeneous lookup, so that a + // candidate node can be probed before it is added to the arena. + struct InternedNodeHash { + using is_transparent = void; + const std::vector* const* nodes_by_field; + uint32_t field_index; + size_t operator()(uint32_t slot) const { + return absl::HashOf((**nodes_by_field)[field_index][slot]); + } + size_t operator()(const DecisionNode& node) const { + return absl::HashOf(node); + } + }; + struct InternedNodeEq { + using is_transparent = void; + const std::vector* const* nodes_by_field; + uint32_t field_index; + bool operator()(uint32_t a, uint32_t b) const { + const NodeArena& arena = (**nodes_by_field)[field_index]; + return arena[a] == arena[b]; + } + bool operator()(uint32_t a, const DecisionNode& b) const { + return (**nodes_by_field)[field_index][a] == b; + } + bool operator()(const DecisionNode& a, uint32_t b) const { + return (**nodes_by_field)[field_index][b] == a; + } + }; + using UniqueNodeTable = + absl::flat_hash_set; + + // So called "unique tables" to ensure each node is only added to + // `nodes_by_field_` once, and thus has a unique slot. One table per packet + // field: a node's entry lives in the table of the field it branches on. + // Splitting by field keeps the tables, and thus probe sequences, small, and + // storing slots (instead of node copies) keeps each node stored exactly + // once, in `nodes_by_field_`. // - // INVARIANT: `packet_by_node_[n] = s` iff `nodes_[s.node_index_] == n`. - absl::flat_hash_map packet_by_node_; + // INVARIANT: `unique_table_by_field_.size() == nodes_by_field_.size()`. + // INVARIANT: `unique_table_by_field_[f]` contains `s` iff + // `s < nodes_by_field_[f].size()`; no two nodes in an arena are equal. + std::vector unique_table_by_field_; + + // Returns the unique table for nodes branching on `field`, creating it and + // the field's node arena (as well as those of any smaller fields) if they + // don't exist yet. + UniqueNodeTable& GetOrCreateUniqueTable(PacketFieldHandle field); // A map of a given `PredicateProto` to a `PacketSetHandle`. // diff --git a/netkat/packet_set_test.expected b/netkat/packet_set_test.expected index 7cbf36d..6311de9 100644 --- a/netkat/packet_set_test.expected +++ b/netkat/packet_set_test.expected @@ -24,17 +24,17 @@ digraph { Test case: p := (a=3 && b=4) || (b!=5 && c=5). Example from Katch paper Fig 3. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketSetHandle<14>: - PacketFieldHandle<0>:'a' == 3 -> PacketSetHandle<13> - PacketFieldHandle<0>:'a' == * -> PacketSetHandle<6> -PacketSetHandle<13>: +PacketSetHandle<0:4>: + PacketFieldHandle<0>:'a' == 3 -> PacketSetHandle<1:7> + PacketFieldHandle<0>:'a' == * -> PacketSetHandle<1:3> +PacketSetHandle<1:7>: PacketFieldHandle<1>:'b' == 4 -> PacketSetHandle PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle - PacketFieldHandle<1>:'b' == * -> PacketSetHandle<5> -PacketSetHandle<6>: + PacketFieldHandle<1>:'b' == * -> PacketSetHandle<2:0> +PacketSetHandle<1:3>: PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle - PacketFieldHandle<1>:'b' == * -> PacketSetHandle<5> -PacketSetHandle<5>: + PacketFieldHandle<1>:'b' == * -> PacketSetHandle<2:0> +PacketSetHandle<2:0>: PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -- DOT ------------------------------------------------------------------------- @@ -43,34 +43,34 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 14 [label="a"] - 14 -> 13 [label="3"] - 14 -> 6 [style=dashed] - 13 [label="b"] - 13 -> 4294967294 [label="4"] - 13 -> 4294967295 [label="5"] - 13 -> 5 [style=dashed] - 6 [label="b"] - 6 -> 4294967295 [label="5"] - 6 -> 5 [style=dashed] - 5 [label="c"] - 5 -> 4294967294 [label="5"] - 5 -> 4294967295 [style=dashed] + 4 [label="a"] + 4 -> 4194311 [label="3"] + 4 -> 4194307 [style=dashed] + 4194311 [label="b"] + 4194311 -> 4294967294 [label="4"] + 4194311 -> 4294967295 [label="5"] + 4194311 -> 8388608 [style=dashed] + 4194307 [label="b"] + 4194307 -> 4294967295 [label="5"] + 4194307 -> 8388608 [style=dashed] + 8388608 [label="c"] + 8388608 -> 4294967294 [label="5"] + 8388608 -> 4294967295 [style=dashed] } ================================================================================ Test case: q := (b=3 && c=4) || (a=5 && c!=5). Example from Katch paper Fig 3. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketSetHandle<24>: - PacketFieldHandle<0>:'a' == 5 -> PacketSetHandle<9> - PacketFieldHandle<0>:'a' == * -> PacketSetHandle<17> -PacketSetHandle<9>: +PacketSetHandle<0:9>: + PacketFieldHandle<0>:'a' == 5 -> PacketSetHandle<2:1> + PacketFieldHandle<0>:'a' == * -> PacketSetHandle<1:9> +PacketSetHandle<2:1>: PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -PacketSetHandle<17>: - PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<16> +PacketSetHandle<1:9>: + PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<2:2> PacketFieldHandle<1>:'b' == * -> PacketSetHandle -PacketSetHandle<16>: +PacketSetHandle<2:2>: PacketFieldHandle<2>:'c' == 4 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -- DOT ------------------------------------------------------------------------- @@ -79,47 +79,47 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 24 [label="a"] - 24 -> 9 [label="5"] - 24 -> 17 [style=dashed] - 9 [label="c"] - 9 -> 4294967295 [label="5"] - 9 -> 4294967294 [style=dashed] - 17 [label="b"] - 17 -> 16 [label="3"] - 17 -> 4294967295 [style=dashed] - 16 [label="c"] - 16 -> 4294967294 [label="4"] - 16 -> 4294967295 [style=dashed] + 9 [label="a"] + 9 -> 8388609 [label="5"] + 9 -> 4194313 [style=dashed] + 8388609 [label="c"] + 8388609 -> 4294967295 [label="5"] + 8388609 -> 4294967294 [style=dashed] + 4194313 [label="b"] + 4194313 -> 8388610 [label="3"] + 4194313 -> 4294967295 [style=dashed] + 8388610 [label="c"] + 8388610 -> 4294967294 [label="4"] + 8388610 -> 4294967295 [style=dashed] } ================================================================================ Test case: p + q. Example from Katch paper Fig 3. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketSetHandle<34>: - PacketFieldHandle<0>:'a' == 3 -> PacketSetHandle<32> - PacketFieldHandle<0>:'a' == 5 -> PacketSetHandle<33> - PacketFieldHandle<0>:'a' == * -> PacketSetHandle<31> -PacketSetHandle<32>: - PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<30> +PacketSetHandle<0:11>: + PacketFieldHandle<0>:'a' == 3 -> PacketSetHandle<1:15> + PacketFieldHandle<0>:'a' == 5 -> PacketSetHandle<1:16> + PacketFieldHandle<0>:'a' == * -> PacketSetHandle<1:14> +PacketSetHandle<1:15>: + PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<2:5> PacketFieldHandle<1>:'b' == 4 -> PacketSetHandle PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle - PacketFieldHandle<1>:'b' == * -> PacketSetHandle<5> -PacketSetHandle<33>: - PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle<9> + PacketFieldHandle<1>:'b' == * -> PacketSetHandle<2:0> +PacketSetHandle<1:16>: + PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle<2:1> PacketFieldHandle<1>:'b' == * -> PacketSetHandle -PacketSetHandle<31>: - PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<30> +PacketSetHandle<1:14>: + PacketFieldHandle<1>:'b' == 3 -> PacketSetHandle<2:5> PacketFieldHandle<1>:'b' == 5 -> PacketSetHandle - PacketFieldHandle<1>:'b' == * -> PacketSetHandle<5> -PacketSetHandle<30>: + PacketFieldHandle<1>:'b' == * -> PacketSetHandle<2:0> +PacketSetHandle<2:5>: PacketFieldHandle<2>:'c' == 4 -> PacketSetHandle PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -PacketSetHandle<5>: +PacketSetHandle<2:0>: PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -PacketSetHandle<9>: +PacketSetHandle<2:1>: PacketFieldHandle<2>:'c' == 5 -> PacketSetHandle PacketFieldHandle<2>:'c' == * -> PacketSetHandle -- DOT ------------------------------------------------------------------------- @@ -128,30 +128,30 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 34 [label="a"] - 34 -> 32 [label="3"] - 34 -> 33 [label="5"] - 34 -> 31 [style=dashed] - 32 [label="b"] - 32 -> 30 [label="3"] - 32 -> 4294967294 [label="4"] - 32 -> 4294967295 [label="5"] - 32 -> 5 [style=dashed] - 33 [label="b"] - 33 -> 9 [label="5"] - 33 -> 4294967294 [style=dashed] - 31 [label="b"] - 31 -> 30 [label="3"] - 31 -> 4294967295 [label="5"] - 31 -> 5 [style=dashed] - 30 [label="c"] - 30 -> 4294967294 [label="4"] - 30 -> 4294967294 [label="5"] - 30 -> 4294967295 [style=dashed] - 5 [label="c"] - 5 -> 4294967294 [label="5"] - 5 -> 4294967295 [style=dashed] - 9 [label="c"] - 9 -> 4294967295 [label="5"] - 9 -> 4294967294 [style=dashed] + 11 [label="a"] + 11 -> 4194319 [label="3"] + 11 -> 4194320 [label="5"] + 11 -> 4194318 [style=dashed] + 4194319 [label="b"] + 4194319 -> 8388613 [label="3"] + 4194319 -> 4294967294 [label="4"] + 4194319 -> 4294967295 [label="5"] + 4194319 -> 8388608 [style=dashed] + 4194320 [label="b"] + 4194320 -> 8388609 [label="5"] + 4194320 -> 4294967294 [style=dashed] + 4194318 [label="b"] + 4194318 -> 8388613 [label="3"] + 4194318 -> 4294967295 [label="5"] + 4194318 -> 8388608 [style=dashed] + 8388613 [label="c"] + 8388613 -> 4294967294 [label="4"] + 8388613 -> 4294967294 [label="5"] + 8388613 -> 4294967295 [style=dashed] + 8388608 [label="c"] + 8388608 -> 4294967294 [label="5"] + 8388608 -> 4294967295 [style=dashed] + 8388609 [label="c"] + 8388609 -> 4294967295 [label="5"] + 8388609 -> 4294967294 [style=dashed] } diff --git a/netkat/packet_transformer.cc b/netkat/packet_transformer.cc index a125c8b..29a3db5 100644 --- a/netkat/packet_transformer.cc +++ b/netkat/packet_transformer.cc @@ -45,37 +45,61 @@ namespace netkat { // The Deny and Accept transformers are not decision nodes, and thus we cannot -// associate an index into the `nodes_` vector with them. Instead, we represent -// them using sentinel values, chosen maximally to avoid collisions with proper -// indices. +// associate a node location with them. Instead, we represent them using +// sentinel values whose field bits are +// `PacketTransformerHandle::kSentinelFieldIndex`, a field index reserved for +// this purpose (and enforced to never be used by proper nodes at +// node-creation time). enum SentinelNodeIndex : uint32_t { // Encodes the Deny transformer. kDeny = std::numeric_limits::max(), // Encodes the Accept transformer. kAccept = std::numeric_limits::max() - 1, - // The minimum sentinel node index. - // Smaller values are reserved for proper indices into the `nodes_` vector. - kMinSentinel = kAccept, }; PacketTransformerHandle::PacketTransformerHandle() - : node_index_(SentinelNodeIndex::kDeny) {} + : bits_(SentinelNodeIndex::kDeny) {} + +PacketTransformerManager::PacketTransformerManager( + PacketTransformerManager&& other) + : nodes_by_field_(std::move(other.nodes_by_field_)), + nodes_location_(std::move(other.nodes_location_)), + unique_table_by_field_(std::move(other.unique_table_by_field_)), + transformer_by_hash_(std::move(other.transformer_by_hash_)), + packet_set_manager_(std::move(other.packet_set_manager_)) { + *nodes_location_ = &nodes_by_field_; +} + +PacketTransformerManager& PacketTransformerManager::operator=( + PacketTransformerManager&& other) { + nodes_by_field_ = std::move(other.nodes_by_field_); + nodes_location_ = std::move(other.nodes_location_); + unique_table_by_field_ = std::move(other.unique_table_by_field_); + transformer_by_hash_ = std::move(other.transformer_by_hash_); + packet_set_manager_ = std::move(other.packet_set_manager_); + *nodes_location_ = &nodes_by_field_; + return *this; +} std::string PacketTransformerHandle::ToString() const { - if (node_index_ == SentinelNodeIndex::kDeny) { + if (bits_ == SentinelNodeIndex::kDeny) { return "PacketTransformerHandle"; - } else if (node_index_ == SentinelNodeIndex::kAccept) { + } else if (bits_ == SentinelNodeIndex::kAccept) { return "PacketTransformerHandle"; } else { - return absl::StrFormat("PacketTransformerHandle<%d>", node_index_); + // Print the packed node location as `field_index:slot`. + return absl::StrFormat("PacketTransformerHandle<%d:%d>", field_index(), + slot()); } } const PacketTransformerManager::DecisionNode& PacketTransformerManager::GetNodeOrDie( PacketTransformerHandle transformer) const { - CHECK_LT(transformer.node_index_, nodes_.size()); // Crash ok - return nodes_[transformer.node_index_]; + CHECK_LT(transformer.field_index(), nodes_by_field_.size()); // Crash ok + const NodeArena& arena = nodes_by_field_[transformer.field_index()]; + CHECK_LT(transformer.slot(), arena.size()); // Crash ok + return arena[transformer.slot()]; } // TODO(dilo): Creating as many map copies as this method facilitates is @@ -172,16 +196,41 @@ PacketTransformerHandle PacketTransformerManager::NodeToTransformer( node.default_branch_by_field_modification.empty()) return node.default_branch; - auto [it, inserted] = transformer_by_node_.try_emplace( - node, PacketTransformerHandle(nodes_.size())); - if (inserted) { - nodes_.push_back(std::move(node)); - LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel) - << "Internal invariant violated: Proper and sentinel node indices must " - "be disjoint. This indicates that we allocated more nodes than are " - "supported (> 2^32 - 2)."; + // Probe the unique table by node content first (heterogeneous lookup): + // the common case is that an equal node has already been interned, and + // probing with the candidate node avoids touching the arena in that case. + const uint32_t field_index = node.field.index_; + UniqueNodeTable& unique_table = GetOrCreateUniqueTable(node.field); + if (auto it = unique_table.find(node); it != unique_table.end()) { + return PacketTransformerHandle(field_index, *it); } - return it->second; + NodeArena& arena = nodes_by_field_[field_index]; + uint32_t slot = arena.size(); + CHECK_LT(slot, PacketTransformerHandle::kMaxSlotsPerField) // Crash ok + << "PacketTransformerManager supports at most 2^" + << PacketTransformerHandle::kSlotBits + << " decision nodes per packet field."; + arena.push_back(std::move(node)); + unique_table.insert(slot); + return PacketTransformerHandle(field_index, slot); +} + +PacketTransformerManager::UniqueNodeTable& +PacketTransformerManager::GetOrCreateUniqueTable(PacketFieldHandle field) { + CHECK_LT(field.index_, // Crash ok + PacketTransformerHandle::kSentinelFieldIndex) + << "PacketTransformerManager supports at most 2^" + << PacketTransformerHandle::kFieldBits << " - 1 packet fields."; + if (field.index_ >= unique_table_by_field_.size()) { + nodes_by_field_.resize(field.index_ + 1); + for (size_t f = unique_table_by_field_.size(); f <= field.index_; ++f) { + unique_table_by_field_.push_back(UniqueNodeTable( + /*bucket_count=*/0, + InternedNodeHash{nodes_location_.get(), static_cast(f)}, + InternedNodeEq{nodes_location_.get(), static_cast(f)})); + } + } + return unique_table_by_field_[field.index_]; } bool PacketTransformerManager::IsDeny( @@ -322,6 +371,15 @@ PacketTransformerHandle PacketTransformerManager::Compile( } PacketTransformerHandle PacketTransformerManager::Deny() const { + // The sentinel values must carry the reserved sentinel field index, so they + // can never collide with proper node locations. (Asserted here because both + // the sentinel enum and the handle's private encoding are in scope.) + static_assert(SentinelNodeIndex::kDeny >> + PacketTransformerHandle::kSlotBits == + PacketTransformerHandle::kSentinelFieldIndex); + static_assert(SentinelNodeIndex::kAccept >> + PacketTransformerHandle::kSlotBits == + PacketTransformerHandle::kSentinelFieldIndex); return PacketTransformerHandle(SentinelNodeIndex::kDeny); } @@ -1062,17 +1120,17 @@ std::string PacketTransformerManager::ToDot( std::string field = packet_set_manager_.field_manager_.GetFieldName(node.field); absl::StrAppendFormat(&result, " %d [label=\"%s\"]\n", - transformer.node_index_, field); + transformer.bits_, field); for (const auto& [value, modify_map] : node.modify_branch_by_field_match) { if (modify_map.empty()) { absl::StrAppendFormat(&result, " %d -> %d [label=\"%s==%s\"]\n", - transformer.node_index_, SentinelNodeIndex::kDeny, + transformer.bits_, SentinelNodeIndex::kDeny, field, absl::StrCat(value)); } for (const auto& [new_value, branch] : modify_map) { absl::StrAppendFormat(&result, " %d -> %d [label=\"%s==%s; %s:=%d\"]\n", - transformer.node_index_, branch.node_index_, + transformer.bits_, branch.bits_, field, absl::StrCat(value), field, new_value); if (IsAccept(branch) || IsDeny(branch)) continue; bool new_branch = visited.insert(branch).second; @@ -1084,14 +1142,14 @@ std::string PacketTransformerManager::ToDot( node.default_branch_by_field_modification) { absl::StrAppendFormat( &result, " %d -> %d [label=\"%s:=%d\" style=dashed]\n", - transformer.node_index_, branch.node_index_, field, new_value); + transformer.bits_, branch.bits_, field, new_value); if (IsAccept(branch) || IsDeny(branch)) continue; bool new_branch = visited.insert(branch).second; if (new_branch) work_list.push(branch); } PacketTransformerHandle fallthrough = node.default_branch; absl::StrAppendFormat(&result, " %d -> %d [style=dashed]\n", - transformer.node_index_, fallthrough.node_index_); + transformer.bits_, fallthrough.bits_); if (IsAccept(fallthrough) || IsDeny(fallthrough)) continue; bool new_branch = visited.insert(fallthrough).second; if (new_branch) work_list.push(fallthrough); @@ -1101,66 +1159,81 @@ std::string PacketTransformerManager::ToDot( } absl::Status PacketTransformerManager::CheckInternalInvariants() const { - // Invariant: Proper and sentinel node indices are disjoint. - RET_CHECK(nodes_.size() <= SentinelNodeIndex::kMinSentinel); - - // Invariant: `transformer_by_node_[n] = s` iff `nodes_[s.node_index_] == - // n`. - for (const auto& [node, transformer] : transformer_by_node_) { - RET_CHECK(transformer.node_index_ < nodes_.size()); - RET_CHECK(nodes_[transformer.node_index_] == node); - } - for (int i = 0; i < nodes_.size(); ++i) { - const DecisionNode& node = nodes_[i]; - auto it = transformer_by_node_.find(node); - RET_CHECK(it != transformer_by_node_.end()); - RET_CHECK(it->second == PacketTransformerHandle(i)); + // Invariant: Proper node locations and sentinel values are disjoint, i.e. + // the field/slot bounds of the packed handle encoding are respected. + RET_CHECK(nodes_by_field_.size() <= + PacketTransformerHandle::kSentinelFieldIndex); + RET_CHECK(unique_table_by_field_.size() == nodes_by_field_.size()); + + for (size_t f = 0; f < nodes_by_field_.size(); ++f) { + const NodeArena& arena = nodes_by_field_[f]; + RET_CHECK(arena.size() <= PacketTransformerHandle::kMaxSlotsPerField); + + // Invariant: `unique_table_by_field_[f]` contains exactly the slots of + // `nodes_by_field_[f]`, and no two interned nodes are equal. + const UniqueNodeTable& unique_table = unique_table_by_field_[f]; + RET_CHECK(unique_table.size() == arena.size()); + for (uint32_t slot : unique_table) { + RET_CHECK(slot < arena.size()); + } + for (uint32_t slot = 0; slot < arena.size(); ++slot) { + // Invariant: Nodes are stored in the arena of their field. + RET_CHECK(arena[slot].field.index_ == f); + // Looking up `slot` probes by node content; finding exactly `slot` + // proves that no other interned node has the same content. + auto it = unique_table.find(slot); + RET_CHECK(it != unique_table.end()); + RET_CHECK(*it == slot); + } } // Node Invariants. - for (int i = 0; i < nodes_.size(); ++i) { - const DecisionNode& node = nodes_[i]; - // Invariant: `modify_branch_by_field_match` or - // `default_branch_by_field_modification` is non-empty. - // Maintained by `NodeToTransformer`. - RET_CHECK(!node.modify_branch_by_field_match.empty() || - !node.default_branch_by_field_modification.empty()); - - // Invariant: node field is strictly smaller than sub-node fields. - RET_CHECK(IsAccept(node.default_branch) || IsDeny(node.default_branch) || - GetNodeOrDie(node.default_branch).field > node.field) - << ":\n" - << ToString(node); - - for (const auto& [match_value, branch_by_modify] : - node.modify_branch_by_field_match) { - for (const auto& [modify_value, branch] : branch_by_modify) { - // Invariant: Modify branches are not Deny unless `modify_value == - // match_value`. - RET_CHECK(!IsDeny(branch) || modify_value == match_value) - << ":\n" - << ToString(node); + for (size_t f = 0; f < nodes_by_field_.size(); ++f) { + const NodeArena& arena = nodes_by_field_[f]; + for (uint32_t slot = 0; slot < arena.size(); ++slot) { + const DecisionNode& node = arena[slot]; + // Invariant: `modify_branch_by_field_match` or + // `default_branch_by_field_modification` is non-empty. + // Maintained by `NodeToTransformer`. + RET_CHECK(!node.modify_branch_by_field_match.empty() || + !node.default_branch_by_field_modification.empty()); - // Invariant: node field is strictly smaller than sub-node fields. - RET_CHECK(IsAccept(branch) || IsDeny(branch) || - GetNodeOrDie(branch).field > node.field) - << ":\n" - << ToString(node); + // Invariant: node field is strictly smaller than sub-node fields. + RET_CHECK(IsAccept(node.default_branch) || IsDeny(node.default_branch) || + GetNodeOrDie(node.default_branch).field > node.field) + << ":\n" + << ToString(node); + + for (const auto& [match_value, branch_by_modify] : + node.modify_branch_by_field_match) { + for (const auto& [modify_value, branch] : branch_by_modify) { + // Invariant: Modify branches are not Deny unless `modify_value == + // match_value`. + RET_CHECK(!IsDeny(branch) || modify_value == match_value) + << ":\n" + << ToString(node); + + // Invariant: node field is strictly smaller than sub-node fields. + RET_CHECK(IsAccept(branch) || IsDeny(branch) || + GetNodeOrDie(branch).field > node.field) + << ":\n" + << ToString(node); + } } - } - for (const auto& [match_value, branch] : - node.default_branch_by_field_modification) { - // Invariant: Default modify branches are not Deny. - RET_CHECK(!IsDeny(branch)); + for (const auto& [match_value, branch] : + node.default_branch_by_field_modification) { + // Invariant: Default modify branches are not Deny. + RET_CHECK(!IsDeny(branch)); - // Invariant: node field is strictly smaller than sub-node fields. - RET_CHECK(IsAccept(branch) || GetNodeOrDie(branch).field > node.field); - } + // Invariant: node field is strictly smaller than sub-node fields. + RET_CHECK(IsAccept(branch) || GetNodeOrDie(branch).field > node.field); + } - // Invariant: node field is interned by - // `packet_set_manager_.field_manager_`. - packet_set_manager_.field_manager_.GetFieldName(node.field); // No crash. + // Invariant: node field is interned by + // `packet_set_manager_.field_manager_` (GetFieldName crashes if not). + packet_set_manager_.field_manager_.GetFieldName(node.field); + } } return absl::OkStatus(); diff --git a/netkat/packet_transformer.h b/netkat/packet_transformer.h index 4c9c90d..f9551eb 100644 --- a/netkat/packet_transformer.h +++ b/netkat/packet_transformer.h @@ -39,12 +39,15 @@ #include #include +#include #include #include +#include #include "absl/container/btree_map.h" #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/hash/hash.h" #include "absl/status/status.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" @@ -95,7 +98,7 @@ class [[nodiscard]] PacketTransformerHandle { // Hashing, see https://abseil.io/docs/cpp/guides/hash. template friend H AbslHashValue(H h, PacketTransformerHandle transformer) { - return H::combine(std::move(h), transformer.node_index_); + return H::combine(std::move(h), transformer.bits_); } // Formatting, see https://abseil.io/docs/cpp/guides/abslstringify. @@ -109,17 +112,28 @@ class [[nodiscard]] PacketTransformerHandle { std::string ToString() const; private: - // An index into the `nodes_` vector of the `PacketTransformerManager` - // object associated with this `PacketTransformerHandle`. The semantics of - // this packet transformer is entirely determined by the node - // `nodes_[node_index_]`. The index is otherwise arbitrary and meaningless. + // The location of the decision node defining the semantics of this packet + // transformer, packed into 32 bits: the top `kFieldBits` bits hold the + // index of the packet field the node branches on (`kSentinelFieldIndex` for + // the sentinel handles representing Deny/Accept), and the bottom + // `kSlotBits` bits hold the node's slot in the `PacketTransformerManager`'s + // arena for that field (`nodes_by_field_`). The location is otherwise + // arbitrary and meaningless. // - // We use a 32-bit index as a tradeoff between minimizing memory usage and - // maximizing the number of `PacketTransformerHandle`s that can be created, - // both aspects that impact how well we scale to large NetKAT models. - uint32_t node_index_; - explicit PacketTransformerHandle(uint32_t node_index) - : node_index_(node_index) {} + // The encoding (and its rationale) mirrors `PacketSetHandle::bits_`; see + // there for details. + static constexpr uint32_t kSlotBits = 22; + static constexpr uint32_t kFieldBits = 32 - kSlotBits; + static constexpr uint32_t kMaxSlotsPerField = uint32_t{1} << kSlotBits; + static constexpr uint32_t kSentinelFieldIndex = + (uint32_t{1} << kFieldBits) - 1; + + uint32_t bits_; + explicit PacketTransformerHandle(uint32_t bits) : bits_(bits) {} + PacketTransformerHandle(uint32_t field_index, uint32_t slot) + : bits_((field_index << kSlotBits) | slot) {} + uint32_t field_index() const { return bits_ >> kSlotBits; } + uint32_t slot() const { return bits_ & (kMaxSlotsPerField - 1); } friend class PacketTransformerManager; }; @@ -147,10 +161,13 @@ class PacketTransformerManager { // The class is move-only: not copyable, but movable. // `PacketSetHandles` and `PacketTransformerHandles` returned by this class // are not invalidated on move. + // Moves are implemented manually (in the cc file) because the unique tables + // reference `nodes_` through `nodes_location_`, which must be repointed at + // the new `nodes_` member on move. PacketTransformerManager(const PacketTransformerManager&) = delete; PacketTransformerManager& operator=(const PacketTransformerManager&) = delete; - PacketTransformerManager(PacketTransformerManager&&) = default; - PacketTransformerManager& operator=(PacketTransformerManager&&) = default; + PacketTransformerManager(PacketTransformerManager&&); + PacketTransformerManager& operator=(PacketTransformerManager&&); // Returns the `PacketSetManager` used by this object to compile // predicates. @@ -399,10 +416,15 @@ class PacketTransformerManager { [[nodiscard]] std::string ToString(const DecisionNode& node) const; - // The page size of the `nodes_` vector: 64 MiB or ~ 67 MB. - // Chosen large enough to reduce the cost of dynamic allocation, and small - // enough to avoid excessive memory overhead. - static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode); + // The page size of the per-field node arenas: 16 KiB per page. + // Chosen large enough to amortize the cost of dynamic allocation over + // hundreds of nodes, and small enough that (a) models touching many fields + // don't pay a large per-field memory overhead (each non-empty arena + // allocates at least one page), and (b) pages stay below the malloc + // mmap/trim thresholds (typically 128 KiB), so short-lived managers recycle + // pages through the allocator's freelists instead of syscalls. + static constexpr size_t kPageSize = (1 << 14) / sizeof(DecisionNode); + using NodeArena = PagedStableVector; // Helper functions to deal with DecisionNodes directly. // TODO(dilo): Is there a convenient way to either avoid these or avoid making @@ -416,20 +438,79 @@ class PacketTransformerManager { absl::btree_map GetMapAtValue( const DecisionNode& node, int value); - // The decision nodes forming the BDD-style DAG representation of packets. - // `PacketTransformerHandle::node_index_` indexes into this vector. + // The decision nodes forming the BDD-style DAG representation of packet + // transformers, stored by the field they branch on: a node with field index + // `f` and slot `s` (in the sense of `PacketTransformerHandle::bits_`) is + // `nodes_by_field_[f][s]`. Storing nodes by field clusters the nodes of a + // field together in memory; operations tend to process nodes field by + // field, so this improves locality. // - // We use a custom vector class that provides pointer stability, allowing us - // to create new nodes while traversing the graph. The class also avoids - // expensive relocations. - PagedStableVector nodes_; - - // A so called "unique table" to ensure each node is only added to `nodes_` - // once, and thus has a unique `PacketTransformerHandle::node_index`. + // We use a custom vector class for the arenas that provides pointer + // stability, allowing us to create new nodes while traversing the graph. + // The class also avoids expensive relocations. + // + // INVARIANT: `nodes_by_field_[f][s].field.index_ == f`. + std::vector nodes_by_field_; + + // The location of `nodes_by_field_`, behind a level of indirection that + // remains stable when the manager object is moved: the unique tables below + // hash and compare node slots by dereferencing into `nodes_by_field_`, and + // their hasher/equality functors would otherwise dangle on move. Move + // operations repoint the location at the new manager's `nodes_by_field_` + // member. + std::unique_ptr*> nodes_location_ = + std::make_unique*>(&nodes_by_field_); + + // Hasher and equality for unique table entries, which are slots in the + // node arena of the table's field. Hashing/comparing the *node* (rather + // than the slot) is what makes the tables deduplicate by node content. + // The `DecisionNode` overloads enable heterogeneous lookup, so that a + // candidate node can be probed before it is added to the arena. + struct InternedNodeHash { + using is_transparent = void; + const std::vector* const* nodes_by_field; + uint32_t field_index; + size_t operator()(uint32_t slot) const { + return absl::HashOf((**nodes_by_field)[field_index][slot]); + } + size_t operator()(const DecisionNode& node) const { + return absl::HashOf(node); + } + }; + struct InternedNodeEq { + using is_transparent = void; + const std::vector* const* nodes_by_field; + uint32_t field_index; + bool operator()(uint32_t a, uint32_t b) const { + const NodeArena& arena = (**nodes_by_field)[field_index]; + return arena[a] == arena[b]; + } + bool operator()(uint32_t a, const DecisionNode& b) const { + return (**nodes_by_field)[field_index][a] == b; + } + bool operator()(const DecisionNode& a, uint32_t b) const { + return (**nodes_by_field)[field_index][b] == a; + } + }; + using UniqueNodeTable = + absl::flat_hash_set; + + // So called "unique tables" to ensure each node is only added to + // `nodes_by_field_` once, and thus has a unique slot. One table per packet + // field: a node's entry lives in the table of the field it branches on. + // Splitting by field keeps the tables, and thus probe sequences, small, and + // storing slots (instead of node copies) keeps each node stored exactly + // once, in `nodes_by_field_`. // - // INVARIANT: `transformer_by_node_[n] = s` iff `nodes_[s.node_index_] == n`. - absl::flat_hash_map - transformer_by_node_; + // INVARIANT: `unique_table_by_field_.size() == nodes_by_field_.size()`. + // INVARIANT: `unique_table_by_field_[f]` contains `s` iff + // `s < nodes_by_field_[f].size()`; no two nodes in an arena are equal. + std::vector unique_table_by_field_; + + // Returns the unique table for nodes branching on `field`, creating it and + // the field's node arena (as well as those of any smaller fields) if they + // don't exist yet. + UniqueNodeTable& GetOrCreateUniqueTable(PacketFieldHandle field); // A map of a given `PolicyProto` to a `PacketTransformerHandle`. // diff --git a/netkat/packet_transformer_test.expected b/netkat/packet_transformer_test.expected index 9f01d94..1f6a07b 100644 --- a/netkat/packet_transformer_test.expected +++ b/netkat/packet_transformer_test.expected @@ -24,7 +24,7 @@ digraph { Test case: p := (a!=5). Empty modify branch creates a deny path. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketTransformerHandle<0>: +PacketTransformerHandle<0:0>: PacketFieldHandle<0>:'a' == 5: PacketFieldHandle<0>:'a' == *: PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle @@ -42,22 +42,22 @@ digraph { Test case: p := (a=5 + b=2);(b:=1 + c=5). Example from Katch paper Fig 5. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketTransformerHandle<8>: +PacketTransformerHandle<0:3>: PacketFieldHandle<0>:'a' == 5: - PacketFieldHandle<0>:'a' := 5 -> PacketTransformerHandle<6> + PacketFieldHandle<0>:'a' := 5 -> PacketTransformerHandle<1:2> PacketFieldHandle<0>:'a' == *: - PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<7> -PacketTransformerHandle<6>: + PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<1:3> +PacketTransformerHandle<1:2>: PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle - PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<5> -PacketTransformerHandle<7>: + PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<2:0> +PacketTransformerHandle<1:3>: PacketFieldHandle<1>:'b' == 2: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle - PacketFieldHandle<1>:'b' := 2 -> PacketTransformerHandle<5> + PacketFieldHandle<1>:'b' := 2 -> PacketTransformerHandle<2:0> PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<5>: +PacketTransformerHandle<2:0>: PacketFieldHandle<2>:'c' == 5: PacketFieldHandle<2>:'c' := 5 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == *: @@ -68,50 +68,50 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 8 [label="a"] - 8 -> 6 [label="a==5; a:=5"] - 8 -> 7 [style=dashed] - 6 [label="b"] - 6 -> 4294967294 [label="b:=1" style=dashed] - 6 -> 5 [style=dashed] - 7 [label="b"] - 7 -> 4294967294 [label="b==2; b:=1"] - 7 -> 5 [label="b==2; b:=2"] - 7 -> 4294967295 [style=dashed] - 5 [label="c"] - 5 -> 4294967294 [label="c==5; c:=5"] - 5 -> 4294967295 [style=dashed] + 3 [label="a"] + 3 -> 4194306 [label="a==5; a:=5"] + 3 -> 4194307 [style=dashed] + 4194306 [label="b"] + 4194306 -> 4294967294 [label="b:=1" style=dashed] + 4194306 -> 8388608 [style=dashed] + 4194307 [label="b"] + 4194307 -> 4294967294 [label="b==2; b:=1"] + 4194307 -> 8388608 [label="b==2; b:=2"] + 4194307 -> 4294967295 [style=dashed] + 8388608 [label="c"] + 8388608 -> 4294967294 [label="c==5; c:=5"] + 8388608 -> 4294967295 [style=dashed] } ================================================================================ Test case: q := (b=1 + c:=4 + a:=5;b:=1). Example from Katch paper Fig 5. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketTransformerHandle<17>: +PacketTransformerHandle<0:7>: PacketFieldHandle<0>:'a' == 1: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<14> + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:5> PacketFieldHandle<0>:'a' == *: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<4> - PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<16> -PacketTransformerHandle<14>: + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:1> + PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<1:6> +PacketTransformerHandle<1:5>: PacketFieldHandle<1>:'b' == 1: - PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<13> + PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<2:2> PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle - PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<10> -PacketTransformerHandle<4>: + PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<2:1> +PacketTransformerHandle<1:1>: PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<16>: +PacketTransformerHandle<1:6>: PacketFieldHandle<1>:'b' == 1: - PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<13> + PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<2:2> PacketFieldHandle<1>:'b' == *: - PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<10> -PacketTransformerHandle<13>: + PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<2:1> +PacketTransformerHandle<2:2>: PacketFieldHandle<2>:'c' == *: PacketFieldHandle<2>:'c' := 4 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == * -> PacketTransformerHandle -PacketTransformerHandle<10>: +PacketTransformerHandle<2:1>: PacketFieldHandle<2>:'c' == *: PacketFieldHandle<2>:'c' := 4 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == * -> PacketTransformerHandle @@ -121,64 +121,64 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 17 [label="a"] - 17 -> 14 [label="a==1; a:=1"] - 17 -> 4 [label="a:=1" style=dashed] - 17 -> 16 [style=dashed] - 14 [label="b"] - 14 -> 13 [label="b==1; b:=1"] - 14 -> 4294967294 [label="b:=1" style=dashed] - 14 -> 10 [style=dashed] - 4 [label="b"] - 4 -> 4294967294 [label="b:=1" style=dashed] - 4 -> 4294967295 [style=dashed] - 16 [label="b"] - 16 -> 13 [label="b==1; b:=1"] - 16 -> 10 [style=dashed] - 13 [label="c"] - 13 -> 4294967294 [label="c:=4" style=dashed] - 13 -> 4294967294 [style=dashed] - 10 [label="c"] - 10 -> 4294967294 [label="c:=4" style=dashed] - 10 -> 4294967295 [style=dashed] + 7 [label="a"] + 7 -> 4194309 [label="a==1; a:=1"] + 7 -> 4194305 [label="a:=1" style=dashed] + 7 -> 4194310 [style=dashed] + 4194309 [label="b"] + 4194309 -> 8388610 [label="b==1; b:=1"] + 4194309 -> 4294967294 [label="b:=1" style=dashed] + 4194309 -> 8388609 [style=dashed] + 4194305 [label="b"] + 4194305 -> 4294967294 [label="b:=1" style=dashed] + 4194305 -> 4294967295 [style=dashed] + 4194310 [label="b"] + 4194310 -> 8388610 [label="b==1; b:=1"] + 4194310 -> 8388609 [style=dashed] + 8388610 [label="c"] + 8388610 -> 4294967294 [label="c:=4" style=dashed] + 8388610 -> 4294967294 [style=dashed] + 8388609 [label="c"] + 8388609 -> 4294967294 [label="c:=4" style=dashed] + 8388609 -> 4294967295 [style=dashed] } ================================================================================ Test case: p;q. Example from Katch paper Fig 5. ================================================================================ -- STRING ---------------------------------------------------------------------- -PacketTransformerHandle<22>: +PacketTransformerHandle<0:8>: PacketFieldHandle<0>:'a' == 1: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<19> + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:7> PacketFieldHandle<0>:'a' == 5: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<4> - PacketFieldHandle<0>:'a' := 5 -> PacketTransformerHandle<21> + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:1> + PacketFieldHandle<0>:'a' := 5 -> PacketTransformerHandle<1:9> PacketFieldHandle<0>:'a' == *: - PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<20> - PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<19> -PacketTransformerHandle<19>: + PacketFieldHandle<0>:'a' := 1 -> PacketTransformerHandle<1:8> + PacketFieldHandle<0>:'a' == * -> PacketTransformerHandle<1:7> +PacketTransformerHandle<1:7>: PacketFieldHandle<1>:'b' == 2: - PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<13> - PacketFieldHandle<1>:'b' := 2 -> PacketTransformerHandle<18> + PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<2:2> + PacketFieldHandle<1>:'b' := 2 -> PacketTransformerHandle<2:3> PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<4>: +PacketTransformerHandle<1:1>: PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<21>: +PacketTransformerHandle<1:9>: PacketFieldHandle<1>:'b' == *: - PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<13> - PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<18> -PacketTransformerHandle<20>: + PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle<2:2> + PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle<2:3> +PacketTransformerHandle<1:8>: PacketFieldHandle<1>:'b' == 2: PacketFieldHandle<1>:'b' := 1 -> PacketTransformerHandle PacketFieldHandle<1>:'b' == *: PacketFieldHandle<1>:'b' == * -> PacketTransformerHandle -PacketTransformerHandle<13>: +PacketTransformerHandle<2:2>: PacketFieldHandle<2>:'c' == *: PacketFieldHandle<2>:'c' := 4 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == * -> PacketTransformerHandle -PacketTransformerHandle<18>: +PacketTransformerHandle<2:3>: PacketFieldHandle<2>:'c' == 5: PacketFieldHandle<2>:'c' := 4 -> PacketTransformerHandle PacketFieldHandle<2>:'c' == *: @@ -189,29 +189,29 @@ digraph { edge [fontsize = 12] 4294967294 [label="T" shape=box] 4294967295 [label="F" shape=box] - 22 [label="a"] - 22 -> 19 [label="a==1; a:=1"] - 22 -> 4 [label="a==5; a:=1"] - 22 -> 21 [label="a==5; a:=5"] - 22 -> 20 [label="a:=1" style=dashed] - 22 -> 19 [style=dashed] - 19 [label="b"] - 19 -> 13 [label="b==2; b:=1"] - 19 -> 18 [label="b==2; b:=2"] - 19 -> 4294967295 [style=dashed] - 4 [label="b"] - 4 -> 4294967294 [label="b:=1" style=dashed] - 4 -> 4294967295 [style=dashed] - 21 [label="b"] - 21 -> 13 [label="b:=1" style=dashed] - 21 -> 18 [style=dashed] - 20 [label="b"] - 20 -> 4294967294 [label="b==2; b:=1"] - 20 -> 4294967295 [style=dashed] - 13 [label="c"] - 13 -> 4294967294 [label="c:=4" style=dashed] - 13 -> 4294967294 [style=dashed] - 18 [label="c"] - 18 -> 4294967294 [label="c==5; c:=4"] - 18 -> 4294967295 [style=dashed] + 8 [label="a"] + 8 -> 4194311 [label="a==1; a:=1"] + 8 -> 4194305 [label="a==5; a:=1"] + 8 -> 4194313 [label="a==5; a:=5"] + 8 -> 4194312 [label="a:=1" style=dashed] + 8 -> 4194311 [style=dashed] + 4194311 [label="b"] + 4194311 -> 8388610 [label="b==2; b:=1"] + 4194311 -> 8388611 [label="b==2; b:=2"] + 4194311 -> 4294967295 [style=dashed] + 4194305 [label="b"] + 4194305 -> 4294967294 [label="b:=1" style=dashed] + 4194305 -> 4294967295 [style=dashed] + 4194313 [label="b"] + 4194313 -> 8388610 [label="b:=1" style=dashed] + 4194313 -> 8388611 [style=dashed] + 4194312 [label="b"] + 4194312 -> 4294967294 [label="b==2; b:=1"] + 4194312 -> 4294967295 [style=dashed] + 8388610 [label="c"] + 8388610 -> 4294967294 [label="c:=4" style=dashed] + 8388610 -> 4294967294 [style=dashed] + 8388611 [label="c"] + 8388611 -> 4294967294 [label="c==5; c:=4"] + 8388611 -> 4294967295 [style=dashed] }