Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions netkat/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ cc_library(
"@com_google_absl//absl/container:fixed_array",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/hash",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/status",
Expand Down Expand Up @@ -376,6 +377,7 @@ cc_library(
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/functional:any_invocable",
"@com_google_absl//absl/hash",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/status",
Expand Down
5 changes: 5 additions & 0 deletions netkat/packet_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ class [[nodiscard]] PacketFieldHandle {
// 2^16 ~= 65k fields.
uint16_t index_;
explicit PacketFieldHandle(uint16_t index) : index_(index) {}

// `PacketSetManager` and `PacketTransformerManager` organize their node
// storage by field, using `index_` to address the per-field data structures.
friend class PacketSetManager;
friend class PacketTransformerManager;
};

// Protect against regressions in the memory layout, as it affects performance.
Expand Down
244 changes: 157 additions & 87 deletions netkat/packet_set.cc

Large diffs are not rendered by default.

148 changes: 119 additions & 29 deletions netkat/packet_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,15 @@

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/fixed_array.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
Expand Down Expand Up @@ -90,7 +93,7 @@ class [[nodiscard]] PacketSetHandle {
// Hashing, see https://abseil.io/docs/cpp/guides/hash.
template <typename H>
friend H AbslHashValue(H h, PacketSetHandle packet_set) {
return H::combine(std::move(h), packet_set.node_index_);
return H::combine(std::move(h), packet_set.bits_);
}

// Formatting, see https://abseil.io/docs/cpp/guides/abslstringify.
Expand All @@ -103,18 +106,36 @@ class [[nodiscard]] PacketSetHandle {
std::string ToString() const;

private:
// An index into the `nodes_` vector of the `PacketSetManager` object
// associated with this `PacketSetHandle`. The semantics of this packet set
// is entirely determined by the node `nodes_[node_index_]`. The index is
// otherwise arbitrary and meaningless.
// The location of the decision node defining the semantics of this packet
// set, packed into 32 bits: the top `kFieldBits` bits hold the index of the
// packet field the node branches on (`kSentinelFieldIndex` for the sentinel
// handles representing the empty/full set), and the bottom `kSlotBits` bits
// hold the node's slot in the `PacketSetManager`'s arena for that field
// (`nodes_by_field_`). The location is otherwise arbitrary and meaningless.
//
// We use a 32-bit index as a tradeoff between minimizing memory usage and
// Packing the field into the handle makes the field of a node available
// without loading the node from memory, which operations like `And` consult
// on every recursive step (this variant of BDDs orders nodes by field along
// every path through the node graph).
//
// We use 32 bits total as a tradeoff between minimizing memory usage and
// maximizing the number of `PacketSetHandle`s that can be created, both
// aspects that impact how well we scale to large NetKAT models. We expect
// millions, but not billions, of packet sets in practice, and 2^32 ~= 4
// billion.
uint32_t node_index_;
explicit PacketSetHandle(uint32_t node_index) : node_index_(node_index) {}
// aspects that impact how well we scale to large NetKAT models. The
// field/slot split is a further tradeoff between the number of supported
// fields (2^10 - 1 ~= 1k) and the number of supported decision nodes per
// field (2^22 ~= 4M); both bounds are enforced at node-creation time.
static constexpr uint32_t kSlotBits = 22;
static constexpr uint32_t kFieldBits = 32 - kSlotBits;
static constexpr uint32_t kMaxSlotsPerField = uint32_t{1} << kSlotBits;
static constexpr uint32_t kSentinelFieldIndex =
(uint32_t{1} << kFieldBits) - 1;

uint32_t bits_;
explicit PacketSetHandle(uint32_t bits) : bits_(bits) {}
PacketSetHandle(uint32_t field_index, uint32_t slot)
: bits_((field_index << kSlotBits) | slot) {}
uint32_t field_index() const { return bits_ >> kSlotBits; }
uint32_t slot() const { return bits_ & (kMaxSlotsPerField - 1); }
friend class PacketSetManager;
};

Expand All @@ -139,10 +160,13 @@ class PacketSetManager {
PacketSetManager() = default;

// The class is move-only: not copyable, but movable.
// Moves are implemented manually (in the cc file) because the unique tables
// reference `nodes_by_field_` through `nodes_location_`, which must be
// repointed at the new `nodes_by_field_` member on move.
PacketSetManager(const PacketSetManager&) = delete;
PacketSetManager& operator=(const PacketSetManager&) = delete;
PacketSetManager(PacketSetManager&&) = default;
PacketSetManager& operator=(PacketSetManager&&) = default;
PacketSetManager(PacketSetManager&&);
PacketSetManager& operator=(PacketSetManager&&);

// Returns true iff this packet set represents the empty set of packets.
bool IsEmptySet(PacketSetHandle packet_set) const;
Expand Down Expand Up @@ -343,24 +367,90 @@ class PacketSetManager {

[[nodiscard]] std::string ToString(const DecisionNode& node) const;

// The page size of the `nodes_` vector: 64 MiB or ~ 67 MB.
// Chosen large enough to reduce the cost of dynamic allocation, and small
// enough to avoid excessive memory overhead.
static constexpr size_t kPageSize = (1 << 26) / sizeof(DecisionNode);

// The decision nodes forming the BDD-style DAG representation of packet sets.
// `PacketSetHandle::node_index_` indexes into this vector.
// The page size of the per-field node arenas: 16 KiB per page.
// Chosen large enough to amortize the cost of dynamic allocation over
// hundreds of nodes, and small enough that (a) models touching many fields
// don't pay a large per-field memory overhead (each non-empty arena
// allocates at least one page), and (b) pages stay below the malloc
// mmap/trim thresholds (typically 128 KiB), so short-lived managers recycle
// pages through the allocator's freelists instead of syscalls.
static constexpr size_t kPageSize = (1 << 14) / sizeof(DecisionNode);
using NodeArena = PagedStableVector<DecisionNode, kPageSize>;

// The decision nodes forming the BDD-style DAG representation of packet
// sets, stored by the field they branch on: a node with field index `f` and
// slot `s` (in the sense of `PacketSetHandle::bits_`) is
// `nodes_by_field_[f][s]`. Storing nodes by field clusters the nodes of a
// field together in memory; operations tend to process nodes field by
// field, so this improves locality.
//
// We use a custom vector class that provides pointer stability, allowing us
// to create new nodes while traversing the graph (e.g. during operations like
// `And`, `Or`, `Not`). The class also avoids expensive relocations.
PagedStableVector<DecisionNode, kPageSize> nodes_;

// A so called "unique table" to ensure each node is only added to `nodes_`
// once, and thus has a unique `PacketSetHandle::node_index`.
// We use a custom vector class for the arenas that provides pointer
// stability, allowing us to create new nodes while traversing the graph
// (e.g. during operations like `And`, `Or`, `Not`). The class also avoids
// expensive relocations.
//
// INVARIANT: `nodes_by_field_[f][s].field.index_ == f`.
std::vector<NodeArena> nodes_by_field_;

// The location of `nodes_by_field_`, behind a level of indirection that
// remains stable when the manager object is moved: the unique tables below
// hash and compare node slots by dereferencing into `nodes_by_field_`, and
// their hasher/equality functors would otherwise dangle on move. Move
// operations repoint the location at the new manager's `nodes_by_field_`
// member.
std::unique_ptr<const std::vector<NodeArena>*> nodes_location_ =
std::make_unique<const std::vector<NodeArena>*>(&nodes_by_field_);

// Hasher and equality for unique table entries, which are slots in the
// node arena of the table's field. Hashing/comparing the *node* (rather
// than the slot) is what makes the tables deduplicate by node content.
// The `DecisionNode` overloads enable heterogeneous lookup, so that a
// candidate node can be probed before it is added to the arena.
struct InternedNodeHash {
  // Opts in to heterogeneous lookup, so the table can be probed with either
  // a slot or a candidate `DecisionNode`.
  using is_transparent = void;

  // Doubly-indirect pointer to the per-field node arenas; dereferenced on
  // every slot hash.
  const std::vector<NodeArena>* const* nodes_by_field;
  // Index of the arena (within the per-field arenas) that slots refer to.
  uint32_t field_index;

  // Hashes a candidate node directly by its content.
  size_t operator()(const DecisionNode& node) const {
    return absl::HashOf(node);
  }

  // Hashes the node stored at `slot` of this field's arena, so that a slot
  // and a node with equal content produce the same hash.
  size_t operator()(uint32_t slot) const {
    const std::vector<NodeArena>& arenas = **nodes_by_field;
    return absl::HashOf(arenas[field_index][slot]);
  }
};
struct InternedNodeEq {
using is_transparent = void;
const std::vector<NodeArena>* const* nodes_by_field;
uint32_t field_index;
bool operator()(uint32_t a, uint32_t b) const {
const NodeArena& arena = (**nodes_by_field)[field_index];
return arena[a] == arena[b];
}
bool operator()(uint32_t a, const DecisionNode& b) const {
return (**nodes_by_field)[field_index][a] == b;
}
bool operator()(const DecisionNode& a, uint32_t b) const {
return (**nodes_by_field)[field_index][b] == a;
}
};
using UniqueNodeTable =
absl::flat_hash_set<uint32_t, InternedNodeHash, InternedNodeEq>;

// So-called "unique tables" to ensure each node is only added to
// `nodes_by_field_` once, and thus has a unique slot. One table per packet
// field: a node's entry lives in the table of the field it branches on.
// Splitting by field keeps the tables, and thus probe sequences, small, and
// storing slots (instead of node copies) keeps each node stored exactly
// once, in `nodes_by_field_`.
//
// INVARIANT: `packet_by_node_[n] = s` iff `nodes_[s.node_index_] == n`.
absl::flat_hash_map<DecisionNode, PacketSetHandle> packet_by_node_;
// INVARIANT: `unique_table_by_field_.size() == nodes_by_field_.size()`.
// INVARIANT: `unique_table_by_field_[f]` contains `s` iff
// `s < nodes_by_field_[f].size()`; no two nodes in an arena are equal.
std::vector<UniqueNodeTable> unique_table_by_field_;

// Returns the unique table for nodes branching on `field`, creating it and
// the field's node arena (as well as those of any smaller fields) if they
// don't exist yet.
UniqueNodeTable& GetOrCreateUniqueTable(PacketFieldHandle field);

// A map of a given `PredicateProto` to a `PacketSetHandle`.
//
Expand Down
Loading