Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions netkat/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ cc_library(
"@com_google_absl//absl/container:fixed_array",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/hash",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/status",
Expand Down Expand Up @@ -376,6 +377,7 @@ cc_library(
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/functional:any_invocable",
"@com_google_absl//absl/hash",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/status",
Expand Down
5 changes: 5 additions & 0 deletions netkat/packet_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ class [[nodiscard]] PacketFieldHandle {
// 2^16 ~= 65k fields.
uint16_t index_;
explicit PacketFieldHandle(uint16_t index) : index_(index) {}

// `PacketSetManager` and `PacketTransformerManager` organize their node
// storage by field, using `index_` to address the per-field data structures.
friend class PacketSetManager;
friend class PacketTransformerManager;
};

// Protect against regressions in the memory layout, as it affects performance.
Expand Down
83 changes: 66 additions & 17 deletions netkat/packet_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,25 @@ std::string PacketSetHandle::ToString() const {
}
}

// Move-constructs a manager by taking over all state of `other`.
//
// The unique tables hash and compare node indices by dereferencing into
// `nodes_` through the heap-allocated slot owned by `nodes_location_`. That
// slot travels with the tables, so after the move it must be repointed at
// this object's own `nodes_` member.
PacketSetManager::PacketSetManager(PacketSetManager&& other)
    : nodes_(std::move(other.nodes_)),
      nodes_location_(std::move(other.nodes_location_)),
      unique_table_by_field_(std::move(other.unique_table_by_field_)),
      packet_set_by_hash_(std::move(other.packet_set_by_hash_)),
      field_manager_(std::move(other.field_manager_)) {
  // `other` may itself be a moved-from manager, whose `nodes_location_` is
  // null because a moved-from `std::unique_ptr` owns nothing. In that case
  // there is no slot to repoint, and dereferencing it would be undefined
  // behavior.
  if (nodes_location_ != nullptr) {
    *nodes_location_ = &nodes_;
  }
}

// Move-assigns all state of `other` into this manager and returns `*this`.
//
// The unique tables hash and compare node indices by dereferencing into
// `nodes_` through the heap-allocated slot owned by `nodes_location_`. That
// slot travels with the tables, so after the move it must be repointed at
// this object's own `nodes_` member.
PacketSetManager& PacketSetManager::operator=(PacketSetManager&& other) {
  // Self-move must be a no-op: moving each member onto itself would leave
  // `nodes_` and the unique tables in unspecified, mutually inconsistent
  // states.
  if (this == &other) return *this;

  nodes_ = std::move(other.nodes_);
  nodes_location_ = std::move(other.nodes_location_);
  unique_table_by_field_ = std::move(other.unique_table_by_field_);
  packet_set_by_hash_ = std::move(other.packet_set_by_hash_);
  field_manager_ = std::move(other.field_manager_);

  // `other` may itself be a moved-from manager, whose `nodes_location_` is
  // null because a moved-from `std::unique_ptr` owns nothing. In that case
  // there is no slot to repoint, and dereferencing it would be undefined
  // behavior.
  if (nodes_location_ != nullptr) {
    *nodes_location_ = &nodes_;
  }
  return *this;
}

// Returns the handle for the empty set of packets, which is identified by the
// `kEmptySet` sentinel node index.
PacketSetHandle PacketSetManager::EmptySet() const {
  constexpr auto kEmptySetIndex = SentinelNodeIndex::kEmptySet;
  return PacketSetHandle(kEmptySetIndex);
}
Expand Down Expand Up @@ -109,16 +128,33 @@ PacketSetHandle PacketSetManager::NodeToPacket(DecisionNode&& node) {
}
#endif

auto [it, inserted] =
packet_by_node_.try_emplace(node, PacketSetHandle(nodes_.size()));
if (inserted) {
nodes_.push_back(std::move(node));
LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel)
<< "Internal invariant violated: Proper and sentinel node indices must "
"be disjoint. This indicates that we allocated more nodes than are "
"supported (> 2^32 - 2).";
// Probe the unique table by node content first (heterogeneous lookup): the
// common case is that an equal node has already been interned, and probing
// with the candidate node avoids touching `nodes_` in that case.
UniqueNodeTable& unique_table = GetOrCreateUniqueTable(node.field);
if (auto it = unique_table.find(node); it != unique_table.end()) {
return PacketSetHandle(*it);
}
return it->second;
uint32_t node_index = nodes_.size();
nodes_.push_back(std::move(node));
unique_table.insert(node_index);
LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel)
<< "Internal invariant violated: Proper and sentinel node indices must "
"be disjoint. This indicates that we allocated more nodes than are "
"supported (> 2^32 - 2).";
return PacketSetHandle(node_index);
}

// Returns the unique table holding the nodes that branch on `field`.
//
// Tables are stored densely, indexed by `field.index_`. If the requested table
// does not exist yet, the vector is grown so that it does; this also creates
// empty tables for every smaller field index that was still missing.
PacketSetManager::UniqueNodeTable& PacketSetManager::GetOrCreateUniqueTable(
    PacketFieldHandle field) {
  const size_t table_index = field.index_;
  if (table_index >= unique_table_by_field_.size()) {
    // Each new table is a copy of this empty prototype. Its hasher and
    // equality functors resolve node indices through `nodes_location_`.
    const UniqueNodeTable empty_table(/*bucket_count=*/0,
                                      InternedNodeHash{nodes_location_.get()},
                                      InternedNodeEq{nodes_location_.get()});
    unique_table_by_field_.resize(table_index + 1, empty_table);
  }
  return unique_table_by_field_[table_index];
}

bool PacketSetManager::Contains(PacketSetHandle packet_set,
Expand Down Expand Up @@ -483,16 +519,29 @@ absl::Status PacketSetManager::CheckInternalInvariants() const {
// Invariant: Proper and sentinel node indices are disjoint.
RET_CHECK(nodes_.size() <= SentinelNodeIndex::kMinSentinel);

// Invariant: `packet_by_node_[n] = s` iff `nodes_[s.node_index_] == n`.
for (const auto& [node, packet] : packet_by_node_) {
RET_CHECK(packet.node_index_ < nodes_.size());
RET_CHECK(nodes_[packet.node_index_] == node);
// Invariant: `unique_table_by_field_[f]` contains `i` iff
// `nodes_[i].field.index_ == f`. Every valid node index is in exactly one
// table, and no two interned nodes are equal.
size_t total_table_size = 0;
for (size_t f = 0; f < unique_table_by_field_.size(); ++f) {
const UniqueNodeTable& unique_table = unique_table_by_field_[f];
total_table_size += unique_table.size();
for (uint32_t node_index : unique_table) {
RET_CHECK(node_index < nodes_.size());
RET_CHECK(nodes_[node_index].field.index_ == f);
}
}
for (int i = 0; i < nodes_.size(); ++i) {
RET_CHECK(total_table_size == nodes_.size());
for (uint32_t i = 0; i < nodes_.size(); ++i) {
const DecisionNode& node = nodes_[i];
auto it = packet_by_node_.find(node);
RET_CHECK(it != packet_by_node_.end());
RET_CHECK(it->second == PacketSetHandle(i));
RET_CHECK(node.field.index_ < unique_table_by_field_.size());
const UniqueNodeTable& unique_table =
unique_table_by_field_[node.field.index_];
// Looking up `i` probes by node content; finding exactly `i` proves that
// no other interned node has the same content.
auto it = unique_table.find(i);
RET_CHECK(it != unique_table.end());
RET_CHECK(*it == i);
}

// Node Invariants.
Expand Down
68 changes: 62 additions & 6 deletions netkat/packet_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,15 @@

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/fixed_array.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
Expand Down Expand Up @@ -139,10 +142,13 @@ class PacketSetManager {
PacketSetManager() = default;

// The class is move-only: not copyable, but movable.
// Moves are implemented manually (in the cc file) because the unique tables
// reference `nodes_` through `nodes_location_`, which must be repointed at
// the new `nodes_` member on move.
PacketSetManager(const PacketSetManager&) = delete;
PacketSetManager& operator=(const PacketSetManager&) = delete;
PacketSetManager(PacketSetManager&&) = default;
PacketSetManager& operator=(PacketSetManager&&) = default;
PacketSetManager(PacketSetManager&&);
PacketSetManager& operator=(PacketSetManager&&);

// Returns true iff this packet set represents the empty set of packets.
bool IsEmptySet(PacketSetHandle packet_set) const;
Expand Down Expand Up @@ -356,11 +362,61 @@ class PacketSetManager {
// `And`, `Or`, `Not`). The class also avoids expensive relocations.
PagedStableVector<DecisionNode, kPageSize> nodes_;

// A so called "unique table" to ensure each node is only added to `nodes_`
// once, and thus has a unique `PacketSetHandle::node_index`.
// The location of `nodes_`, behind a level of indirection that remains
// stable when the manager object is moved: the unique tables below hash and
// compare node indices by dereferencing into `nodes_`, and their
// hasher/equality functors would otherwise dangle on move. Move operations
// repoint the location at the new manager's `nodes_` member.
std::unique_ptr<const PagedStableVector<DecisionNode, kPageSize>*>
nodes_location_ = std::make_unique<
const PagedStableVector<DecisionNode, kPageSize>*>(&nodes_);

// Hasher and equality for unique table entries, which are indices into
// `nodes_`. Hashing/comparing the *node* (rather than the index) is what
// makes the tables deduplicate by node content.
// The `DecisionNode` overloads enable heterogeneous lookup, so that a
// candidate node can be probed before it is added to `nodes_`.
struct InternedNodeHash {
using is_transparent = void;
const PagedStableVector<DecisionNode, kPageSize>* const* nodes;
size_t operator()(uint32_t node_index) const {
return absl::HashOf((**nodes)[node_index]);
}
size_t operator()(const DecisionNode& node) const {
return absl::HashOf(node);
}
};
// Compares unique-table entries by the content of the nodes they denote.
struct InternedNodeEq {
  // Enables heterogeneous lookup: a table keyed by node index can be probed
  // directly with a `DecisionNode`.
  using is_transparent = void;

  // Points at the slot that holds the current address of the node storage.
  const PagedStableVector<DecisionNode, kPageSize>* const* nodes;

  // Interned node (by index) vs. candidate node. The other overloads reduce
  // to this comparison.
  bool operator()(uint32_t a, const DecisionNode& b) const {
    return (**nodes)[a] == b;
  }

  // Candidate node vs. interned node (by index).
  bool operator()(const DecisionNode& a, uint32_t b) const {
    return (*this)(b, a);
  }

  // Two interned nodes, both given by index.
  bool operator()(uint32_t a, uint32_t b) const {
    return (*this)(a, (**nodes)[b]);
  }
};
using UniqueNodeTable =
absl::flat_hash_set<uint32_t, InternedNodeHash, InternedNodeEq>;

// So called "unique tables" to ensure each node is only added to `nodes_`
// once, and thus has a unique `PacketSetHandle::node_index`. One table per
// packet field: a node's entry lives in the table of the field it branches
// on. Splitting by field keeps the tables, and thus probe sequences, small,
// and storing indices (instead of node copies) keeps each node stored
// exactly once, in `nodes_`.
//
// INVARIANT: `packet_by_node_[n] = s` iff `nodes_[s.node_index_] == n`.
absl::flat_hash_map<DecisionNode, PacketSetHandle> packet_by_node_;
// INVARIANT: `unique_table_by_field_[f]` contains `i` iff
// `nodes_[i].field.index_ == f`. Every valid node index is contained in
// exactly one table.
std::vector<UniqueNodeTable> unique_table_by_field_;

// Returns the unique table for nodes branching on `field`, creating it (and
// any tables for smaller fields) if it does not exist yet.
UniqueNodeTable& GetOrCreateUniqueTable(PacketFieldHandle field);

// A map of a given `PredicateProto` to a `PacketSetHandle`.
//
Expand Down
86 changes: 68 additions & 18 deletions netkat/packet_transformer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,27 @@ enum SentinelNodeIndex : uint32_t {
// Default-constructed handles carry the `kDeny` sentinel node index.
PacketTransformerHandle::PacketTransformerHandle()
: node_index_(SentinelNodeIndex::kDeny) {}

// Move-constructs a manager by taking over all state of `other`.
//
// The unique tables hash and compare node indices by dereferencing into
// `nodes_` through the heap-allocated slot owned by `nodes_location_`. That
// slot travels with the tables, so after the move it must be repointed at
// this object's own `nodes_` member.
PacketTransformerManager::PacketTransformerManager(
    PacketTransformerManager&& other)
    : nodes_(std::move(other.nodes_)),
      nodes_location_(std::move(other.nodes_location_)),
      unique_table_by_field_(std::move(other.unique_table_by_field_)),
      transformer_by_hash_(std::move(other.transformer_by_hash_)),
      packet_set_manager_(std::move(other.packet_set_manager_)) {
  // `other` may itself be a moved-from manager, whose `nodes_location_` is
  // null because a moved-from `std::unique_ptr` owns nothing. In that case
  // there is no slot to repoint, and dereferencing it would be undefined
  // behavior.
  if (nodes_location_ != nullptr) {
    *nodes_location_ = &nodes_;
  }
}

// Move-assigns all state of `other` into this manager and returns `*this`.
//
// The unique tables hash and compare node indices by dereferencing into
// `nodes_` through the heap-allocated slot owned by `nodes_location_`. That
// slot travels with the tables, so after the move it must be repointed at
// this object's own `nodes_` member.
PacketTransformerManager& PacketTransformerManager::operator=(
    PacketTransformerManager&& other) {
  // Self-move must be a no-op: moving each member onto itself would leave
  // `nodes_` and the unique tables in unspecified, mutually inconsistent
  // states.
  if (this == &other) return *this;

  nodes_ = std::move(other.nodes_);
  nodes_location_ = std::move(other.nodes_location_);
  unique_table_by_field_ = std::move(other.unique_table_by_field_);
  transformer_by_hash_ = std::move(other.transformer_by_hash_);
  packet_set_manager_ = std::move(other.packet_set_manager_);

  // `other` may itself be a moved-from manager, whose `nodes_location_` is
  // null because a moved-from `std::unique_ptr` owns nothing. In that case
  // there is no slot to repoint, and dereferencing it would be undefined
  // behavior.
  if (nodes_location_ != nullptr) {
    *nodes_location_ = &nodes_;
  }
  return *this;
}

std::string PacketTransformerHandle::ToString() const {
if (node_index_ == SentinelNodeIndex::kDeny) {
return "PacketTransformerHandle<deny>";
Expand Down Expand Up @@ -172,16 +193,33 @@ PacketTransformerHandle PacketTransformerManager::NodeToTransformer(
node.default_branch_by_field_modification.empty())
return node.default_branch;

auto [it, inserted] = transformer_by_node_.try_emplace(
node, PacketTransformerHandle(nodes_.size()));
if (inserted) {
nodes_.push_back(std::move(node));
LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel)
<< "Internal invariant violated: Proper and sentinel node indices must "
"be disjoint. This indicates that we allocated more nodes than are "
"supported (> 2^32 - 2).";
// Probe the unique table by node content first (heterogeneous lookup): the
// common case is that an equal node has already been interned, and probing
// with the candidate node avoids touching `nodes_` in that case.
UniqueNodeTable& unique_table = GetOrCreateUniqueTable(node.field);
if (auto it = unique_table.find(node); it != unique_table.end()) {
return PacketTransformerHandle(*it);
}
return it->second;
uint32_t node_index = nodes_.size();
nodes_.push_back(std::move(node));
unique_table.insert(node_index);
LOG_IF(DFATAL, nodes_.size() > SentinelNodeIndex::kMinSentinel)
<< "Internal invariant violated: Proper and sentinel node indices must "
"be disjoint. This indicates that we allocated more nodes than are "
"supported (> 2^32 - 2).";
return PacketTransformerHandle(node_index);
}

// Returns the unique table holding the nodes that branch on `field`.
//
// Tables are stored densely, indexed by `field.index_`. If the requested table
// does not exist yet, the vector is grown so that it does; this also creates
// empty tables for every smaller field index that was still missing.
PacketTransformerManager::UniqueNodeTable&
PacketTransformerManager::GetOrCreateUniqueTable(PacketFieldHandle field) {
  const size_t table_index = field.index_;
  if (table_index >= unique_table_by_field_.size()) {
    // Each new table is a copy of this empty prototype. Its hasher and
    // equality functors resolve node indices through `nodes_location_`.
    const UniqueNodeTable empty_table(/*bucket_count=*/0,
                                      InternedNodeHash{nodes_location_.get()},
                                      InternedNodeEq{nodes_location_.get()});
    unique_table_by_field_.resize(table_index + 1, empty_table);
  }
  return unique_table_by_field_[table_index];
}

bool PacketTransformerManager::IsDeny(
Expand Down Expand Up @@ -1104,17 +1142,29 @@ absl::Status PacketTransformerManager::CheckInternalInvariants() const {
// Invariant: Proper and sentinel node indices are disjoint.
RET_CHECK(nodes_.size() <= SentinelNodeIndex::kMinSentinel);

// Invariant: `transformer_by_node_[n] = s` iff `nodes_[s.node_index_] ==
// n`.
for (const auto& [node, transformer] : transformer_by_node_) {
RET_CHECK(transformer.node_index_ < nodes_.size());
RET_CHECK(nodes_[transformer.node_index_] == node);
// Invariant: `unique_table_by_field_[f]` contains `i` iff
// `nodes_[i].field.index_ == f`. Every valid node index is in exactly one
// table, and no two interned nodes are equal.
size_t total_table_size = 0;
for (size_t f = 0; f < unique_table_by_field_.size(); ++f) {
const UniqueNodeTable& unique_table = unique_table_by_field_[f];
total_table_size += unique_table.size();
for (uint32_t node_index : unique_table) {
RET_CHECK(node_index < nodes_.size());
RET_CHECK(nodes_[node_index].field.index_ == f);
}
}
for (int i = 0; i < nodes_.size(); ++i) {
RET_CHECK(total_table_size == nodes_.size());
for (uint32_t i = 0; i < nodes_.size(); ++i) {
const DecisionNode& node = nodes_[i];
auto it = transformer_by_node_.find(node);
RET_CHECK(it != transformer_by_node_.end());
RET_CHECK(it->second == PacketTransformerHandle(i));
RET_CHECK(node.field.index_ < unique_table_by_field_.size());
const UniqueNodeTable& unique_table =
unique_table_by_field_[node.field.index_];
// Looking up `i` probes by node content; finding exactly `i` proves that
// no other interned node has the same content.
auto it = unique_table.find(i);
RET_CHECK(it != unique_table.end());
RET_CHECK(*it == i);
}

// Node Invariants.
Expand Down
Loading