igorls · igorls · Mar 6, 2026 · Copilot · Mar 6, 2026 · Copilot
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -0,0 +1,3 @@
+## 2024-05-24 - Branch Misprediction Optimization for Packet Classification
+**Learning:** In Zig, standard `switch` statements on integers compile to jump tables. Extracting the dominant case (like data-plane packets `wg_transport` which is 99.9% of traffic) into an explicit `if` branch before a `switch` improves branch prediction and avoids jump table overhead.
-**Learning:** In Zig, standard `switch` statements on integers compile to jump tables. Extracting the dominant case (like data-plane packets `wg_transport` which is 99.9% of traffic) into an explicit `if` branch before a `switch` improves branch prediction and avoids jump table overhead.
+**Learning:** In Zig, `switch` statements on integers may compile to jump tables, branch chains, or lookup tables depending on the case pattern. Extracting the dominant case (like data-plane packets `wg_transport` which is 99.9% of traffic) into an explicit `if` branch before a `switch` can improve branch prediction and, when a jump table is used, avoid its overhead.
-**Learning:** In Zig, standard `switch` statements on integers compile to jump tables. Extracting the dominant case (like data-plane packets `wg_transport` which is 99.9% of traffic) into an explicit `if` branch before a `switch` improves branch prediction and avoids jump table overhead.
+**Learning:** In Zig, `switch` statements on integers may compile to jump tables, branch chains, or lookup tables depending on the case pattern. Extracting the dominant case (like data-plane packets `wg_transport` which is 99.9% of traffic) into an explicit `if` branch before a `switch` can improve branch prediction and, when a jump table is used, avoid its overhead.
+**Action:** When classifying packets on the hot path, explicitly use an `if` statement for the most common case, falling back to a `switch` for the less common ones. Also, use the `inline` keyword on small, frequently called classification functions to eliminate function call overhead.
diff --git a/src/main.zig b/src/main.zig
@@ -2486,7 +2486,29 @@ fn processIncomingPacket(
 ) void {
     const Device = lib.wireguard.Device;
 
-    switch (Device.PacketType.classify(pkt)) {
+    const pkt_type = Device.PacketType.classify(pkt);
+
+    // Optimization: Fast path for data plane transport to avoid jump table overhead
+    if (pkt_type == .wg_transport) {
+        if (n_decrypted.* < 64) {
+            if (wg_dev.decryptTransport(pkt, &decrypt_storage[n_decrypted.*])) |result| {
+                // Check service filter before buffering
+                const PolicyMod = lib.services.Policy;
+                if (PolicyMod.parseTransportHeader(decrypt_storage[n_decrypted.*][0..result.len])) |ti| {
+                    if (wg_dev.peers[result.slot]) |peer| {
+                        const org_pk = if (swim.membership.peers.getPtr(peer.identity_key)) |mp| mp.org_pubkey else null;
+                        if (!service_filter.check(peer.identity_key, org_pk, ti.proto, ti.dst_port)) return;
+                    }
+                }
+                decrypt_lens[n_decrypted.*] = result.len;
+                decrypt_slots[n_decrypted.*] = result.slot;
+                n_decrypted.* += 1;
+            } else |_| {}
+        }
+        return;
+    }
+
+    switch (pkt_type) {
         .wg_handshake_init => {
             if (pkt.len >= @sizeOf(lib.wireguard.Noise.HandshakeInitiation)) {
                 const msg: *const lib.wireguard.Noise.HandshakeInitiation = @ptrCast(@alignCast(pkt.ptr));
@@ -2509,27 +2531,10 @@ fn processIncomingPacket(
                 } else |_| {}
             }
         },
-        .wg_transport => {
-            if (n_decrypted.* < 64) {
-                if (wg_dev.decryptTransport(pkt, &decrypt_storage[n_decrypted.*])) |result| {
-                    // Check service filter before buffering
-                    const PolicyMod = lib.services.Policy;
-                    if (PolicyMod.parseTransportHeader(decrypt_storage[n_decrypted.*][0..result.len])) |ti| {
-                        if (wg_dev.peers[result.slot]) |peer| {
-                            const org_pk = if (swim.membership.peers.getPtr(peer.identity_key)) |mp| mp.org_pubkey else null;
-                            if (!service_filter.check(peer.identity_key, org_pk, ti.proto, ti.dst_port)) return;
-                        }
-                    }
-                    decrypt_lens[n_decrypted.*] = result.len;
-                    decrypt_slots[n_decrypted.*] = result.slot;
-                    n_decrypted.* += 1;
-                } else |_| {}
-            }
-        },
         .wg_cookie => {},
         .stun => swim.feedPacket(pkt, sender_addr, sender_port),
         .swim => swim.feedPacket(pkt, sender_addr, sender_port),
-        .unknown => {},
+        .wg_transport, .unknown => {},
-        .wg_transport, .unknown => {},
+        .wg_transport => {
+            // handled above in fast path
+        },
+        .unknown => {},
-        .wg_transport, .unknown => {},
+        .wg_transport => {
+            // handled above in fast path
+        },
+        .unknown => {},
     }
 }
 

diff --git a/src/wireguard/device.zig b/src/wireguard/device.zig
@@ -24,16 +24,20 @@ pub const PacketType = enum {
     stun, // STUN binding response
     unknown,
 
-    pub fn classify(data: []const u8) PacketType {
+    /// Optimization: Inline and extract dominant case to avoid jump table overhead
+    pub inline fn classify(data: []const u8) PacketType {
         if (data.len < 4) return .unknown;
 
         // WireGuard messages: first byte is type, next 3 are zeros
         const msg_type = std.mem.readInt(u32, data[0..4], .little);
+
+        // Fast path for data-plane transport packets (99.9% of traffic)
+        if (msg_type == 4) return .wg_transport;
+
         return switch (msg_type) {
             1 => .wg_handshake_init,
             2 => .wg_handshake_resp,
             3 => .wg_cookie,
-    /// Optimization: Inline and extract dominant case to avoid jump table overhead
-    pub inline fn classify(data: []const u8) PacketType {
-        if (data.len < 4) return .unknown;
-
-        // WireGuard messages: first byte is type, next 3 are zeros
-        const msg_type = std.mem.readInt(u32, data[0..4], .little);
-
-        // Fast path for data-plane transport packets (99.9% of traffic)
-        if (msg_type == 4) return .wg_transport;
-
-        return switch (msg_type) {
-            1 => .wg_handshake_init,
-            2 => .wg_handshake_resp,
-            3 => .wg_cookie,
+    pub fn classify(data: []const u8) PacketType {
+        if (data.len < 4) return .unknown;
+
+        // WireGuard messages: first byte is type, next 3 are zeros
+        const msg_type = std.mem.readInt(u32, data[0..4], .little);
+
+        return switch (msg_type) {
+            1 => .wg_handshake_init,
+            2 => .wg_handshake_resp,
+            3 => .wg_cookie,
+            4 => .wg_transport,
-    /// Optimization: Inline and extract dominant case to avoid jump table overhead
-    pub inline fn classify(data: []const u8) PacketType {
-        if (data.len < 4) return .unknown;
-
-        // WireGuard messages: first byte is type, next 3 are zeros
-        const msg_type = std.mem.readInt(u32, data[0..4], .little);
-
-        // Fast path for data-plane transport packets (99.9% of traffic)
-        if (msg_type == 4) return .wg_transport;
-
-        return switch (msg_type) {
-            1 => .wg_handshake_init,
-            2 => .wg_handshake_resp,
-            3 => .wg_cookie,
+    pub fn classify(data: []const u8) PacketType {
+        if (data.len < 4) return .unknown;
+
+        // WireGuard messages: first byte is type, next 3 are zeros
+        const msg_type = std.mem.readInt(u32, data[0..4], .little);
+
+        return switch (msg_type) {
+            1 => .wg_handshake_init,
+            2 => .wg_handshake_resp,
+            3 => .wg_cookie,
+            4 => .wg_transport,
-            4 => .wg_transport,
             else => blk: {
                 // STUN: check for magic cookie at bytes 4-7
                 if (data.len >= 8) {