diff --git a/docs/graph.svg b/docs/graph.svg index 9828f7778..59d85c29c 100644 --- a/docs/graph.svg +++ b/docs/graph.svg @@ -4,831 +4,885 @@ - - - + + + bond_output - -bond_output + +bond_output port_output - -port_output + +port_output bond_output->port_output - - + + iface_input - -iface_input + +iface_input xconnect - -xconnect + +xconnect iface_input->xconnect - - + + eth_input - -eth_input + +eth_input iface_input->eth_input - - + + + + + +bridge_input + +bridge_input + + + +iface_input->bridge_input + + iface_output - -iface_output + +iface_output - + iface_output->bond_output - - + + - + iface_output->port_output - - + + + + + +iface_output->bridge_input + + port_tx - -port_tx + +port_tx - + port_output->port_tx - - + + port_rx - -port_rx + +port_rx - + port_rx->iface_input - - + + - + xconnect->port_output - - + + lacp_input - -lacp_input + +lacp_input eth_input->lacp_input - - + + snap_input - -snap_input + +snap_input eth_input->snap_input - - + + arp_input - -arp_input + +arp_input eth_input->arp_input - - + + - + ip_input - -ip_input + +ip_input eth_input->ip_input - - + + - + ip6_input - -ip6_input + +ip6_input eth_input->ip6_input - - + + eth_output - -eth_output + +eth_output eth_output->iface_output - - + + l2_redirect - -l2_redirect + +l2_redirect lacp_output - -lacp_output + +lacp_output - + lacp_output->eth_output - - + + - + snap_input->l2_redirect - - + + arp_input_reply - -arp_input_reply + +arp_input_reply - + arp_input->arp_input_reply - - + + arp_input_request - -arp_input_request + +arp_input_request - + arp_input->arp_input_request - - + + arp_output_reply - -arp_output_reply + +arp_output_reply - + arp_output_reply->eth_output - - + + arp_output_request - -arp_output_request + +arp_output_request - + arp_output_request->eth_output - - + + - + +bridge_flood + +bridge_flood + + + +bridge_flood->iface_input + + + + + +bridge_flood->iface_output + + + + + +bridge_input->iface_input + + + + + +bridge_input->iface_output + + + + + 
+bridge_input->bridge_flood + + + + + ospf_redirect - -ospf_redirect + +ospf_redirect - + ospf_redirect->l2_redirect - - + + - + loopback_input - -loopback_input + +loopback_input - + loopback_input->ip_input - - + + - + loopback_input->ip6_input - - + + - + loopback_output - -loopback_output + +loopback_output - + xvrf - -xvrf + +xvrf - + xvrf->ip_input - - + + - + xvrf->ip6_input - - + + - + ip_forward - -ip_forward + +ip_forward - + ip_output - -ip_output + +ip_output - + ip_forward->ip_output - - + + - + ip_fragment - -ip_fragment + +ip_fragment - + ip_fragment->ip_output - - + + - + ip_hold - -ip_hold + +ip_hold - + ip_input->ip_forward - - + + - + ip_input_local - -ip_input_local + +ip_input_local - + ip_input->ip_input_local - - + + - + ip_input->ip_output - - + + - + dnat44_dynamic - -dnat44_dynamic + +dnat44_dynamic - + ip_input->dnat44_dynamic - - + + - + dnat44_static - -dnat44_static + +dnat44_static - + ip_input->dnat44_static - - + + - + ip_loadbalance - -ip_loadbalance + +ip_loadbalance - + ip_loadbalance->ip_output - - + + - + ip_input_local->ospf_redirect - - + + - + ipip_input - -ipip_input + +ipip_input - + ip_input_local->ipip_input - - + + - + icmp_input - -icmp_input + +icmp_input - + ip_input_local->icmp_input - - + + - + l4_input_local - -l4_input_local + +l4_input_local - + ip_input_local->l4_input_local - - + + - + ip_output->eth_output - - + + - + ip_output->xvrf - - + + - + ip_output->ip_fragment - - + + - + ip_output->ip_hold - - + + - + ip_output->ip_loadbalance - - + + - + ipip_output - -ipip_output + +ipip_output - + ip_output->ipip_output - - + + - + sr6_output - -sr6_output + +sr6_output - + ip_output->sr6_output - - + + - + ip6_forward - -ip6_forward + +ip6_forward - + ip6_output - -ip6_output + +ip6_output - + ip6_forward->ip6_output - - + + - + ip6_hold - -ip6_hold + +ip6_hold - + ip6_input->ip6_forward - - + + - + ip6_input_local - -ip6_input_local + +ip6_input_local - + ip6_input->ip6_input_local - - + + - + 
ip6_input->ip6_output - - + + - + sr6_local - -sr6_local + +sr6_local - + ip6_input->sr6_local - - + + - + ip6_loadbalance - -ip6_loadbalance + +ip6_loadbalance - + ip6_loadbalance->ip6_output - - + + - + ip6_input_local->ospf_redirect - - + + - + icmp6_input - -icmp6_input + +icmp6_input - + ip6_input_local->icmp6_input - - + + - + ip6_input_local->l4_input_local - - + + - + ip6_output->eth_output - - + + - + ip6_output->xvrf - - + + - + ip6_output->ip6_hold - - + + - + ip6_output->ip6_loadbalance - - + + - + ip6_output->sr6_output - - + + - + ipip_input->ip_input - - + + - + ipip_output->ip_output - - + + - + dnat44_dynamic->ip_forward - - + + - + dnat44_dynamic->ip_input_local - - + + - + dnat44_static->ip_forward - - + + - + dnat44_static->ip_input_local - - + + - + sr6_local->ip_input - - + + - + sr6_local->ip6_input - - + + - + sr6_local->ip6_input_local - - + + - + sr6_output->ip6_output - - + + - + icmp_output - -icmp_output + +icmp_output - + icmp_input->icmp_output - - + + - + icmp_local_send - -icmp_local_send + +icmp_local_send - + icmp_local_send->icmp_output - - + + - + icmp_output->ip_output - - + + - + icmp6_output - -icmp6_output + +icmp6_output - + icmp6_input->icmp6_output - - + + - + ndp_na_input - -ndp_na_input + +ndp_na_input - + icmp6_input->ndp_na_input - - + + - + ndp_ns_input - -ndp_ns_input + +ndp_ns_input - + icmp6_input->ndp_ns_input - - + + - + ndp_rs_input - -ndp_rs_input + +ndp_rs_input - + icmp6_input->ndp_rs_input - - + + - + icmp6_local_send - -icmp6_local_send + +icmp6_local_send - + icmp6_local_send->icmp6_output - - + + - + icmp6_output->ip6_output - - + + - + ndp_na_output - -ndp_na_output + +ndp_na_output - + ndp_na_output->icmp6_output - - + + - + ndp_ns_output - -ndp_ns_output + +ndp_ns_output - + ndp_ns_output->icmp6_output - - + + - + l4_loopback_output - -l4_loopback_output + +l4_loopback_output - + l4_input_local->l4_loopback_output - - + + - + dhcp_input - -dhcp_input + +dhcp_input - + l4_input_local->dhcp_input - - + 
+ - + l4_loopback_output->loopback_output - - + + diff --git a/docs/meson.build b/docs/meson.build index 35f3a2ba5..478f5ef41 100644 --- a/docs/meson.build +++ b/docs/meson.build @@ -74,9 +74,10 @@ custom_target( # Individual command man pages # The list is hardcoded since we can't run grcli during meson configuration. grcli_commands = [ - 'address', 'affinity', 'conntrack', 'dnat44', 'events', 'graph', 'interface', - 'logging', 'nexthop', 'ping', 'ping6', 'route', 'router-advert', 'snat44', - 'stats', 'trace', 'traceroute', 'traceroute6', 'tunsrc', + 'address', 'affinity', 'conntrack', 'dnat44', 'events', 'fdb', 'graph', + 'interface', 'logging', 'nexthop', 'ping', 'ping6', 'route', + 'router-advert', 'snat44', 'stats', 'trace', 'traceroute', 'traceroute6', + 'tunsrc', ] foreach cmd : grcli_commands diff --git a/modules/infra/control/control_queue.c b/main/control_queue.c similarity index 86% rename from modules/infra/control/control_queue.c rename to main/control_queue.c index 4922a8901..24a62b86d 100644 --- a/modules/infra/control/control_queue.c +++ b/main/control_queue.c @@ -2,14 +2,12 @@ // Copyright (c) 2024 Christophe Fontaine #include -#include -#include #include #include #include -#include #include +#include #include #include @@ -88,23 +86,11 @@ static void *sem_wait_to_event(void *) { return NULL; } -// When interfaces or nexthops are deleted, drain the control queue -// to free any packets that reference the deleted object. This prevents -// callbacks from being invoked with dangling pointers. 
-static void event_handler(uint32_t event, const void *obj) { +void control_queue_drain(uint32_t event, const void *obj) { struct control_queue_drain drain = {event, obj}; control_queue_poll(0, 0, &drain); } -static struct gr_event_subscription event_sub = { - .callback = event_handler, - .ev_count = 2, - .ev_types = { - GR_EVENT_IFACE_REMOVE, - GR_EVENT_NEXTHOP_DELETE, - }, -}; - static void control_queue_init(struct event_base *ev_base) { atomic_init(&thread_shutdown, false); @@ -146,5 +132,4 @@ static struct gr_module module = { RTE_INIT(control_queue_module_init) { gr_register_module(&module); - gr_event_subscribe(&event_sub); } diff --git a/main/event.c b/main/event.c index 7f6aabe42..ded4dbed5 100644 --- a/main/event.c +++ b/main/event.c @@ -4,10 +4,13 @@ #include "api.h" #include +#include #include #include #include +#include + #include STAILQ_HEAD(subscribers, gr_event_subscription); @@ -17,7 +20,7 @@ void gr_event_subscribe(struct gr_event_subscription *sub) { STAILQ_INSERT_TAIL(&subscribers, sub, next); } -void gr_event_push(uint32_t ev_type, const void *obj) { +static void notify_subscribers(void *obj, uintptr_t ev_type, const struct control_queue_drain *) { const struct gr_event_subscription *sub; STAILQ_FOREACH (sub, &subscribers, next) { @@ -28,9 +31,24 @@ void gr_event_push(uint32_t ev_type, const void *obj) { } } } + api_send_notifications(ev_type, obj); } +void gr_event_push(uint32_t ev_type, const void *obj) { + if (rte_lcore_has_role(rte_lcore_id(), ROLE_NON_EAL)) { + // Called from a dataplane worker thread. + // Defer the notification to the control plane thread. + if (control_queue_push(notify_subscribers, (void *)obj, ev_type) < 0) { + // XXX: add error stat if push fails? + } + } else { + // Called from the control plane thread. + // Notify subscribers immediately. 
+ notify_subscribers((void *)obj, ev_type, NULL); + } +} + STAILQ_HEAD(serializers, gr_event_serializer); static struct serializers serializers = STAILQ_HEAD_INITIALIZER(serializers); diff --git a/modules/infra/control/gr_control_queue.h b/main/gr_control_queue.h similarity index 88% rename from modules/infra/control/gr_control_queue.h rename to main/gr_control_queue.h index 81cab3446..6c701f4da 100644 --- a/modules/infra/control/gr_control_queue.h +++ b/main/gr_control_queue.h @@ -13,6 +13,10 @@ struct control_queue_drain { const void *obj; // Object being deleted }; +// Force drain the control queue from all items. +// Pass ev_type and deleted_obj to item callbacks so that they can ignore/free references. +void control_queue_drain(uint32_t ev_type, const void *deleted_obj); + // Callback definition to pass arbitrary data to be processed by the control plane event loop. // It is up to the function to free any data referenced by the pointer if necessary. // diff --git a/main/meson.build b/main/meson.build index 8086eb988..e02364094 100644 --- a/main/meson.build +++ b/main/meson.build @@ -3,6 +3,7 @@ src += files( 'api.c', + 'control_queue.c', 'dpdk.c', 'event.c', 'main.c', diff --git a/modules/infra/api/gr_infra.h b/modules/infra/api/gr_infra.h index 90ecb247c..88c6df8c1 100644 --- a/modules/infra/api/gr_infra.h +++ b/modules/infra/api/gr_infra.h @@ -21,6 +21,7 @@ typedef enum : uint8_t { GR_IFACE_TYPE_VLAN, GR_IFACE_TYPE_IPIP, GR_IFACE_TYPE_BOND, + GR_IFACE_TYPE_BRIDGE, GR_IFACE_TYPE_COUNT } gr_iface_type_t; @@ -56,6 +57,7 @@ typedef enum : uint8_t { GR_IFACE_MODE_VRF = 0, GR_IFACE_MODE_XC, GR_IFACE_MODE_BOND, + GR_IFACE_MODE_BRIDGE, GR_IFACE_MODE_COUNT } gr_iface_mode_t; @@ -433,8 +435,6 @@ struct gr_infra_cpu_affinity_set_req { // Helper function to convert iface type enum to string static inline const char *gr_iface_type_name(gr_iface_type_t type) { switch (type) { - case GR_IFACE_TYPE_UNDEF: - return "undef"; case GR_IFACE_TYPE_VRF: return "vrf"; case 
GR_IFACE_TYPE_PORT: @@ -445,6 +445,9 @@ static inline const char *gr_iface_type_name(gr_iface_type_t type) { return "ipip"; case GR_IFACE_TYPE_BOND: return "bond"; + case GR_IFACE_TYPE_BRIDGE: + return "bridge"; + case GR_IFACE_TYPE_UNDEF: case GR_IFACE_TYPE_COUNT: break; } @@ -460,6 +463,8 @@ static inline const char *gr_iface_mode_name(gr_iface_mode_t mode) { return "XC"; case GR_IFACE_MODE_BOND: return "bond"; + case GR_IFACE_MODE_BRIDGE: + return "bridge"; case GR_IFACE_MODE_COUNT: break; } diff --git a/modules/infra/control/ctlplane.c b/modules/infra/control/ctlplane.c index a42b59a49..2d9bcf41c 100644 --- a/modules/infra/control/ctlplane.c +++ b/modules/infra/control/ctlplane.c @@ -206,7 +206,8 @@ static void iface_cp_poll(evutil_socket_t, short reason, void *ev_iface) { } } - mbuf_data(mbuf)->iface = iface; + iface_mbuf_data(mbuf)->iface = iface; + iface_mbuf_data(mbuf)->vlan_id = 0; if (post_to_stack(iface_output, mbuf) < 0) { LOG(ERR, "post_to_stack: %s", strerror(errno)); @@ -396,6 +397,7 @@ static void iface_event(uint32_t event, const void *obj) { case GR_IFACE_TYPE_PORT: case GR_IFACE_TYPE_VLAN: case GR_IFACE_TYPE_BOND: + case GR_IFACE_TYPE_BRIDGE: break; default: return; diff --git a/modules/infra/control/iface.c b/modules/infra/control/iface.c index 93311f08e..4e2eed60d 100644 --- a/modules/infra/control/iface.c +++ b/modules/infra/control/iface.c @@ -2,6 +2,7 @@ // Copyright (c) 2024 Robin Jarry #include +#include #include #include #include @@ -33,6 +34,7 @@ static bool iface_type_valid(gr_iface_type_t type) { case GR_IFACE_TYPE_VLAN: case GR_IFACE_TYPE_IPIP: case GR_IFACE_TYPE_BOND: + case GR_IFACE_TYPE_BRIDGE: return true; case GR_IFACE_TYPE_UNDEF: case GR_IFACE_TYPE_COUNT: @@ -586,10 +588,11 @@ int iface_destroy(struct iface *iface) { rte_rcu_qsbr_synchronize(gr_datapath_rcu(), RTE_QSBR_THRID_INVALID); - // Push IFACE_REMOVE event after RCU sync to ensure all datapath threads + // Drain the control queue after RCU sync to ensure all datapath 
threads // have seen that this iface is gone. At this point, only packets already - // in the control queue may still reference it. The event triggers - // a drain that frees those packets before type->fini() frees the iface. + // in the control queue may still reference it. + control_queue_drain(GR_EVENT_IFACE_REMOVE, iface); + gr_event_push(GR_EVENT_IFACE_REMOVE, iface); type = iface_type_get(iface->type); diff --git a/modules/infra/control/meson.build b/modules/infra/control/meson.build index 34642ecb9..15cd88d04 100644 --- a/modules/infra/control/meson.build +++ b/modules/infra/control/meson.build @@ -3,7 +3,6 @@ src += files( 'bond.c', - 'control_queue.c', 'ctlplane.c', 'graph.c', 'group_nexthop.c', diff --git a/modules/infra/control/nexthop.c b/modules/infra/control/nexthop.c index 32c274fe0..227c1b1cb 100644 --- a/modules/infra/control/nexthop.c +++ b/modules/infra/control/nexthop.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause // Copyright (c) 2024 Robin Jarry +#include #include #include #include @@ -478,11 +479,11 @@ void nexthop_destroy(struct nexthop *nh) { rte_rcu_qsbr_synchronize(gr_datapath_rcu(), RTE_QSBR_THRID_INVALID); - // Push NEXTHOP_DELETE event after RCU sync to ensure all datapath + // Drain the control queue after RCU sync to ensure all datapath // threads have seen that this nexthop is gone. At this point, only // packets already in the control queue may still reference it. - // The event triggers a drain that frees those packets before we free - // the nexthop memory. 
+ control_queue_drain(GR_EVENT_NEXTHOP_DELETE, nh); + if (nh->origin != GR_NH_ORIGIN_INTERNAL) gr_event_push(GR_EVENT_NEXTHOP_DELETE, nh); diff --git a/modules/infra/datapath/eth_output.c b/modules/infra/datapath/eth_output.c index 8e073192e..3b93ddc07 100644 --- a/modules/infra/datapath/eth_output.c +++ b/modules/infra/datapath/eth_output.c @@ -51,6 +51,8 @@ eth_output_process(struct rte_graph *graph, struct rte_node *node, void **objs, eth->src_addr = src_mac; eth->ether_type = priv->ether_type; + iface_mbuf_data(mbuf)->vlan_id = 0; + edge = OUTPUT; next: if (gr_mbuf_is_traced(mbuf)) { diff --git a/modules/infra/datapath/iface_output.c b/modules/infra/datapath/iface_output.c index 8a20afae8..146419ebb 100644 --- a/modules/infra/datapath/iface_output.c +++ b/modules/infra/datapath/iface_output.c @@ -56,8 +56,8 @@ static uint16_t iface_output_process( void **objs, uint16_t nb_objs ) { - uint16_t iface_id, vlan_id; const struct iface *iface; + struct iface_mbuf_data *d; struct rte_mbuf *m; rte_edge_t edge; @@ -65,36 +65,33 @@ static uint16_t iface_output_process( for (uint16_t i = 0; i < nb_objs; i++) { m = objs[i]; - iface = mbuf_data(m)->iface; - iface_id = iface->id; + d = iface_mbuf_data(m); + iface = d->iface; if (iface->type == GR_IFACE_TYPE_VLAN) { const struct iface_info_vlan *vlan = iface_info_vlan(iface); - vlan_id = vlan->vlan_id; + d->vlan_id = vlan->vlan_id; iface = iface_from_id(vlan->parent_id); - } else { - vlan_id = 0; } if (gr_mbuf_is_traced(m)) { struct iface_output_trace_data *t = gr_mbuf_trace_add(m, node, sizeof(*t)); - t->iface_id = iface_id; - t->vlan_id = vlan_id; + t->iface_id = d->iface->id; + t->vlan_id = d->vlan_id; } if (iface == NULL) { edge = NO_PARENT; goto next; } - if (!(iface->flags & GR_IFACE_F_UP)) { + if (!(d->iface->flags & GR_IFACE_F_UP)) { edge = IFACE_DOWN; goto next; } - IFACE_STATS_INC(tx, m, iface); + IFACE_STATS_INC(tx, m, d->iface); - iface_mbuf_data(m)->iface = iface; - iface_mbuf_data(m)->vlan_id = vlan_id; + 
d->iface = iface; edge = iface_type_edges[iface->type]; next: rte_node_enqueue_x1(graph, node, edge, m); diff --git a/modules/l2/api/gr_l2.h b/modules/l2/api/gr_l2.h new file mode 100644 index 000000000..84f0c6dba --- /dev/null +++ b/modules/l2/api/gr_l2.h @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#pragma once + +#include +#include +#include +#include + +#include + +#define GR_L2_MODULE 0xbabe + +// Bridge configuration flags. +typedef enum : uint16_t { + GR_BRIDGE_F_NO_FLOOD = GR_BIT16(0), + GR_BRIDGE_F_NO_LEARN = GR_BIT16(1), +} gr_bridge_flags_t; + +#define GR_BRIDGE_MAX_MEMBERS 64 +#define GR_BRIDGE_DEFAULT_AGEING 300 + +// Bridge reconfiguration attribute flags. +#define GR_BRIDGE_SET_AGEING_TIME GR_BIT64(32) +#define GR_BRIDGE_SET_FLAGS GR_BIT64(33) +#define GR_BRIDGE_SET_MAC GR_BIT64(34) + +struct __gr_iface_info_bridge_base { + uint16_t ageing_time; // Learned MAC ageing time in seconds (0 = default) + gr_bridge_flags_t flags; + struct rte_ether_addr mac; // Randomly generated if not set explicitly. + uint16_t n_members; +}; + +// Info structure for GR_IFACE_TYPE_BRIDGE interfaces. +// Only port, VLAN and bond interfaces can be members. +// Members are reassigned to the default VRF when the bridge is destroyed. +struct gr_iface_info_bridge { + BASE(__gr_iface_info_bridge_base); + uint16_t members[GR_BRIDGE_MAX_MEMBERS]; // Interface IDs of bridge members. +}; + +// FDB (L2 Forwarding Database) management ///////////////////////////////////// + +// FDB entry flags. +typedef enum : uint8_t { + GR_FDB_F_STATIC = GR_BIT8(0), // User-configured, never aged out. + GR_FDB_F_LEARN = GR_BIT8(1), // Learned via local bridge. +} gr_fdb_flags_t; + +// Forwarding database entry associating a MAC+VLAN to a bridge member interface. +struct gr_fdb_entry { + uint16_t bridge_id; + struct rte_ether_addr mac; + uint16_t vlan_id; + uint16_t iface_id; // Updated automatically when a MAC moves between members. 
+ gr_fdb_flags_t flags; + clock_t last_seen; // Refreshed on each datapath hit for learned entries. +}; + +enum { + GR_EVENT_FDB_ADD = EVENT_TYPE(GR_L2_MODULE, 0x0001), + GR_EVENT_FDB_DEL = EVENT_TYPE(GR_L2_MODULE, 0x0002), + GR_EVENT_FDB_UPDATE = EVENT_TYPE(GR_L2_MODULE, 0x0003), +}; + +// Add an FDB entry. The bridge_id is resolved from the member interface's domain. +// Entries without GR_FDB_F_STATIC are subject to ageing like learned entries. +#define GR_FDB_ADD REQUEST_TYPE(GR_L2_MODULE, 0x0001) + +struct gr_fdb_add_req { + struct gr_fdb_entry fdb; + bool exist_ok; // If true, update existing entry instead of returning EEXIST. +}; + +// struct gr_fdb_add_resp { }; + +// Delete an FDB entry by key. +#define GR_FDB_DEL REQUEST_TYPE(GR_L2_MODULE, 0x0002) + +struct gr_fdb_del_req { + uint16_t bridge_id; + struct rte_ether_addr mac; + uint16_t vlan_id; + bool missing_ok; // If true, ignore ENOENT. +}; + +// Flush FDB entries. All non-zero fields are ANDed as filters. +#define GR_FDB_FLUSH REQUEST_TYPE(GR_L2_MODULE, 0x0003) + +struct gr_fdb_flush_req { + uint16_t bridge_id; // GR_IFACE_ID_UNDEF to match all bridges. + struct rte_ether_addr mac; // Zero address to match all MACs. + uint16_t iface_id; // GR_IFACE_ID_UNDEF to match all interfaces. + gr_fdb_flags_t flags; // GR_FDB_F_STATIC: flush all. Otherwise, only dynamic entries. +}; + +// struct gr_fdb_flush_resp { }; + +// List FDB entries with optional filtering. +#define GR_FDB_LIST REQUEST_TYPE(GR_L2_MODULE, 0x0004) + +struct gr_fdb_list_req { + uint16_t bridge_id; // GR_IFACE_ID_UNDEF to list all bridges. + uint16_t iface_id; // GR_IFACE_ID_UNDEF to match all interfaces. + gr_fdb_flags_t flags; // GR_FDB_F_STATIC: only static. Otherwise, list all entries. +}; + +STREAM_RESP(struct gr_fdb_entry); + +// Get FDB subsystem configuration and usage. 
+#define GR_FDB_CONFIG_GET REQUEST_TYPE(GR_L2_MODULE, 0x0005) + +// struct gr_fdb_config_get_req { }; + +struct gr_fdb_config_get_resp { + uint32_t max_entries; + uint32_t used_entries; +}; + +// Set FDB subsystem configuration. +// Changing max_entries requires the FDB to be empty (returns EBUSY otherwise). +#define GR_FDB_CONFIG_SET REQUEST_TYPE(GR_L2_MODULE, 0x0006) + +struct gr_fdb_config_set_req { + uint32_t max_entries; +}; + +// struct gr_fdb_config_set_resp { }; diff --git a/modules/l2/api/meson.build b/modules/l2/api/meson.build new file mode 100644 index 000000000..98416c3c9 --- /dev/null +++ b/modules/l2/api/meson.build @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +api_headers += files('gr_l2.h') + +api_inc += include_directories('.') diff --git a/modules/l2/cli/bridge.c b/modules/l2/cli/bridge.c new file mode 100644 index 000000000..af03fc6ba --- /dev/null +++ b/modules/l2/cli/bridge.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +static void bridge_show(struct gr_api_client *c, const struct gr_iface *iface) { + const struct gr_iface_info_bridge *bridge = PAYLOAD(iface); + + printf("flags: %sflood %slearn\n", + (bridge->flags & GR_BRIDGE_F_NO_FLOOD) ? "no_" : "", + (bridge->flags & GR_BRIDGE_F_NO_LEARN) ? 
"no_" : ""); + + printf("ageing_time: %u seconds\n", bridge->ageing_time); + printf("mac: " ETH_F "\n", &bridge->mac); + printf("members:\n"); + + for (uint8_t i = 0; i < bridge->n_members; i++) { + struct gr_iface *member = iface_from_id(c, bridge->members[i]); + if (member != NULL) + printf("- %s\n", member->name); + free(member); + } +} + +static void +bridge_list_info(struct gr_api_client *, const struct gr_iface *iface, char *buf, size_t len) { + const struct gr_iface_info_bridge *bridge = PAYLOAD(iface); + snprintf( + buf, + len, + "members=%u %sflood %slearn", + bridge->n_members, + (bridge->flags & GR_BRIDGE_F_NO_FLOOD) ? "no_" : "", + (bridge->flags & GR_BRIDGE_F_NO_LEARN) ? "no_" : "" + ); +} + +static struct cli_iface_type bridge_type = { + .type_id = GR_IFACE_TYPE_BRIDGE, + .show = bridge_show, + .list_info = bridge_list_info, +}; + +static uint64_t parse_bridge_args( + struct gr_api_client *c, + const struct ec_pnode *p, + struct gr_iface *iface, + bool update +) { + struct gr_iface_info_bridge *bridge = PAYLOAD(iface); + uint64_t set_attrs; + + set_attrs = parse_iface_args(c, p, iface, sizeof(*bridge), update); + + if (arg_str(p, "flood")) { + bridge->flags &= ~GR_BRIDGE_F_NO_FLOOD; + set_attrs |= GR_BRIDGE_SET_FLAGS; + } else if (arg_str(p, "no_flood")) { + bridge->flags |= GR_BRIDGE_F_NO_FLOOD; + set_attrs |= GR_BRIDGE_SET_FLAGS; + } + if (arg_str(p, "learn")) { + bridge->flags &= ~GR_BRIDGE_F_NO_LEARN; + set_attrs |= GR_BRIDGE_SET_FLAGS; + } else if (arg_str(p, "no_learn")) { + bridge->flags |= GR_BRIDGE_F_NO_LEARN; + set_attrs |= GR_BRIDGE_SET_FLAGS; + } + + if (arg_u16(p, "AGE", &bridge->ageing_time) == 0) + set_attrs |= GR_BRIDGE_SET_AGEING_TIME; + else if (errno != ENOENT) + return 0; + + if (arg_eth_addr(p, "MAC", &bridge->mac) == 0) + set_attrs |= GR_BRIDGE_SET_MAC; + else if (errno != ENOENT) + return 0; + + if (set_attrs == 0) + errno = EINVAL; + + return set_attrs; +} + +static cmd_status_t bridge_add(struct gr_api_client *c, const struct 
ec_pnode *p) { + const struct gr_infra_iface_add_resp *resp; + struct gr_infra_iface_add_req *req = NULL; + void *resp_ptr = NULL; + size_t len; + + len = sizeof(*req) + sizeof(struct gr_iface_info_bridge); + if ((req = calloc(1, len)) == NULL) + goto err; + + req->iface.type = GR_IFACE_TYPE_BRIDGE; + req->iface.flags = GR_IFACE_F_UP; + + if (parse_bridge_args(c, p, &req->iface, false) == 0) + goto err; + + if (gr_api_client_send_recv(c, GR_INFRA_IFACE_ADD, len, req, &resp_ptr) < 0) + goto err; + + free(req); + resp = resp_ptr; + printf("Created interface %u\n", resp->iface_id); + free(resp_ptr); + return CMD_SUCCESS; +err: + free(req); + return CMD_ERROR; +} + +static cmd_status_t bridge_set(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_infra_iface_set_req *req = NULL; + cmd_status_t ret = CMD_ERROR; + size_t len; + + len = sizeof(*req) + sizeof(struct gr_iface_info_bridge); + if ((req = calloc(1, len)) == NULL) + goto out; + + if ((req->set_attrs = parse_bridge_args(c, p, &req->iface, true)) == 0) + goto out; + + if (gr_api_client_send_recv(c, GR_INFRA_IFACE_SET, len, req, NULL) < 0) + goto out; + + ret = CMD_SUCCESS; +out: + free(req); + return ret; +} + +#define BRIDGE_ATTRS_CMD IFACE_ATTRS_CMD ",(ageing_time AGE),(mac MAC),FLOOD,LEARN" + +#define BRIDGE_ATTRS_ARGS \ + IFACE_ATTRS_ARGS, \ + with_help( \ + "Expiration time for learned MAC addresses.", \ + ec_node_uint("AGE", 0, UINT16_MAX, 10) \ + ), \ + with_help("Bridge ethernet address.", ec_node_re("MAC", ETH_ADDR_RE)), \ + EC_NODE_OR( \ + "FLOOD", \ + with_help( \ + "Enable flooding of BUM traffic.", ec_node_str("flood", "flood") \ + ), \ + with_help( \ + "Disable flooding of BUM traffic.", \ + ec_node_str("no_flood", "no_flood") \ + ) \ + ), \ + EC_NODE_OR( \ + "LEARN", \ + with_help("Enable MAC learning.", ec_node_str("learn", "learn")), \ + with_help("Disable MAC learning.", ec_node_str("no_learn", "no_learn")) \ + ) + +static int ctx_init(struct ec_node *root) { + int ret; + + ret = 
CLI_COMMAND( + INTERFACE_ADD_CTX(root), + "bridge NAME [" BRIDGE_ATTRS_CMD "]", + bridge_add, + "Create a new bridge interface.", + with_help("Interface name.", ec_node("any", "NAME")), + BRIDGE_ATTRS_ARGS + ); + if (ret < 0) + return ret; + ret = CLI_COMMAND( + INTERFACE_SET_CTX(root), + "bridge NAME (name NEW_NAME)," BRIDGE_ATTRS_CMD, + bridge_set, + "Modify bridge parameters.", + with_help( + "Interface name.", + ec_node_dyn("NAME", complete_iface_names, INT2PTR(GR_IFACE_TYPE_BRIDGE)) + ), + with_help("New interface name.", ec_node("any", "NEW_NAME")), + BRIDGE_ATTRS_ARGS + ); + if (ret < 0) + return ret; + + return 0; +} + +static struct cli_context ctx = { + .name = "bridge", + .init = ctx_init, +}; + +static void __attribute__((constructor, used)) init(void) { + cli_context_register(&ctx); + register_iface_type(&bridge_type); +} diff --git a/modules/l2/cli/fdb.c b/modules/l2/cli/fdb.c new file mode 100644 index 000000000..4ab652086 --- /dev/null +++ b/modules/l2/cli/fdb.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +static int arg_iface( + struct gr_api_client *c, + const struct ec_pnode *p, + const char *id, + gr_iface_type_t type, + uint16_t *iface_id +) { + const char *name = arg_str(p, id); + if (name == NULL) + return -errno; + + struct gr_iface *iface = iface_from_name(c, name); + if (iface == NULL) + return -errno; + + if (type != GR_IFACE_TYPE_UNDEF && iface->type != type) { + free(iface); + return errno_set(EMEDIUMTYPE); + } + + *iface_id = iface->id; + free(iface); + return 0; +} + +static cmd_status_t fdb_add(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_fdb_add_req req = {.exist_ok = true}; + + if (arg_iface(c, p, "IFACE", GR_IFACE_TYPE_UNDEF, &req.fdb.iface_id) < 0) + return CMD_ERROR; + if (arg_eth_addr(p, "MAC", &req.fdb.mac) < 0) + return CMD_ERROR; + if 
(arg_u16(p, "VLAN", &req.fdb.vlan_id) < 0 && errno != ENOENT) + return CMD_ERROR; + + req.fdb.flags = GR_FDB_F_STATIC; + + if (gr_api_client_send_recv(c, GR_FDB_ADD, sizeof(req), &req, NULL) < 0) + return CMD_ERROR; + + return CMD_SUCCESS; +} + +static cmd_status_t fdb_del(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_fdb_del_req req = {.missing_ok = true}; + + if (arg_iface(c, p, "BRIDGE", GR_IFACE_TYPE_BRIDGE, &req.bridge_id) < 0) + return CMD_ERROR; + if (arg_eth_addr(p, "MAC", &req.mac) < 0) + return CMD_ERROR; + if (arg_u16(p, "VLAN", &req.vlan_id) < 0 && errno != ENOENT) + return CMD_ERROR; + + if (gr_api_client_send_recv(c, GR_FDB_DEL, sizeof(req), &req, NULL) < 0) + return CMD_ERROR; + + return CMD_SUCCESS; +} + +static cmd_status_t fdb_flush(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_fdb_flush_req req = { + .bridge_id = GR_IFACE_ID_UNDEF, + .iface_id = GR_IFACE_ID_UNDEF, + .flags = GR_FDB_F_LEARN, + }; + + if (arg_str(p, "BRIDGE") != NULL) { + if (arg_iface(c, p, "BRIDGE", GR_IFACE_TYPE_BRIDGE, &req.bridge_id) < 0) + return CMD_ERROR; + } + if (arg_str(p, "IFACE") != NULL) { + if (arg_iface(c, p, "IFACE", GR_IFACE_TYPE_UNDEF, &req.iface_id) < 0) + return CMD_ERROR; + } + if (arg_eth_addr(p, "MAC", &req.mac) < 0 && errno != ENOENT) + return CMD_ERROR; + + if (arg_str(p, "all") != NULL) + req.flags |= GR_FDB_F_STATIC; + + if (gr_api_client_send_recv(c, GR_FDB_FLUSH, sizeof(req), &req, NULL) < 0) + return CMD_ERROR; + + return CMD_SUCCESS; +} + +static size_t fdb_format_flags(char *buf, size_t len, gr_fdb_flags_t flags) { + size_t n = 0; + buf[0] = 0; + if (flags & GR_FDB_F_LEARN) + SAFE_BUF(snprintf, len, "%slearn", n ? " " : ""); + if (flags & GR_FDB_F_STATIC) + SAFE_BUF(snprintf, len, "%sstatic", n ? 
" " : ""); +err: + return n; +} + +static cmd_status_t fdb_show(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_fdb_list_req req = { + .bridge_id = GR_IFACE_ID_UNDEF, + .iface_id = GR_IFACE_ID_UNDEF, + .flags = 0, + }; + const struct gr_fdb_entry *fdb; + char flags[128]; + int ret; + + if (arg_str(p, "BRIDGE") != NULL) { + if (arg_iface(c, p, "BRIDGE", GR_IFACE_TYPE_BRIDGE, &req.bridge_id) < 0) + return CMD_ERROR; + } + if (arg_str(p, "IFACE") != NULL) { + if (arg_iface(c, p, "IFACE", GR_IFACE_TYPE_UNDEF, &req.iface_id) < 0) + return CMD_ERROR; + } + if (arg_str(p, "static") != NULL) + req.flags |= GR_FDB_F_STATIC; + if (arg_str(p, "learn") != NULL) + req.flags |= GR_FDB_F_LEARN; + + struct libscols_table *table = scols_new_table(); + scols_table_new_column(table, "BRIDGE", 0, 0); + scols_table_new_column(table, "MAC", 0, 0); + scols_table_new_column(table, "VLAN", 0, 0); + scols_table_new_column(table, "IFACE", 0, 0); + scols_table_new_column(table, "FLAGS", 0, 0); + scols_table_new_column(table, "AGE", 0, SCOLS_FL_RIGHT); + scols_table_set_column_separator(table, " "); + + gr_api_client_stream_foreach (fdb, ret, c, GR_FDB_LIST, sizeof(req), &req) { + struct libscols_line *line = scols_table_new_line(table, NULL); + + struct gr_iface *bridge = iface_from_id(c, fdb->bridge_id); + scols_line_sprintf(line, 0, "%s", bridge ? bridge->name : "[deleted]"); + free(bridge); + + scols_line_sprintf(line, 1, ETH_F, &fdb->mac); + + if (fdb->vlan_id != 0) + scols_line_sprintf(line, 2, "%u", fdb->vlan_id); + + struct gr_iface *iface = iface_from_id(c, fdb->iface_id); + scols_line_sprintf(line, 3, "%s", iface ? iface->name : "[deleted]"); + free(iface); + + if (fdb_format_flags(flags, sizeof(flags), fdb->flags)) + scols_line_set_data(line, 4, flags); + + scols_line_sprintf( + line, 5, "%lds", (gr_clock_us() - fdb->last_seen) / CLOCKS_PER_SEC + ); + } + + scols_print_table(table); + scols_unref_table(table); + + return ret < 0 ? 
CMD_ERROR : CMD_SUCCESS; +} + +static cmd_status_t fdb_config_set(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_fdb_config_set_req req; + + if (arg_u32(p, "MAX", &req.max_entries) < 0) + return CMD_ERROR; + + if (gr_api_client_send_recv(c, GR_FDB_CONFIG_SET, sizeof(req), &req, NULL) < 0) + return CMD_ERROR; + + return CMD_SUCCESS; +} + +static cmd_status_t fdb_config_show(struct gr_api_client *c, const struct ec_pnode *) { + const struct gr_fdb_config_get_resp *resp; + void *resp_ptr = NULL; + float used = 0.0; + + if (gr_api_client_send_recv(c, GR_FDB_CONFIG_GET, 0, NULL, &resp_ptr) < 0) + return CMD_ERROR; + + resp = resp_ptr; + if (resp->max_entries != 0) + used = (100.0 * (float)resp->used_entries) / (float)resp->max_entries; + printf("used %u (%.01f%%)\n", resp->used_entries, used); + printf("max %u\n", resp->max_entries); + free(resp_ptr); + + return CMD_SUCCESS; +} + +#define FDB_CTX(root) CLI_CONTEXT(root, CTX_ARG("fdb", "Forwarding database.")) + +static int ctx_init(struct ec_node *root) { + int ret; + + ret = CLI_COMMAND( + FDB_CTX(root), + "add MAC iface IFACE [vlan VLAN]", + fdb_add, + "Add a static FDB entry.", + with_help("MAC address.", ec_node_re("MAC", ETH_ADDR_RE)), + with_help( + "Bridge member interface.", + ec_node_dyn("IFACE", complete_iface_names, INT2PTR(GR_IFACE_TYPE_UNDEF)) + ), + with_help("VLAN ID.", ec_node_uint("VLAN", 1, 4094, 10)) + ); + if (ret < 0) + return ret; + + ret = CLI_COMMAND( + FDB_CTX(root), + "del bridge BRIDGE MAC [vlan VLAN]", + fdb_del, + "Delete an FDB entry.", + with_help( + "Bridge interface.", + ec_node_dyn("BRIDGE", complete_iface_names, INT2PTR(GR_IFACE_TYPE_BRIDGE)) + ), + with_help("MAC address.", ec_node_re("MAC", ETH_ADDR_RE)), + with_help("VLAN ID.", ec_node_uint("VLAN", 1, 4094, 10)) + ); + if (ret < 0) + return ret; + + ret = CLI_COMMAND( + FDB_CTX(root), + "flush [(bridge BRIDGE),(iface IFACE),(mac MAC),(all)]", + fdb_flush, + "Flush dynamic FDB entries.", + with_help( + "Flush only 
entries on this bridge.", + ec_node_dyn("BRIDGE", complete_iface_names, INT2PTR(GR_IFACE_TYPE_BRIDGE)) + ), + with_help( + "Flush only entries on this interface.", + ec_node_dyn("IFACE", complete_iface_names, INT2PTR(GR_IFACE_TYPE_UNDEF)) + ), + with_help( + "Flush only entries matching this MAC address.", + ec_node_re("MAC", ETH_ADDR_RE) + ), + with_help("Flush all entries including static.", ec_node_str("all", "all")) + ); + if (ret < 0) + return ret; + + ret = CLI_COMMAND( + FDB_CTX(root), + "config set max MAX", + fdb_config_set, + "Change the FDB configuration.", + with_help("Maximum number of FDB entries.", ec_node_uint("MAX", 1, UINT32_MAX, 10)) + ); + if (ret < 0) + return ret; + + ret = CLI_COMMAND( + FDB_CTX(root), + "config [show]", + fdb_config_show, + "Show the current FDB configuration." + ); + if (ret < 0) + return ret; + + ret = CLI_COMMAND( + FDB_CTX(root), + "[show] [(bridge BRIDGE),(iface IFACE),(static|learn)]", + fdb_show, + "Show FDB entries.", + with_help( + "Show only entries on this bridge.", + ec_node_dyn("BRIDGE", complete_iface_names, INT2PTR(GR_IFACE_TYPE_BRIDGE)) + ), + with_help( + "Show only entries on this interface.", + ec_node_dyn("IFACE", complete_iface_names, INT2PTR(GR_IFACE_TYPE_UNDEF)) + ), + with_help("Show only static entries.", ec_node_str("static", "static")), + with_help("Show only learned entries.", ec_node_str("learn", "learn")) + ); + if (ret < 0) + return ret; + + return 0; +} + +static struct cli_context ctx = { + .name = "fdb", + .init = ctx_init, +}; + +static void fdb_event_print(uint32_t event, const void *obj) { + const struct gr_fdb_entry *fdb = obj; + const char *action; + char flags[128]; + + switch (event) { + case GR_EVENT_FDB_ADD: + action = "add"; + break; + case GR_EVENT_FDB_DEL: + action = "del"; + break; + case GR_EVENT_FDB_UPDATE: + action = "update"; + break; + default: + action = "?"; + break; + } + + printf("fdb %s: bridge=%u " ETH_F, action, fdb->bridge_id, &fdb->mac); + if (fdb->vlan_id != 0) + 
printf(" vlan=%u", fdb->vlan_id); + printf(" iface=%u", fdb->iface_id); + if (fdb_format_flags(flags, sizeof(flags), fdb->flags)) + printf(" %s", flags); + printf("\n"); +} + +static struct cli_event_printer printer = { + .print = fdb_event_print, + .ev_count = 3, + .ev_types = { + GR_EVENT_FDB_ADD, + GR_EVENT_FDB_DEL, + GR_EVENT_FDB_UPDATE, + }, +}; + +static void __attribute__((constructor, used)) init(void) { + cli_context_register(&ctx); + cli_event_printer_register(&printer); +} diff --git a/modules/l2/cli/meson.build b/modules/l2/cli/meson.build new file mode 100644 index 000000000..53b9e5699 --- /dev/null +++ b/modules/l2/cli/meson.build @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +cli_src += files( + 'bridge.c', + 'fdb.c', +) diff --git a/modules/l2/control/bridge.c b/modules/l2/control/bridge.c new file mode 100644 index 000000000..208f37112 --- /dev/null +++ b/modules/l2/control/bridge.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include + +#include +#include + +static int bridge_reconfig( + struct iface *iface, + uint64_t set_attrs, + const struct gr_iface *, + const void *api_info +) { + struct iface_info_bridge *cur = iface_info_bridge(iface); + const struct gr_iface_info_bridge *next = api_info; + + if (set_attrs & GR_BRIDGE_SET_MAC) + iface_set_eth_addr(iface, &next->mac); + if (set_attrs & GR_BRIDGE_SET_FLAGS) + cur->flags = next->flags; + if (set_attrs & GR_BRIDGE_SET_AGEING_TIME) + cur->ageing_time = next->ageing_time ?: GR_BRIDGE_DEFAULT_AGEING; + + return 0; +} + +static int bridge_attach_member(struct iface *bridge, struct iface *member) { + struct iface_info_bridge *br = iface_info_bridge(bridge); + + switch (member->type) { + case GR_IFACE_TYPE_PORT: + case GR_IFACE_TYPE_VLAN: + case GR_IFACE_TYPE_BOND: + break; + default: + return errno_set(EMEDIUMTYPE); + } + + for (unsigned i = 0; i < br->n_members; i++) { 
+ if (br->members[i] == member) + return 0; // already a member + } + + if (br->n_members == ARRAY_DIM(br->members)) + return errno_set(EUSERS); + + br->members[br->n_members++] = member; + member->domain_id = bridge->id; + member->vrf_id = GR_VRF_ID_UNDEF; + member->mode = GR_IFACE_MODE_BRIDGE; + + return 0; +} + +static int bridge_detach_member(struct iface *bridge, struct iface *member) { + struct iface_info_bridge *br = iface_info_bridge(bridge); + + for (unsigned i = 0; i < br->n_members; i++) { + if (br->members[i] == member) { + unsigned last = br->n_members - 1; + if (i < last) + br->members[i] = br->members[last]; + br->n_members--; + member->domain_id = GR_IFACE_ID_UNDEF; + member->mode = GR_IFACE_MODE_VRF; + fdb_purge_iface(member->id); + break; + } + } + + return 0; +} + +static int bridge_fini(struct iface *iface) { + struct iface_info_bridge *bridge = iface_info_bridge(iface); + + for (unsigned i = 0; i < bridge->n_members; i++) { + struct iface *member = bridge->members[i]; + member->vrf_id = vrf_default_get_or_create(); + if (member->vrf_id != GR_VRF_ID_UNDEF) + vrf_incref(member->vrf_id); + member->domain_id = GR_IFACE_ID_UNDEF; + member->mode = GR_IFACE_MODE_VRF; + gr_event_push(GR_EVENT_IFACE_POST_RECONFIG, member); + } + + fdb_purge_bridge(iface->id); + + return 0; +} + +static int bridge_init(struct iface *iface, const void *api_info) { + int ret; + + iface->domain_id = iface->id; // for convenience, bridges are in their own domain + + ret = bridge_reconfig(iface, IFACE_SET_ALL, NULL, api_info); + if (ret < 0) { + bridge_fini(iface); + errno = -ret; + } + + return ret; +} + +static int bridge_get_eth_addr(const struct iface *iface, struct rte_ether_addr *mac) { + const struct iface_info_bridge *bridge = iface_info_bridge(iface); + *mac = bridge->mac; + return 0; +} + +static int bridge_set_eth_addr(struct iface *iface, const struct rte_ether_addr *mac) { + struct iface_info_bridge *bridge = iface_info_bridge(iface); + + if 
(rte_is_zero_ether_addr(mac)) { + rte_eth_random_addr(bridge->mac.addr_bytes); + } else { + bridge->mac = *mac; + } + + return 0; +} + +static void bridge_to_api(void *info, const struct iface *iface) { + const struct iface_info_bridge *bridge = iface_info_bridge(iface); + struct gr_iface_info_bridge *api = info; + + api->ageing_time = bridge->ageing_time; + api->flags = bridge->flags; + api->mac = bridge->mac; + api->n_members = bridge->n_members; + for (unsigned i = 0; i < bridge->n_members; i++) + api->members[i] = bridge->members[i]->id; +} + +static struct iface_type iface_type_bridge = { + .id = GR_IFACE_TYPE_BRIDGE, + .pub_size = sizeof(struct gr_iface_info_bridge), + .priv_size = sizeof(struct iface_info_bridge), + .init = bridge_init, + .reconfig = bridge_reconfig, + .fini = bridge_fini, + .attach_domain = bridge_attach_member, + .detach_domain = bridge_detach_member, + .get_eth_addr = bridge_get_eth_addr, + .set_eth_addr = bridge_set_eth_addr, + .to_api = bridge_to_api, +}; + +RTE_INIT(bridge_constructor) { + iface_type_register(&iface_type_bridge); +} diff --git a/modules/l2/control/fdb.c b/modules/l2/control/fdb.c new file mode 100644 index 000000000..dfea4339c --- /dev/null +++ b/modules/l2/control/fdb.c @@ -0,0 +1,452 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct fdb_key { + uint16_t bridge_id; + uint16_t vlan_id; + struct rte_ether_addr mac; +}; + +static unsigned fdb_max_entries; +static struct rte_hash *fdb_hash; +static struct rte_mempool *fdb_pool; + +static void fdb_free_entry(void *pool, void *fdb) { + gr_event_push(GR_EVENT_FDB_DEL, fdb); + rte_mempool_put(pool, fdb); +} + +static int fdb_reconfig(unsigned max_entries) { + char name[64]; + snprintf(name, sizeof(name), "fdb-%u", max_entries); + + struct rte_hash_parameters params = { + .name = name, + .socket_id = SOCKET_ID_ANY, + .key_len = sizeof(struct 
fdb_key), + .entries = max_entries, + .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF + | RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT, + }; + + struct rte_hash *h = rte_hash_create(&params); + if (h == NULL) + return errno_log(rte_errno, "rte_hash_create"); + + struct rte_mempool *p = rte_mempool_create( + name, + rte_align32pow2(max_entries) - 1, + sizeof(struct gr_fdb_entry), + 0, // cache size + 0, // priv size + NULL, // mp_init + NULL, // mp_init_arg + NULL, // obj_init + NULL, // obj_init_arg + SOCKET_ID_ANY, + 0 // flags + ); + if (p == NULL) { + rte_hash_free(h); + return errno_log(rte_errno, "rte_mempool_create"); + } + + struct rte_hash_rcu_config conf = { + .v = gr_datapath_rcu(), + .mode = RTE_HASH_QSBR_MODE_SYNC, + .free_key_data_func = fdb_free_entry, + .key_data_ptr = p, + }; + if (rte_hash_rcu_qsbr_add(h, &conf) < 0) { + rte_hash_free(h); + rte_mempool_free(p); + return errno_log(rte_errno, "rte_hash_rcu_qsbr_add"); + } + + struct rte_hash *tmp_h = fdb_hash; + struct rte_mempool *tmp_p = fdb_pool; + fdb_hash = h; + fdb_pool = p; + + rte_rcu_qsbr_synchronize(gr_datapath_rcu(), rte_lcore_id()); + + rte_hash_free(tmp_h); + rte_mempool_free(tmp_p); + + fdb_max_entries = max_entries; + + return 0; +} + +const struct gr_fdb_entry * +fdb_lookup(uint16_t bridge_id, const struct rte_ether_addr *mac, uint16_t vlan_id) { + const struct fdb_key key = {bridge_id, vlan_id, *mac}; + void *data; + + if (rte_hash_lookup_data(fdb_hash, &key, &data) < 0) + return errno_set_null(ENOENT); + + return data; +} + +// Learn a new FDB entry or refresh its last_seen timestamp. 
+void fdb_learn( + uint16_t bridge_id, + uint16_t iface_id, + const struct rte_ether_addr *mac, + uint16_t vlan_id +) { + const struct fdb_key key = {bridge_id, vlan_id, *mac}; + struct gr_fdb_entry *fdb; + void *data; + + if (rte_hash_lookup_data(fdb_hash, &key, &data) < 0) { + if (rte_mempool_get(fdb_pool, &data) < 0) + return; // pool exhausted + + fdb = data; + fdb->bridge_id = bridge_id; + fdb->vlan_id = vlan_id; + fdb->mac = *mac; + fdb->flags = GR_FDB_F_LEARN; + fdb->iface_id = iface_id; + + if (rte_hash_add_key_data(fdb_hash, &key, fdb) < 0) { + // no space left in hash + rte_mempool_put(fdb_pool, fdb); + return; + } + + gr_event_push(GR_EVENT_FDB_ADD, fdb); + } else { + fdb = data; + } + + fdb->last_seen = gr_clock_us(); + + if ((fdb->flags & GR_FDB_F_LEARN) && fdb->iface_id != iface_id) { + // update in case the mac address has moved + fdb->iface_id = iface_id; + gr_event_push(GR_EVENT_FDB_UPDATE, fdb); + } +} + +void fdb_purge_iface(uint16_t iface_id) { + struct gr_fdb_entry *fdb; + uint32_t next = 0; + const void *key; + void *data; + + while (rte_hash_iterate(fdb_hash, &key, &data, &next) >= 0) { + fdb = data; + if (fdb->iface_id == iface_id) { + rte_hash_del_key(fdb_hash, key); + } + } +} + +void fdb_purge_bridge(uint16_t bridge_id) { + struct gr_fdb_entry *fdb; + uint32_t next = 0; + const void *key; + void *data; + + while (rte_hash_iterate(fdb_hash, &key, &data, &next) >= 0) { + fdb = data; + if (fdb->bridge_id == bridge_id) { + rte_hash_del_key(fdb_hash, key); + } + } +} + +static struct api_out fdb_add(const void *request, struct api_ctx *) { + const struct gr_fdb_add_req *req = request; + const struct iface *iface; + struct gr_fdb_entry *e; + void *data; + int ret; + + if (req->fdb.flags & ~GR_FDB_F_STATIC) + return api_out(EINVAL, 0, NULL); + + iface = iface_from_id(req->fdb.iface_id); + if (iface == NULL) + return api_out(errno, 0, NULL); + + iface = iface_from_id(iface->domain_id); + if (iface == NULL) + return api_out(EMEDIUMTYPE, 0, NULL); 
+ + if (iface->type != GR_IFACE_TYPE_BRIDGE) + return api_out(EMEDIUMTYPE, 0, NULL); + + const struct fdb_key key = {iface->id, req->fdb.vlan_id, req->fdb.mac}; + + if (rte_hash_lookup_data(fdb_hash, &key, &data) < 0) { + if ((ret = rte_mempool_get(fdb_pool, &data)) < 0) + return api_out(-ret, 0, NULL); + + e = data; + *e = req->fdb; + e->bridge_id = iface->id; + e->last_seen = gr_clock_us(); + + if ((ret = rte_hash_add_key_data(fdb_hash, &key, data)) < 0) { + rte_mempool_put(fdb_pool, e); + return api_out(-ret, 0, NULL); + } + + gr_event_push(GR_EVENT_FDB_ADD, e); + } else if (req->exist_ok) { + e = data; + *e = req->fdb; + e->bridge_id = iface->id; + e->last_seen = gr_clock_us(); + + gr_event_push(GR_EVENT_FDB_UPDATE, e); + } else { + return api_out(EEXIST, 0, NULL); + } + + return api_out(0, 0, NULL); +} + +static struct gr_api_handler add_handler = { + .name = "fdb add", + .request_type = GR_FDB_ADD, + .callback = fdb_add, +}; + +static struct api_out fdb_del(const void *request, struct api_ctx *) { + const struct gr_fdb_del_req *req = request; + const struct fdb_key key = {req->bridge_id, req->vlan_id, req->mac}; + int ret; + + ret = rte_hash_del_key(fdb_hash, &key); + if (ret == -ENOENT && req->missing_ok) + ret = 0; + else if (ret > 0) + ret = 0; + + return api_out(-ret, 0, NULL); +} + +static struct gr_api_handler del_handler = { + .name = "fdb del", + .request_type = GR_FDB_DEL, + .callback = fdb_del, +}; + +static inline bool fdb_match( + const struct gr_fdb_entry *e, + gr_fdb_flags_t flags, + uint16_t bridge_id, + uint16_t iface_id, + const struct rte_ether_addr *mac +) { + if ((flags & GR_FDB_F_STATIC) && !(e->flags & GR_FDB_F_STATIC)) + return false; + if ((flags & GR_FDB_F_LEARN) && !(e->flags & GR_FDB_F_LEARN)) + return false; + if (bridge_id != GR_IFACE_ID_UNDEF && e->bridge_id != bridge_id) + return false; + if (iface_id != GR_IFACE_ID_UNDEF && e->iface_id != iface_id) + return false; + if (mac != NULL && !rte_is_zero_ether_addr(mac) && 
!rte_is_same_ether_addr(&e->mac, mac)) + return false; + return true; +} + +static struct api_out fdb_flush(const void *request, struct api_ctx *) { + const struct gr_fdb_flush_req *req = request; + uint32_t next = 0; + const void *key; + void *data; + int ret; + + while (rte_hash_iterate(fdb_hash, &key, &data, &next) >= 0) { + if (!fdb_match(data, req->flags, req->bridge_id, req->iface_id, &req->mac)) + continue; + + ret = rte_hash_del_key(fdb_hash, key); + if (ret < 0) + return api_out(-ret, 0, NULL); + } + + return api_out(0, 0, NULL); +} + +static struct gr_api_handler flush_handler = { + .name = "fdb flush", + .request_type = GR_FDB_FLUSH, + .callback = fdb_flush, +}; + +static struct api_out fdb_list(const void *request, struct api_ctx *ctx) { + const struct gr_fdb_list_req *req = request; + struct gr_fdb_entry *fdb; + uint32_t next = 0; + const void *key; + void *data; + + while (rte_hash_iterate(fdb_hash, &key, &data, &next) >= 0) { + if (!fdb_match(data, req->flags, req->bridge_id, req->iface_id, NULL)) + continue; + + fdb = data; + api_send(ctx, sizeof(*fdb), fdb); + } + + return api_out(0, 0, NULL); +} + +static struct gr_api_handler list_handler = { + .name = "fdb list", + .request_type = GR_FDB_LIST, + .callback = fdb_list, +}; + +static struct api_out fdb_config_get(const void * /*request*/, struct api_ctx *) { + struct gr_fdb_config_get_resp *resp = malloc(sizeof(*resp)); + + if (resp == NULL) + return api_out(ENOMEM, 0, NULL); + + resp->max_entries = fdb_max_entries; + resp->used_entries = rte_hash_count(fdb_hash); + + return api_out(0, sizeof(*resp), resp); +} + +static struct gr_api_handler config_get_handler = { + .name = "fdb config get", + .request_type = GR_FDB_CONFIG_GET, + .callback = fdb_config_get, +}; + +static struct api_out fdb_config_set(const void *request, struct api_ctx *) { + const struct gr_fdb_config_set_req *req = request; + + if (req->max_entries == 0) + return api_out(EINVAL, 0, NULL); + + if (req->max_entries != 
fdb_max_entries) { + if (rte_hash_count(fdb_hash) > 0) + return api_out(EBUSY, 0, NULL); + + if (fdb_reconfig(req->max_entries) < 0) + return api_out(errno, 0, NULL); + + fdb_max_entries = req->max_entries; + } + + return api_out(0, 0, NULL); +} + +static struct gr_api_handler config_set_handler = { + .name = "fdb config set", + .request_type = GR_FDB_CONFIG_SET, + .callback = fdb_config_set, +}; + +static struct gr_event_serializer serializer = { + .size = sizeof(struct gr_fdb_entry), + .ev_count = 3, + .ev_types = { + GR_EVENT_FDB_ADD, + GR_EVENT_FDB_DEL, + GR_EVENT_FDB_UPDATE, + }, +}; + +static void fdb_ageing_cb(evutil_socket_t, short /*what*/, void * /*priv*/) { + const struct iface *bridge; + struct gr_fdb_entry *fdb; + uint32_t next = 0; + uint16_t max_age; + const void *key; + clock_t now; + void *data; + time_t age; + + now = gr_clock_us(); + + while (rte_hash_iterate(fdb_hash, &key, &data, &next) >= 0) { + fdb = data; + + if ((fdb->flags & GR_FDB_F_STATIC) || !(fdb->flags & GR_FDB_F_LEARN)) + continue; + + age = (now - fdb->last_seen) / CLOCKS_PER_SEC; + + bridge = iface_from_id(fdb->bridge_id); + if (bridge != NULL) + max_age = iface_info_bridge(bridge)->ageing_time; + else + max_age = GR_BRIDGE_DEFAULT_AGEING; + + if (age > max_age) { + LOG(DEBUG, + ETH_F " vlan=%u bridge=%u iface=%u: aged out (%ld sec)", + &fdb->mac, + fdb->vlan_id, + fdb->bridge_id, + fdb->iface_id, + age); + rte_hash_del_key(fdb_hash, key); + } + } +} + +static struct event *ageing_timer; + +#define FDB_DEFAULT_MAX_ENTRIES 4096 + +static void fdb_init(struct event_base *base) { + if (fdb_reconfig(FDB_DEFAULT_MAX_ENTRIES) < 0) + ABORT("fdb_reconfig failed"); + + ageing_timer = event_new(base, -1, EV_PERSIST | EV_FINALIZE, fdb_ageing_cb, NULL); + if (ageing_timer == NULL) + ABORT("event_new() failed"); + + if (event_add(ageing_timer, &(struct timeval) {.tv_sec = 1}) < 0) + ABORT("event_add() failed"); +} + +static void fdb_fini(struct event_base *) { + if (ageing_timer != NULL) + 
event_free(ageing_timer); + + rte_hash_free(fdb_hash); + rte_mempool_free(fdb_pool); +} + +static struct gr_module module = { + .name = "fdb", + .depends_on = "rcu", + .init = fdb_init, + .fini = fdb_fini, +}; + +RTE_INIT(init) { + gr_register_api_handler(&add_handler); + gr_register_api_handler(&del_handler); + gr_register_api_handler(&flush_handler); + gr_register_api_handler(&list_handler); + gr_register_api_handler(&config_get_handler); + gr_register_api_handler(&config_set_handler); + gr_event_register_serializer(&serializer); + gr_register_module(&module); +} diff --git a/modules/l2/control/gr_l2_control.h b/modules/l2/control/gr_l2_control.h new file mode 100644 index 000000000..89284ee87 --- /dev/null +++ b/modules/l2/control/gr_l2_control.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#pragma once + +#include +#include + +#include + +// Internal bridge info structure. +GR_IFACE_INFO(GR_IFACE_TYPE_BRIDGE, iface_info_bridge, { + BASE(__gr_iface_info_bridge_base); + + struct iface *members[GR_BRIDGE_MAX_MEMBERS]; +}); + +// Lookup a FDB entry from a MAC address and VLAN +const struct gr_fdb_entry * +fdb_lookup(uint16_t bridge_id, const struct rte_ether_addr *, uint16_t vlan_id); + +// Learn a new FDB entry or refresh its last_seen timestamp. +void fdb_learn( + uint16_t bridge_id, + uint16_t iface_id, + const struct rte_ether_addr *, + uint16_t vlan_id +); + +// Delete all FDB entries referencing the provided interface. +void fdb_purge_iface(uint16_t iface_id); + +// Delete all FDB entries referencing the provided bridge. 
+void fdb_purge_bridge(uint16_t bridge_id); diff --git a/modules/l2/control/meson.build b/modules/l2/control/meson.build new file mode 100644 index 000000000..e98d2892b --- /dev/null +++ b/modules/l2/control/meson.build @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +src += files( + 'bridge.c', + 'fdb.c', +) + +inc += include_directories('.') diff --git a/modules/l2/datapath/bridge_flood.c b/modules/l2/datapath/bridge_flood.c new file mode 100644 index 000000000..50ae8370b --- /dev/null +++ b/modules/l2/datapath/bridge_flood.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +enum edges { + OUTPUT = 0, + INPUT, + DROP, + EDGE_COUNT +}; + +static inline struct rte_mbuf * +clone_packet(struct rte_mbuf *m, uint16_t clone_count, const struct iface *output_iface) { + struct rte_mbuf *clone; + + // Copy packet for each output port (except the first one) + if (clone_count == 0) { + clone = m; + } else { + clone = gr_mbuf_copy(m, UINT32_MAX, sizeof(struct mbuf_data)); + if (clone == NULL) { + // TODO: add xstat + return NULL; + } + } + + mbuf_data(clone)->iface = output_iface; + + return clone; +} + +static uint16_t bridge_flood_process( + struct rte_graph *graph, + struct rte_node *node, + void **objs, + uint16_t nb_objs +) { + const struct iface *br, *member, *iface; + const struct iface_info_bridge *bridge; + struct rte_mbuf *m, *clone; + uint16_t flood_count; + uint16_t sent = 0; + + for (uint16_t i = 0; i < nb_objs; i++) { + m = objs[i]; + flood_count = 0; + + if (gr_mbuf_is_traced(m)) + gr_mbuf_trace_add(m, node, 0); + + iface = mbuf_data(m)->iface; + assert(iface != NULL); + + br = iface_from_id(iface->domain_id); + if (br == NULL || br->type != GR_IFACE_TYPE_BRIDGE) + goto next; + + bridge = iface_info_bridge(br); + + for (uint16_t j = 0; j < 
bridge->n_members; j++) { + member = bridge->members[j]; + + if (member == iface) + continue; // Never flood back to source + + if (!(member->flags & GR_IFACE_F_UP)) + continue; // Skip down interfaces + + clone = clone_packet(m, flood_count, member); + if (clone == NULL) + continue; + + rte_node_enqueue_x1(graph, node, OUTPUT, clone); + flood_count++; + } + if (iface != br && (br->flags & GR_IFACE_F_UP)) { + // also flood to bridge interface + clone = clone_packet(m, flood_count, br); + if (clone != NULL) { + rte_node_enqueue_x1(graph, node, INPUT, clone); + flood_count++; + } + } +next: + if (flood_count == 0) { + // If no flooding occurred, drop the original packet + rte_node_enqueue_x1(graph, node, DROP, m); + } + sent += flood_count; + } + + return sent; +} + +static struct rte_node_register node = { + .name = "bridge_flood", + .process = bridge_flood_process, + .nb_edges = EDGE_COUNT, + .next_nodes = { + [OUTPUT] = "iface_output", + [INPUT] = "iface_input", + [DROP] = "bridge_flood_drop", + }, +}; + +static struct gr_node_info info = { + .node = &node, + .type = GR_NODE_T_L2, +}; + +GR_NODE_REGISTER(info); + +GR_DROP_REGISTER(bridge_flood_drop); diff --git a/modules/l2/datapath/bridge_input.c b/modules/l2/datapath/bridge_input.c new file mode 100644 index 000000000..a54f7116c --- /dev/null +++ b/modules/l2/datapath/bridge_input.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include +#include +#include +#include +#include +#include +#include + +#include + +enum edges { + OUTPUT = 0, + INPUT, + FLOOD, + BRIDGE_INVAL, + HAIRPIN, + OUT_IFACE_INVAL, + FLOOD_DISABLED, + EDGE_COUNT +}; + +struct bridge_input_trace { + uint16_t iface_id; + uint16_t bridge_id; +}; + +static uint16_t bridge_input_process( + struct rte_graph *graph, + struct rte_node *node, + void **objs, + uint16_t nb_objs +) { + const struct iface *bridge, *iface; + const struct iface_info_bridge *br; + const struct gr_fdb_entry *fdb; + struct 
iface_mbuf_data *d; + struct rte_ether_hdr *eth; + struct rte_mbuf *m; + rte_edge_t edge; + + for (uint16_t i = 0; i < nb_objs; i++) { + m = objs[i]; + d = iface_mbuf_data(m); + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + fdb = NULL; + + if (gr_mbuf_is_traced(m)) { + struct bridge_input_trace *t = gr_mbuf_trace_add(m, node, sizeof(*t)); + t->iface_id = d->iface->id; + t->bridge_id = d->iface->domain_id; + } + + bridge = iface_from_id(d->iface->domain_id); + if (bridge == NULL || bridge->type != GR_IFACE_TYPE_BRIDGE) { + edge = BRIDGE_INVAL; + goto next; + } + br = iface_info_bridge(bridge); + + if (rte_is_unicast_ether_addr(&eth->src_addr) + && !(br->flags & GR_BRIDGE_F_NO_LEARN)) + fdb_learn(bridge->id, d->iface->id, &eth->src_addr, d->vlan_id); + + if (rte_is_unicast_ether_addr(&eth->dst_addr)) { + fdb = fdb_lookup(bridge->id, &eth->dst_addr, d->vlan_id); + if (fdb == NULL) { + // Unknown unicast + edge = FLOOD; + goto next; + } + if (fdb->iface_id == d->iface->id) { + // Don't forward back to source interface + edge = HAIRPIN; + goto next; + } + iface = iface_from_id(fdb->iface_id); + if (iface == NULL) { + edge = OUT_IFACE_INVAL; + goto next; + } + // Direct output to learned interface + d->iface = iface; + if (iface->type == GR_IFACE_TYPE_BRIDGE) { + edge = INPUT; + } else { + edge = OUTPUT; + } + } else { + // Broadcast, multicast + edge = FLOOD; + } +next: + if (edge == FLOOD && (br->flags & GR_BRIDGE_F_NO_FLOOD)) + edge = FLOOD_DISABLED; + + rte_node_enqueue_x1(graph, node, edge, m); + } + + return nb_objs; +} + +static int bridge_input_trace_format(char *buf, size_t len, const void *data, size_t /*data_len*/) { + const struct bridge_input_trace *t = data; + const struct iface *iface = iface_from_id(t->iface_id); + const struct iface *bridge = iface_from_id(t->bridge_id); + return snprintf( + buf, + len, + "iface=%s bridge=%s", + iface ? iface->name : "[deleted]", + bridge ? 
bridge->name : "[deleted]" + ); +} + +static void bridge_input_register(void) { + iface_input_mode_register(GR_IFACE_MODE_BRIDGE, "bridge_input"); + iface_output_type_register(GR_IFACE_TYPE_BRIDGE, "bridge_input"); +} + +static struct rte_node_register node = { + .name = "bridge_input", + .process = bridge_input_process, + .nb_edges = EDGE_COUNT, + .next_nodes = { + [OUTPUT] = "iface_output", + [INPUT] = "iface_input", + [FLOOD] = "bridge_flood", + [BRIDGE_INVAL] = "bridge_input_invalid_domain", + [HAIRPIN] = "bridge_input_hairpin", + [OUT_IFACE_INVAL] = "bridge_input_invalid_output", + [FLOOD_DISABLED] = "bridge_input_flood_disabled", + }, +}; + +static struct gr_node_info info = { + .node = &node, + .type = GR_NODE_T_L2, + .register_callback = bridge_input_register, + .trace_format = bridge_input_trace_format, +}; + +GR_NODE_REGISTER(info); + +GR_DROP_REGISTER(bridge_input_invalid_domain); +GR_DROP_REGISTER(bridge_input_hairpin); +GR_DROP_REGISTER(bridge_input_invalid_output); +GR_DROP_REGISTER(bridge_input_flood_disabled); diff --git a/modules/l2/datapath/meson.build b/modules/l2/datapath/meson.build new file mode 100644 index 000000000..d61132060 --- /dev/null +++ b/modules/l2/datapath/meson.build @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +src += files( + 'bridge_flood.c', + 'bridge_input.c', +) diff --git a/modules/l2/meson.build b/modules/l2/meson.build new file mode 100644 index 000000000..53fa9d7b8 --- /dev/null +++ b/modules/l2/meson.build @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +subdir('api') +subdir('cli') +subdir('control') +subdir('datapath') diff --git a/modules/meson.build b/modules/meson.build index 1859be3d3..0b978a62b 100644 --- a/modules/meson.build +++ b/modules/meson.build @@ -5,6 +5,7 @@ subdir('infra') subdir('ip') subdir('ip6') subdir('ipip') +subdir('l2') subdir('l4') subdir('policy') subdir('srv6') diff --git a/smoke/bridge_test.sh 
b/smoke/bridge_test.sh new file mode 100755 index 000000000..31089fdab --- /dev/null +++ b/smoke/bridge_test.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +. $(dirname $0)/_init.sh + +grcli interface add bridge br0 + +port_add p0 domain br0 +port_add p1 domain br0 +port_add p2 domain br0 + +grcli interface show name br0 + +grcli address add 172.16.0.1/24 iface br0 + +for n in 0 1 2; do + p=x-p$n + ns=n$n + netns_add $ns + move_to_netns $p $ns + ip -n $ns addr add 172.16.0.1$n/24 dev $p + ip -n $ns route add default via 172.16.0.1 +done + +ip netns exec n0 ping -i0.01 -c3 -W1 -n 172.16.0.11 || fail "L2 ping n0->n1 failed" +ip netns exec n1 ping -i0.01 -c3 -W1 -n 172.16.0.12 || fail "L2 ping n1->n2 failed" +ip netns exec n2 ping -i0.01 -c3 -W1 -n 172.16.0.10 || fail "L2 ping n2->n0 failed" + +for n in 0 1 2; do + mac=$(ip netns exec n$n cat /sys/class/net/x-p$n/address) + grcli fdb show iface p$n learn | grep -F "$mac" +done + +# overwrite dynamic learned fdb entry with static one +mac=$(ip netns exec n0 cat /sys/class/net/x-p0/address) +grcli fdb add "$mac" iface p0 +grcli fdb show iface p0 static | grep -F "$mac" + +grcli ping 172.16.0.10 count 3 delay 10 + +ip netns exec n0 ping -i0.01 -c3 -W1 -n 172.16.0.1 || fail "L3 ping n0->bridge failed" +ip netns exec n1 ping -i0.01 -c3 -W1 -n 172.16.0.1 || fail "L3 ping n1->bridge failed" +ip netns exec n2 ping -i0.01 -c3 -W1 -n 172.16.0.1 || fail "L3 ping n2->bridge failed" + +grcli interface set port p1 vrf main +if grcli fdb show iface p1 | grep .; then + fail "fdb still contains entries for removed interface" +fi + +grcli interface del br0 +if grcli fdb show | grep .; then + fail "fdb still contains entries" +fi diff --git a/subprojects/dpdk.wrap b/subprojects/dpdk.wrap index 828812103..8827d3fd4 100644 --- a/subprojects/dpdk.wrap +++ b/subprojects/dpdk.wrap @@ -2,7 +2,10 @@ url = https://github.com/DPDK/dpdk-stable revision = v25.11 depth = 1 -diff_files = 
dpdk/iavf-fix-reported-max-TX-and-RX-queues-in-ethdev-inf.patch +diff_files = + dpdk/iavf-fix-reported-max-TX-and-RX-queues-in-ethdev-inf.patch, + dpdk/hash-avoid-leaking-entries-on-RCU-defer-queue-failur.patch, + dpdk/hash-free-replaced-data-on-overwrite-when-RCU-is-con.patch [provide] dependency_names = libdpdk diff --git a/subprojects/packagefiles/dpdk/hash-avoid-leaking-entries-on-RCU-defer-queue-failur.patch b/subprojects/packagefiles/dpdk/hash-avoid-leaking-entries-on-RCU-defer-queue-failur.patch new file mode 100644 index 000000000..62f266876 --- /dev/null +++ b/subprojects/packagefiles/dpdk/hash-avoid-leaking-entries-on-RCU-defer-queue-failur.patch @@ -0,0 +1,46 @@ +From 182f9223b9d407db31de3e54833572be554f08dc Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Fri, 13 Feb 2026 11:20:19 +0100 +Subject: [PATCH dpdk] hash: avoid leaking entries on RCU defer queue failure + +When rte_rcu_qsbr_dq_enqueue() fails in DQ mode, the deleted key slot +is never freed and becomes permanently leaked. Fall back to synchronous +reclamation instead of only logging an error. + +Cc: stable@dpdk.org +Fixes: 769b2de7fb52 ("hash: implement RCU resources reclamation") + +Signed-off-by: Robin Jarry +--- + lib/hash/rte_cuckoo_hash.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c +index da12825c6ed2..8189bde024be 100644 +--- a/lib/hash/rte_cuckoo_hash.c ++++ b/lib/hash/rte_cuckoo_hash.c +@@ -1870,18 +1870,15 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, + /* Key index where key is stored, adding the first dummy index */ + rcu_dq_entry.key_idx = ret + 1; + rcu_dq_entry.ext_bkt_idx = index; +- if (h->dq == NULL) { ++ if (h->dq == NULL || rte_rcu_qsbr_dq_enqueue(h->dq, &rcu_dq_entry) != 0) { + /* Wait for quiescent state change if using +- * RTE_HASH_QSBR_MODE_SYNC ++ * RTE_HASH_QSBR_MODE_SYNC or if RCU enqueue failed. 
+ */ + rte_rcu_qsbr_synchronize(h->hash_rcu_cfg->v, + RTE_QSBR_THRID_INVALID); + __hash_rcu_qsbr_free_resource((void *)((uintptr_t)h), + &rcu_dq_entry, 1); +- } else if (h->dq) +- /* Push into QSBR FIFO if using RTE_HASH_QSBR_MODE_DQ */ +- if (rte_rcu_qsbr_dq_enqueue(h->dq, &rcu_dq_entry) != 0) +- HASH_LOG(ERR, "Failed to push QSBR FIFO"); ++ } + } + __hash_rw_writer_unlock(h); + return ret; +-- +2.53.0 + diff --git a/subprojects/packagefiles/dpdk/hash-free-replaced-data-on-overwrite-when-RCU-is-con.patch b/subprojects/packagefiles/dpdk/hash-free-replaced-data-on-overwrite-when-RCU-is-con.patch new file mode 100644 index 000000000..968eceaac --- /dev/null +++ b/subprojects/packagefiles/dpdk/hash-free-replaced-data-on-overwrite-when-RCU-is-con.patch @@ -0,0 +1,323 @@ +From ec82d13d198825f2d49d9fe24568dee5905554e9 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Thu, 12 Feb 2026 21:25:53 +0100 +Subject: [PATCH dpdk] hash: free replaced data on overwrite when RCU is + configured + +When rte_hash_add_key_data() overwrites an existing key, the old data +pointer is silently lost. With RCU-protected readers still potentially +accessing the old data, the application has no safe way to free it. + +When RCU is configured with a free_key_data_func callback, automatically +enqueue the old data for deferred freeing via the RCU defer queue on +overwrite. In SYNC mode, synchronize and call free_key_data_func +directly. 
+ +Cc: stable@dpdk.org +Fixes: 769b2de7fb52 ("hash: implement RCU resources reclamation") + +Signed-off-by: Robin Jarry +--- + lib/hash/rte_cuckoo_hash.c | 101 ++++++++++++++++++++++++++++--------- + lib/hash/rte_hash.h | 8 ++- + 2 files changed, 84 insertions(+), 25 deletions(-) + +diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c +index 8189bde024be..f487b3b725dd 100644 +--- a/lib/hash/rte_cuckoo_hash.c ++++ b/lib/hash/rte_cuckoo_hash.c +@@ -75,6 +75,7 @@ EAL_REGISTER_TAILQ(rte_hash_tailq) + struct __rte_hash_rcu_dq_entry { + uint32_t key_idx; + uint32_t ext_bkt_idx; ++ void *old_data; + }; + + RTE_EXPORT_SYMBOL(rte_hash_find_existing) +@@ -763,10 +764,11 @@ enqueue_slot_back(const struct rte_hash *h, + + /* Search a key from bucket and update its data. + * Writer holds the lock before calling this. ++ * If old_data is non-NULL, save the previous data pointer before overwriting. + */ + static inline int32_t + search_and_update(const struct rte_hash *h, void *data, const void *key, +- struct rte_hash_bucket *bkt, uint16_t sig) ++ struct rte_hash_bucket *bkt, uint16_t sig, void **old_data) + { + int i; + struct rte_hash_key *k, *keys = h->key_store; +@@ -776,6 +778,8 @@ search_and_update(const struct rte_hash *h, void *data, const void *key, + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { ++ if (old_data != NULL) ++ *old_data = k->pdata; + /* The store to application data at *data + * should not leak after the store to pdata + * in the key store. i.e. 
pdata is the guard +@@ -807,7 +811,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, + struct rte_hash_bucket *sec_bkt, + const struct rte_hash_key *key, void *data, + uint16_t sig, uint32_t new_idx, +- int32_t *ret_val) ++ int32_t *ret_val, void **old_data) + { + unsigned int i; + struct rte_hash_bucket *cur_bkt; +@@ -817,7 +821,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, + /* Check if key was inserted after last check but before this + * protected region in case of inserting duplicated keys. + */ +- ret = search_and_update(h, data, key, prim_bkt, sig); ++ ret = search_and_update(h, data, key, prim_bkt, sig, old_data); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; +@@ -825,7 +829,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, + } + + FOR_EACH_BUCKET(cur_bkt, sec_bkt) { +- ret = search_and_update(h, data, key, cur_bkt, sig); ++ ret = search_and_update(h, data, key, cur_bkt, sig, old_data); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; +@@ -872,7 +876,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, + const struct rte_hash_key *key, void *data, + struct queue_node *leaf, uint32_t leaf_slot, + uint16_t sig, uint32_t new_idx, +- int32_t *ret_val) ++ int32_t *ret_val, void **old_data) + { + uint32_t prev_alt_bkt_idx; + struct rte_hash_bucket *cur_bkt; +@@ -892,7 +896,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, + /* Check if key was inserted after last check but before this + * protected region. 
+ */ +- ret = search_and_update(h, data, key, bkt, sig); ++ ret = search_and_update(h, data, key, bkt, sig, old_data); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; +@@ -900,7 +904,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, + } + + FOR_EACH_BUCKET(cur_bkt, alt_bkt) { +- ret = search_and_update(h, data, key, cur_bkt, sig); ++ ret = search_and_update(h, data, key, cur_bkt, sig, old_data); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; +@@ -997,7 +1001,8 @@ rte_hash_cuckoo_make_space_mw(const struct rte_hash *h, + struct rte_hash_bucket *sec_bkt, + const struct rte_hash_key *key, void *data, + uint16_t sig, uint32_t bucket_idx, +- uint32_t new_idx, int32_t *ret_val) ++ uint32_t new_idx, int32_t *ret_val, ++ void **old_data) + { + unsigned int i; + struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN]; +@@ -1023,7 +1028,7 @@ rte_hash_cuckoo_make_space_mw(const struct rte_hash *h, + int32_t ret = rte_hash_cuckoo_move_insert_mw(h, + bkt, sec_bkt, key, data, + tail, i, sig, +- new_idx, ret_val); ++ new_idx, ret_val, old_data); + if (likely(ret != -1)) + return ret; + } +@@ -1076,6 +1081,29 @@ alloc_slot(const struct rte_hash *h, struct lcore_cache *cached_free_slots) + return slot_id; + } + ++/* ++ * When RCU is configured with a free function, auto-free the overwritten ++ * data pointer via RCU. 
++ */ ++static inline void ++__hash_rcu_auto_free_old_data(const struct rte_hash *h, void *old_data_val) ++{ ++ struct __rte_hash_rcu_dq_entry rcu_dq_entry = { ++ .key_idx = EMPTY_SLOT, /* sentinel value for __hash_rcu_qsbr_free_resource */ ++ .old_data = old_data_val, ++ }; ++ ++ if (h->hash_rcu_cfg == NULL || h->hash_rcu_cfg->free_key_data_func == NULL) ++ return; ++ ++ if (h->dq == NULL || rte_rcu_qsbr_dq_enqueue(h->dq, &rcu_dq_entry) != 0) { ++ /* SYNC mode or enqueue failed in DQ mode */ ++ rte_rcu_qsbr_synchronize(h->hash_rcu_cfg->v, RTE_QSBR_THRID_INVALID); ++ h->hash_rcu_cfg->free_key_data_func(h->hash_rcu_cfg->key_data_ptr, ++ old_data_val); ++ } ++} ++ + static inline int32_t + __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig, void *data) +@@ -1092,6 +1120,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, + struct lcore_cache *cached_free_slots = NULL; + int32_t ret_val; + struct rte_hash_bucket *last; ++ void *saved_old_data = NULL; + + short_sig = get_short_sig(sig); + prim_bucket_idx = get_prim_bucket_index(h, sig); +@@ -1103,18 +1132,20 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, + + /* Check if key is already inserted in primary location */ + __hash_rw_writer_lock(h); +- ret = search_and_update(h, data, key, prim_bkt, short_sig); ++ ret = search_and_update(h, data, key, prim_bkt, short_sig, ++ &saved_old_data); + if (ret != -1) { + __hash_rw_writer_unlock(h); +- return ret; ++ goto overwrite; + } + + /* Check if key is already inserted in secondary location */ + FOR_EACH_BUCKET(cur_bkt, sec_bkt) { +- ret = search_and_update(h, data, key, cur_bkt, short_sig); ++ ret = search_and_update(h, data, key, cur_bkt, short_sig, ++ &saved_old_data); + if (ret != -1) { + __hash_rw_writer_unlock(h); +- return ret; ++ goto overwrite; + } + } + +@@ -1153,33 +1184,39 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, + + /* Find an empty slot and 
insert */ + ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data, +- short_sig, slot_id, &ret_val); ++ short_sig, slot_id, &ret_val, ++ &saved_old_data); + if (ret == 0) + return slot_id - 1; + else if (ret == 1) { + enqueue_slot_back(h, cached_free_slots, slot_id); +- return ret_val; ++ ret = ret_val; ++ goto overwrite; + } + + /* Primary bucket full, need to make space for new entry */ + ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data, +- short_sig, prim_bucket_idx, slot_id, &ret_val); ++ short_sig, prim_bucket_idx, slot_id, &ret_val, ++ &saved_old_data); + if (ret == 0) + return slot_id - 1; + else if (ret == 1) { + enqueue_slot_back(h, cached_free_slots, slot_id); +- return ret_val; ++ ret = ret_val; ++ goto overwrite; + } + + /* Also search secondary bucket to get better occupancy */ + ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data, +- short_sig, sec_bucket_idx, slot_id, &ret_val); ++ short_sig, sec_bucket_idx, slot_id, &ret_val, ++ &saved_old_data); + + if (ret == 0) + return slot_id - 1; + else if (ret == 1) { + enqueue_slot_back(h, cached_free_slots, slot_id); +- return ret_val; ++ ret = ret_val; ++ goto overwrite; + } + + /* if ext table not enabled, we failed the insertion */ +@@ -1193,17 +1230,21 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, + */ + __hash_rw_writer_lock(h); + /* We check for duplicates again since could be inserted before the lock */ +- ret = search_and_update(h, data, key, prim_bkt, short_sig); ++ ret = search_and_update(h, data, key, prim_bkt, short_sig, ++ &saved_old_data); + if (ret != -1) { + enqueue_slot_back(h, cached_free_slots, slot_id); +- goto failure; ++ __hash_rw_writer_unlock(h); ++ goto overwrite; + } + + FOR_EACH_BUCKET(cur_bkt, sec_bkt) { +- ret = search_and_update(h, data, key, cur_bkt, short_sig); ++ ret = search_and_update(h, data, key, cur_bkt, short_sig, ++ &saved_old_data); + if (ret != -1) { + enqueue_slot_back(h, cached_free_slots, 
slot_id); +- goto failure; ++ __hash_rw_writer_unlock(h); ++ goto overwrite; + } + } + +@@ -1263,6 +1304,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, + __hash_rw_writer_unlock(h); + return slot_id - 1; + ++overwrite: ++ if (saved_old_data != NULL) ++ __hash_rcu_auto_free_old_data(h, saved_old_data); ++ return ret; ++ + failure: + __hash_rw_writer_unlock(h); + return ret; +@@ -1566,6 +1612,15 @@ __hash_rcu_qsbr_free_resource(void *p, void *e, unsigned int n) + *((struct __rte_hash_rcu_dq_entry *)e); + + RTE_SET_USED(n); ++ ++ if (rcu_dq_entry.key_idx == EMPTY_SLOT) { ++ /* Overwrite case: free old data only, do not recycle slot */ ++ RTE_ASSERT(h->hash_rcu_cfg->free_key_data_func != NULL); ++ h->hash_rcu_cfg->free_key_data_func(h->hash_rcu_cfg->key_data_ptr, ++ rcu_dq_entry.old_data); ++ return; ++ } ++ + keys = h->key_store; + + k = (struct rte_hash_key *) ((char *)keys + +diff --git a/lib/hash/rte_hash.h b/lib/hash/rte_hash.h +index f692e0868dcf..e33f0aea0f5e 100644 +--- a/lib/hash/rte_hash.h ++++ b/lib/hash/rte_hash.h +@@ -226,7 +226,9 @@ rte_hash_max_key_id(const struct rte_hash *h); + * Thread safety can be enabled by setting flag during + * table creation. + * If the key exists already in the table, this API updates its value +- * with 'data' passed in this API. It is the responsibility of ++ * with 'data' passed in this API. If RCU is configured with a ++ * free_key_data_func callback, the old data is automatically ++ * deferred-freed via RCU. Otherwise, it is the responsibility of + * the application to manage any memory associated with the old value. + * The readers might still be using the old value even after this API + * has returned. +@@ -253,7 +255,9 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data); + * Thread safety can be enabled by setting flag during + * table creation. + * If the key exists already in the table, this API updates its value +- * with 'data' passed in this API. 
It is the responsibility of ++ * with 'data' passed in this API. If RCU is configured with a ++ * free_key_data_func callback, the old data is automatically ++ * deferred-freed via RCU. Otherwise, it is the responsibility of + * the application to manage any memory associated with the old value. + * The readers might still be using the old value even after this API + * has returned. +-- +2.53.0 +