From bf126da916cc2b56bfa75dcac8da12c5bd1d38dd Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Mon, 30 Mar 2026 14:06:38 +0200 Subject: [PATCH 01/23] Parallelize realtime IPv4 ingress draining --- AGENTS.md | 35 +++- common/src/lib.rs | 3 + docs/engineering-notes.md | 186 +++++++++++++++++ ebpf-ipv4/src/main.rs | 94 ++++++++- ebpf-ipv6/src/main.rs | 6 +- rustiflow/src/realtime.rs | 291 ++++++++++++++++++++++++--- scripts/realtime_container_stress.sh | 183 +++++++++++++++++ 7 files changed, 750 insertions(+), 48 deletions(-) create mode 100755 scripts/realtime_container_stress.sh diff --git a/AGENTS.md b/AGENTS.md index 52c52c34..e1c5d873 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -117,22 +117,39 @@ in `docs/engineering-notes.md`. ### Current Focus -- [ ] Stabilize and measure before expanding the eBPF event payload further. -- [x] Finish the remaining TCP quality signals that current metadata already supports: - duplicate ACKs, zero-window events, and close style. -- [x] Add the next IP and path signals once they can be trusted in both offline - and realtime modes. +- [x] Keep the `rustiflow-t0` / `rustiflow-peer` container harness green as the + realtime throughput baseline: + `docker run --privileged --network host ... realtime rustiflow-t0 --ingress-only` + plus `iperf3 -c 10.203.0.2 -B 10.203.0.1 -u -b 2.5G -l 1400 -R`. +- [x] Prove where the current realtime bottleneck lives before redesigning it: + ring-buffer capacity, single-source drain task, shard channel backpressure, + or flow-table work. +- [x] Restructure realtime ingestion so more than one userspace task can drain + packet events in parallel instead of funnelling all ingress traffic through + one hot path in `rustiflow/src/realtime.rs`. +- [ ] Preserve semantic parity with offline mode while changing ingestion + structure: timestamps, packet lengths, biflow direction, expiration, and + export contents must remain aligned. 
+- [x] Add a repeatable throughput comparison after each structural change: + same `iperf3` command, same interface, same export mode, and explicit + `Total dropped packets before exit` capture. +- [x] Treat the redesign as successful only when the verification data improves: + fewer dropped packets on the single-flow `2.5G` case and materially better + behavior on the `-P 8` multi-flow ingress case. +- [ ] Decide whether the current multi-queue ring-buffer design should also be + extended to IPv6, or whether the next step should be the more invasive + transport rewrite captured as Option 2 in `docs/engineering-notes.md`. Primary files: -- `rustiflow/src/packet_features.rs` -- `rustiflow/src/pcap.rs` - `rustiflow/src/realtime.rs` -- `common/src/lib.rs` - `ebpf-ipv4/src/main.rs` - `ebpf-ipv6/src/main.rs` +- `common/src/lib.rs` +- `rustiflow/src/packet_features.rs` +- `rustiflow/src/flow_table.rs` - `rustiflow/src/flows/basic_flow.rs` -- `rustiflow/src/flows/features/` +- `docs/engineering-notes.md` ### Later Work diff --git a/common/src/lib.rs b/common/src/lib.rs index 15f257aa..52378206 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -2,6 +2,9 @@ pub use network_types::{icmp::IcmpHdr, tcp::TcpHdr, udp::UdpHdr}; +pub const REALTIME_EVENT_QUEUE_COUNT: usize = 4; +pub const REALTIME_EVENT_RINGBUF_BYTES: u32 = 1024 * 1024 * 64; + /// BasicFeaturesIpv4 is a struct collection all ipv4 traffic data. #[repr(C, packed)] #[derive(Copy, Clone)] diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index 277bc0bb..dcb15304 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -78,3 +78,189 @@ This file keeps short-lived design choices and execution notes that would make treating non-first IPv4 fragments as if they started with a fresh transport header. Non-first IPv4 fragments should now be dropped while first fragments still parse their transport header normally. 
+ +## 2026-03-27 + +- `rgbcore` now has a persistent local software-path performance harness for + RustiFlow realtime testing: + - host namespace capture side: `rustiflow-t0` + - peer namespace: `rustiflow-peer` + - peer interface inside that namespace: `rustiflow-p0` + - addressing: `10.203.0.1/30` on `rustiflow-t0` and `10.203.0.2/30` on + `rustiflow-p0` +- This harness is intended to isolate RustiFlow software-path performance from + physical LAN limits. It is valid for stressing local realtime ingestion, but + it is not a replacement for true physical wire-rate validation on real NICs. +- Realtime capture baseline command on `rgbcore`: + `RUST_LOG=info cargo xtask run --release -- -f basic -o csv --export-path /dev/null --performance-mode --threads 32 realtime rustiflow-t0 --ingress-only` +- Peer-side traffic generation baseline command: + `sudo ip netns exec rustiflow-peer iperf3 -s -B 10.203.0.2` +- Large-packet UDP stress baseline: + `iperf3 -c 10.203.0.2 -B 10.203.0.1 -u -b 1G -l 1400 -t 30` +- Result for that large-packet run: `iperf3` sustained `1.00 Gbit/s` and + RustiFlow reported `Total dropped packets: 0` on shutdown. +- Small-packet UDP stress command used for PPS pressure: + `iperf3 -c 10.203.0.2 -B 10.203.0.1 -u -b 1G -l 256 -t 30` +- Result for the `256`-byte run with RustiFlow enabled: about `630 Mbit/s`, + small `iperf3` receiver loss, and RustiFlow still reported `0` dropped + packets. +- Control result for the same `256`-byte run with RustiFlow disabled: about + `614 Mbit/s` with similarly small `iperf3` receiver loss. +- Current interpretation: the small-packet ceiling observed in this harness is + in the `iperf3`/kernel UDP packets-per-second path, not in RustiFlow's eBPF + event ingestion path. The local evidence currently supports that RustiFlow + can keep up with `1 Gbit/s` realtime traffic for larger UDP datagrams in the + software-path harness without reporting internal packet drops. 
+ +## 2026-03-30 + +- Local container-based realtime stress testing is now the preferred workflow + on `rgbcore` instead of direct binary execution. +- The working setup depends on three pieces: + - the persistent local harness from `2026-03-27` (`rustiflow-t0`, + `rustiflow-peer`, `rustiflow-p0`) + - a long-lived `iperf3` server inside `rustiflow-peer` bound to `10.203.0.2` + - privileged host-network RustiFlow containers built from `Dockerfile` or + `Dockerfile-slim` +- With that setup in place, the containerized end-to-end loop can be driven + independently from `rgbcore`: + - start RustiFlow in a privileged host-network container on `rustiflow-t0` + - generate reverse-mode UDP traffic with `iperf3 -R` so the hot stream is + ingress on `rustiflow-t0` + - stop the container with `docker kill -s INT ...` so RustiFlow prints its + dropped-packet summary before exit +- Verified containerized cases: + - `rustiflow:test-slim`, `1G`, `1400`-byte UDP, one processing thread: + `0` RustiFlow drops and the exported CSV contains the expected large UDP + flow + - `rustiflow:test-slim`, target `2.5G`, `1400`-byte UDP, one processing + thread: achieved about `2.19 Gbit/s`, repeated ring-buffer reservation + failures, and `201510` dropped packets reported by RustiFlow + - `rustiflow:test-full`, `1G`, `1400`-byte UDP, one processing thread: + `0` RustiFlow drops +- Current operational pain point: when the realtime path starts dropping at + high rate, the eBPF-side `error!` log on ring-buffer reservation failure + floods container logs and makes result collection noisy. +- `scripts/realtime_container_stress.sh` now standardizes the local + containerized realtime baseline on `rgbcore`. It starts a privileged + RustiFlow container on `rustiflow-t0`, drives reverse-mode `iperf3` traffic + against `10.203.0.2`, interrupts RustiFlow cleanly with `SIGINT`, and prints + the receiver bitrate plus `Total dropped packets before exit`. 
+- That log has been demoted from `error!` to `debug!` in both eBPF programs. + The authoritative overload signal remains the `DROPPED_PACKETS` counter + consumed by userspace and reported at shutdown, so container logs stay quiet + while the dropped-packet summary remains accurate. +- Slim-image thread matrix on the fixed `iperf3 -u -b 2.5G -l 1400 -R` + workload: + - `--threads 1`: receiver bitrate about `2.21 Gbit/s`, dropped packets + `261634` + - `--threads 2`: receiver bitrate about `2.26 Gbit/s`, dropped packets + `277063` + - `--threads 4`: receiver bitrate about `2.20 Gbit/s`, dropped packets + `199182` + - `--threads 8`: receiver bitrate about `2.24 Gbit/s`, dropped packets + `272799` +- Ring-buffer-size experiment: + - increasing both realtime eBPF ring buffers from `20 MB` to `64 MB` had a + large effect on the single-flow case + - with `--threads 4` and `iperf3 -u -b 2.5G -l 1400 -P 1 -R`, receiver + bitrate improved to about `2.32 Gbit/s` and RustiFlow reported `0` dropped + packets + - with `--threads 4` and `iperf3 -u -b 2.5G -l 1400 -P 8 -R`, the harness + still achieved about `14.8 Gbit/s` aggregate and RustiFlow still dropped + about `9182037` packets + - current interpretation: extra ring-buffer capacity materially improves + burst absorption and the moderate single-flow case, but it does not remove + the real ingestion ceiling once the single userspace drain path is driven + far beyond what it can sustainably consume +- Realtime drain-path instrumentation now logs per-source shutdown stats from + `rustiflow/src/realtime.rs`: total events, decode-and-shard time, shard-send + wait time, and total per-event time. +- The instrumentation perturbs absolute throughput enough that the raw + `iperf3` rates should not be treated as new baselines, but the ratios are + still useful for locating the bottleneck. 
- On the active `ingress-ipv4` source, shard-send wait time dominated + decode-and-shard time in both inspected cases: + - `-P 1`, `2.5G`, `--threads 4`, `5s`: about `565620` events, `65 ms` + decode-and-shard time, `74 ms` shard-send wait time + - `-P 8`, `2.5G`, `--threads 4`, `5s`: about `4760253` events, `507 ms` + decode-and-shard time, `1363 ms` shard-send wait time +- Current interpretation: the single userspace drain task is the real bottleneck + surface, and a large part of its hot-path cost is awaiting shard-channel + capacity rather than only decoding eBPF events. That supports redesigning + realtime ingestion so packet draining and dispatch can parallelize more + effectively before work reaches the flow-table shards. +- Realtime dispatch now batches packets per shard before sending them into the + flow-table workers instead of awaiting one channel send per packet. +- This batching change improved the moderate single-flow case without changing + the public flow semantics: + - `64 MB` ring buffer, `--threads 4`, `iperf3 -u -b 2.5G -l 1400 -P 1 -R`: + about `2.27 Gbit/s`, `0` dropped packets +- The same batching change only modestly improved the overloaded multi-flow + case: + - `64 MB` ring buffer, `--threads 4`, `iperf3 -u -b 2.5G -l 1400 -P 8 -R`: + about `14.4 Gbit/s`, `8929817` dropped packets +- Current interpretation: per-packet dispatch overhead mattered, but the + current architecture is still limited by a single ingress ring-buffer drain + task under very high aggregate load. The remaining redesign target is still a + more parallel ingress structure, not only better batching. 
+- Realtime redesign options considered: + - Option 1: keep the current event model, but split the hot ingress source + into multiple eBPF event maps and drain them in parallel from userspace + before handing work to the flow-table shards + - Option 2: replace the current ring-buffer transport with a transport that + more naturally supports parallel userspace consumption, most likely a + per-CPU perf-event style design +- Option 1 assessment: + - lower-risk extension of the current architecture + - keeps `PacketFeatures`, flow semantics, and most userspace processing + structure intact + - directly targets the measured bottleneck without a transport rewrite + - still requires careful fanout design and more eBPF/userspace map plumbing +- Option 2 assessment: + - stronger long-term scalability story + - cleaner fit for parallel userspace draining + - materially more invasive because it changes both eBPF emission and + userspace event transport + - higher semantic and validation risk than Option 1 +- Current branch decision: + - pursue Option 1 first on `codex/ingestion-throughput-parallellization` + - keep Option 2 as the likely next escalation if Option 1 does not improve + the overloaded multi-flow ingress case enough +- Option 1 is now implemented for the hot IPv4 realtime path: + - the eBPF IPv4 program now emits into four fixed ring buffers + (`EVENTS_IPV4_0` through `EVENTS_IPV4_3`) instead of one shared + `EVENTS_IPV4` map + - queue selection happens in eBPF from a canonical biflow-style IPv4 + endpoint ordering so both directions of the same flow land on the same + queue + - userspace now loads and drains those four IPv4 ring buffers as independent + Tokio tasks before handing work to the existing shard workers + - IPv6 remains on the old single-queue path for now; this first pass is + deliberately bounded to the proven hot path and should not be treated as a + full transport redesign +- Validation of the implemented Option 1 shape on `rustiflow:test-slim`, + 
`rustiflow-t0`, `--threads 4`: + - single-flow ingress case, `iperf3 -u -b 2.5G -l 1400 -P 1 -R`, `10s`: + receiver bitrate about `2.18 Gbit/s`, `0` dropped packets + - multi-flow ingress case, `iperf3 -u -b 2.5G -l 1400 -P 8 -R`, `10s`: + receiver bitrate between about `15.7` and `16.3 Gbit/s`, dropped packets + between about `641688` and `1233317` + - compared with the earlier single-ring-buffer result on the same + `-P 8` shape (`14.4 Gbit/s`, `8929817` dropped packets), the multi-queue + ingress design materially reduced overload drops +- A short stats-enabled `-P 8`, `5s` run also confirms that the new userspace + drain path is genuinely parallel rather than just cosmetically split: + - `ingress-ipv4-q0`: `829312` events + - `ingress-ipv4-q1`: `2600892` events + - `ingress-ipv4-q2`: `2662511` events + - `ingress-ipv4-q3`: `813676` events + - all four IPv4 drain tasks were active, and RustiFlow reported `0` dropped + packets for that shorter run +- Current interpretation after Option 1: + - the original single-source bottleneck was real + - bounded multi-queue ingress fanout buys substantial headroom on the + overloaded IPv4 multi-flow case + - distribution across the four queues is not perfectly even, so there is + still room to tune the fanout function or escalate to the Option 2 + transport rewrite later if needed diff --git a/ebpf-ipv4/src/main.rs b/ebpf-ipv4/src/main.rs index 8dc5a846..cee9471e 100644 --- a/ebpf-ipv4/src/main.rs +++ b/ebpf-ipv4/src/main.rs @@ -9,11 +9,12 @@ use aya_ebpf::{ maps::{PerCpuArray, RingBuf}, programs::TcContext, }; -use aya_log_ebpf::error; +use aya_log_ebpf::debug; use common::EbpfEventIpv4; use common::IcmpHdr; use common::NetworkHeader; +use common::REALTIME_EVENT_RINGBUF_BYTES; use common::TcpHdr; use common::UdpHdr; use network_types::{ @@ -30,7 +31,16 @@ fn panic(_info: &core::panic::PanicInfo) -> ! 
{ static DROPPED_PACKETS: PerCpuArray = PerCpuArray::with_max_entries(1, 0); #[map] -static EVENTS_IPV4: RingBuf = RingBuf::with_byte_size(1024 * 1024 * 20, 0); // 20 MB +static EVENTS_IPV4_0: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV4_1: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV4_2: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV4_3: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); #[classifier] pub fn tc_flow_track(ctx: TcContext) -> i32 { @@ -60,18 +70,83 @@ fn process_packet(ctx: &TcContext) -> Result { } #[inline(always)] -fn submit_ipv4_event(ctx: &TcContext, event: EbpfEventIpv4) { - if let Some(mut entry) = EVENTS_IPV4.reserve::(0) { +fn submit_ipv4_event(ctx: &TcContext, event: EbpfEventIpv4, queue_index: u32) { + let reserved = match queue_index { + 0 => reserve_ipv4_event(&EVENTS_IPV4_0, event), + 1 => reserve_ipv4_event(&EVENTS_IPV4_1, event), + 2 => reserve_ipv4_event(&EVENTS_IPV4_2, event), + _ => reserve_ipv4_event(&EVENTS_IPV4_3, event), + }; + + if !reserved { + increment_dropped_packets(); + debug!(ctx, "Failed to reserve entry in ring buffer."); + } +} + +#[inline(always)] +fn reserve_ipv4_event(queue: &RingBuf, event: EbpfEventIpv4) -> bool { + if let Some(mut entry) = queue.reserve::(0) { *entry = core::mem::MaybeUninit::new(event); entry.submit(0); + true + } else { + false + } +} + +#[inline(always)] +fn increment_dropped_packets() { + if let Some(counter) = DROPPED_PACKETS.get_ptr_mut(0) { + unsafe { *counter += 1 }; + } +} + +#[inline(always)] +fn queue_index_ipv4(packet_info: &PacketInfo, header: &impl NetworkHeader) -> u32 { + let (first_ip, first_port, second_ip, second_port) = canonical_ipv4_endpoints( + packet_info.ipv4_source, + header.source_port(), + packet_info.ipv4_destination, + header.destination_port(), + ); + let hash = mix_u32(first_ip) + ^ 
mix_u32(second_ip).rotate_left(7) + ^ mix_u16(first_port).rotate_left(13) + ^ mix_u16(second_port).rotate_left(19) + ^ u32::from(packet_info.protocol).rotate_left(27); + hash & 0b11 +} + +#[inline(always)] +fn canonical_ipv4_endpoints( + source_ip: u32, + source_port: u16, + destination_ip: u32, + destination_port: u16, +) -> (u32, u16, u32, u16) { + if source_ip < destination_ip || (source_ip == destination_ip && source_port <= destination_port) + { + (source_ip, source_port, destination_ip, destination_port) } else { - if let Some(counter) = DROPPED_PACKETS.get_ptr_mut(0) { - unsafe { *counter += 1 }; - } - error!(ctx, "Failed to reserve entry in ring buffer."); + (destination_ip, destination_port, source_ip, source_port) } } +#[inline(always)] +fn mix_u32(mut value: u32) -> u32 { + value ^= value >> 16; + value = value.wrapping_mul(0x7feb_352d); + value ^= value >> 15; + value = value.wrapping_mul(0x846c_a68b); + value ^ (value >> 16) +} + +#[inline(always)] +fn mix_u16(value: u16) -> u32 { + mix_u32(u32::from(value)) +} + fn process_transport_packet( ctx: &TcContext, packet_info: &PacketInfo, @@ -79,8 +154,9 @@ fn process_transport_packet( ) -> Result { let hdr = ctx.load::(header_offset).map_err(|_| ())?; let packet_log = packet_info.to_packet_log(&hdr); + let queue_index = queue_index_ipv4(packet_info, &hdr); - submit_ipv4_event(ctx, packet_log); + submit_ipv4_event(ctx, packet_log, queue_index); Ok(TC_ACT_PIPE) } diff --git a/ebpf-ipv6/src/main.rs b/ebpf-ipv6/src/main.rs index a1d69c34..4620d8f6 100644 --- a/ebpf-ipv6/src/main.rs +++ b/ebpf-ipv6/src/main.rs @@ -9,7 +9,7 @@ use aya_ebpf::{ maps::{PerCpuArray, RingBuf}, programs::TcContext, }; -use aya_log_ebpf::error; +use aya_log_ebpf::debug; use common::{EbpfEventIpv6, IcmpHdr, NetworkHeader, TcpHdr, UdpHdr}; use network_types::{ @@ -26,7 +26,7 @@ fn panic(_info: &core::panic::PanicInfo) -> ! 
{ static DROPPED_PACKETS: PerCpuArray = PerCpuArray::with_max_entries(1, 0); #[map] -static EVENTS_IPV6: RingBuf = RingBuf::with_byte_size(1024 * 1024 * 10 * 2, 0); // 20 MB +static EVENTS_IPV6: RingBuf = RingBuf::with_byte_size(1024 * 1024 * 64, 0); // 64 MB #[classifier] pub fn tc_flow_track(ctx: TcContext) -> i32 { @@ -122,7 +122,7 @@ fn submit_ipv6_event(ctx: &TcContext, event: EbpfEventIpv6) { if let Some(counter) = DROPPED_PACKETS.get_ptr_mut(0) { unsafe { *counter += 1 }; } - error!(ctx, "Failed to reserve entry in ring buffer."); + debug!(ctx, "Failed to reserve entry in ring buffer."); } } diff --git a/rustiflow/src/realtime.rs b/rustiflow/src/realtime.rs index 5e1cea1a..f7eae575 100644 --- a/rustiflow/src/realtime.rs +++ b/rustiflow/src/realtime.rs @@ -1,7 +1,11 @@ use std::hash::{DefaultHasher, Hash, Hasher}; use std::io; use std::path::PathBuf; -use std::sync::Arc; +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; +use std::time::Instant; use crate::debug; use crate::flow_tui::launch_packet_tui; @@ -10,12 +14,12 @@ use crate::realtime_mode::PacketGraphMode; use crate::{flow_table::FlowTable, flows::flow::Flow, packet_features::PacketFeatures}; use anyhow::Context; use aya::{ - maps::{PerCpuArray, RingBuf}, + maps::{MapData, PerCpuArray, RingBuf}, programs::{tc, SchedClassifier, TcAttachType}, Ebpf, }; use aya_log::EbpfLogger; -use common::{EbpfEventIpv4, EbpfEventIpv6}; +use common::{EbpfEventIpv4, EbpfEventIpv6, REALTIME_EVENT_QUEUE_COUNT}; use log::{error, info}; use tokio::sync::watch; use tokio::{ @@ -26,6 +30,76 @@ use tokio::{ task::JoinSet, }; +#[derive(Default)] +struct RealtimeSourceStats { + events: AtomicU64, + decode_and_shard_ns: AtomicU64, + send_wait_ns: AtomicU64, + packet_graph_ns: AtomicU64, + total_event_ns: AtomicU64, + send_errors: AtomicU64, +} + +impl RealtimeSourceStats { + fn add_decode_and_shard_ns(&self, value: u64) { + self.decode_and_shard_ns.fetch_add(value, Ordering::Relaxed); + } + + fn add_send_wait_ns(&self, 
value: u64) { + self.send_wait_ns.fetch_add(value, Ordering::Relaxed); + } + + fn add_packet_graph_ns(&self, value: u64) { + self.packet_graph_ns.fetch_add(value, Ordering::Relaxed); + } + + fn add_total_event_ns(&self, value: u64) { + self.total_event_ns.fetch_add(value, Ordering::Relaxed); + } + + fn increment_events(&self) { + self.events.fetch_add(1, Ordering::Relaxed); + } + + fn increment_send_errors(&self) { + self.send_errors.fetch_add(1, Ordering::Relaxed); + } +} + +fn elapsed_ns(start: Instant) -> u64 { + start.elapsed().as_nanos().min(u64::MAX as u128) as u64 +} + +fn log_source_stats(label: &str, stats: &RealtimeSourceStats) { + let events = stats.events.load(Ordering::Relaxed); + let decode_and_shard_ns = stats.decode_and_shard_ns.load(Ordering::Relaxed); + let send_wait_ns = stats.send_wait_ns.load(Ordering::Relaxed); + let packet_graph_ns = stats.packet_graph_ns.load(Ordering::Relaxed); + let total_event_ns = stats.total_event_ns.load(Ordering::Relaxed); + let send_errors = stats.send_errors.load(Ordering::Relaxed); + + if events == 0 { + info!("Realtime source {}: no events drained", label); + return; + } + + info!( + "Realtime source {}: events={} total_ms={:.3} decode_ms={:.3} send_wait_ms={:.3} packet_graph_ms={:.3} avg_event_us={:.3} avg_send_wait_us={:.3} send_errors={}", + label, + events, + total_event_ns as f64 / 1_000_000.0, + decode_and_shard_ns as f64 / 1_000_000.0, + send_wait_ns as f64 / 1_000_000.0, + packet_graph_ns as f64 / 1_000_000.0, + total_event_ns as f64 / events as f64 / 1_000.0, + send_wait_ns as f64 / events as f64 / 1_000.0, + send_errors, + ); +} + +const SHARD_BATCH_SIZE: usize = 128; +const SHARD_QUEUE_CAPACITY: usize = 512; + /// Starts the realtime processing of packets on the given interface. /// The function will return the number of packets dropped by the eBPF program. 
#[allow(clippy::too_many_arguments)] @@ -51,7 +125,7 @@ where // Load the eBPF programs and attach to the event arrays let mut bpf_ingress_ipv4 = load_ebpf_ipv4(interface, TcAttachType::Ingress)?; let mut bpf_ingress_ipv6 = load_ebpf_ipv6(interface, TcAttachType::Ingress)?; - let events_ingress_ipv4 = RingBuf::try_from(bpf_ingress_ipv4.take_map("EVENTS_IPV4").unwrap())?; + let events_ingress_ipv4 = take_ring_buf_maps(&mut bpf_ingress_ipv4, "EVENTS_IPV4")?; let dropped_packets_ingress_ipv4: PerCpuArray<_, u64> = PerCpuArray::try_from(bpf_ingress_ipv4.take_map("DROPPED_PACKETS").unwrap())?; let events_ingress_ipv6 = RingBuf::try_from(bpf_ingress_ipv6.take_map("EVENTS_IPV6").unwrap())?; @@ -64,16 +138,21 @@ where if !ingress_only { let mut bpf_egress_ipv4 = load_ebpf_ipv4(interface, TcAttachType::Egress)?; let mut bpf_egress_ipv6 = load_ebpf_ipv6(interface, TcAttachType::Egress)?; - let events_egress_ipv4 = - RingBuf::try_from(bpf_egress_ipv4.take_map("EVENTS_IPV4").unwrap())?; + let events_egress_ipv4 = take_ring_buf_maps(&mut bpf_egress_ipv4, "EVENTS_IPV4")?; let dropped_packets_egress_ipv4: PerCpuArray<_, u64> = PerCpuArray::try_from(bpf_egress_ipv4.take_map("DROPPED_PACKETS").unwrap())?; let events_egress_ipv6 = RingBuf::try_from(bpf_egress_ipv6.take_map("EVENTS_IPV6").unwrap())?; let dropped_packets_egress_ipv6: PerCpuArray<_, u64> = PerCpuArray::try_from(bpf_egress_ipv6.take_map("DROPPED_PACKETS").unwrap())?; - event_sources_v4 = vec![events_egress_ipv4, events_ingress_ipv4]; - event_sources_v6 = vec![events_egress_ipv6, events_ingress_ipv6]; + event_sources_v4 = labeled_ringbuf_sources("egress-ipv4", events_egress_ipv4) + .into_iter() + .chain(labeled_ringbuf_sources("ingress-ipv4", events_ingress_ipv4)) + .collect(); + event_sources_v6 = vec![ + ("egress-ipv6", events_egress_ipv6), + ("ingress-ipv6", events_ingress_ipv6), + ]; dropped_packet_counters = vec![ dropped_packets_egress_ipv4, dropped_packets_ingress_ipv4, @@ -81,13 +160,13 @@ where 
dropped_packets_ingress_ipv6, ]; } else { - event_sources_v4 = vec![events_ingress_ipv4]; - event_sources_v6 = vec![events_ingress_ipv6]; + event_sources_v4 = labeled_ringbuf_sources("ingress-ipv4", events_ingress_ipv4); + event_sources_v6 = vec![("ingress-ipv6", events_ingress_ipv6)]; dropped_packet_counters = vec![dropped_packets_ingress_ipv4, dropped_packets_ingress_ipv6]; } - let buffer_num_packets = 10_000; let mut shard_senders = Vec::with_capacity(num_threads as usize); + let enable_source_stats = std::env::var_os("RUSTIFLOW_REALTIME_STATS").is_some(); let (packet_graph, packet_rx) = match packet_graph_mode { PacketGraphMode::Enabled => { let (packet_tx, packet_rx) = watch::channel(Vec::new()); @@ -104,7 +183,7 @@ where debug!("Creating {} sharded FlowTables...", num_threads); for _ in 0..num_threads { - let (tx, mut rx) = mpsc::channel::(buffer_num_packets); + let (tx, mut rx) = mpsc::channel::>(SHARD_QUEUE_CAPACITY); let mut flow_table = FlowTable::new( active_timeout, idle_timeout, @@ -116,9 +195,11 @@ where // Spawn a task per shard tokio::spawn(async move { let mut last_timestamp = None; - while let Some(packet_features) = rx.recv().await { - last_timestamp = Some(packet_features.timestamp_us); - flow_table.process_packet(&packet_features).await; + while let Some(packet_batch) = rx.recv().await { + for packet_features in packet_batch { + last_timestamp = Some(packet_features.timestamp_us); + flow_table.process_packet(&packet_features).await; + } } debug!("Shard finished processing packets"); // Handle flow exporting when the receiver is closed @@ -132,14 +213,18 @@ where // Spawn a task per event source let mut handle_set = JoinSet::new(); + let mut source_stats = Vec::new(); - for ebpf_event_source in event_sources_v4 { + for (label, ebpf_event_source) in event_sources_v4 { let shard_senders_clone = shard_senders.clone(); let packet_graph = packet_graph.clone(); + let stats = enable_source_stats.then(|| Arc::new(RealtimeSourceStats::default())); + 
source_stats.push((label, stats.clone())); handle_set.spawn(async move { // Wrap the RingBuf in AsyncFd to poll it with tokio let mut async_ring_buf = AsyncFd::new(ebpf_event_source).unwrap(); + let mut pending_batches = create_pending_batches(num_threads as usize); loop { // Wait for data to be available in the ring buffer @@ -147,37 +232,65 @@ where let ring_buf = guard.get_inner_mut(); while let Some(event) = ring_buf.next() { + let event_start = enable_source_stats.then(Instant::now); if let Some(packet_graph) = &packet_graph { + let packet_graph_start = enable_source_stats.then(Instant::now); packet_graph.record_packet().await; + if let (Some(stats), Some(packet_graph_start)) = + (&stats, packet_graph_start) + { + stats.add_packet_graph_ns(elapsed_ns(packet_graph_start)); + } } + let decode_start = enable_source_stats.then(Instant::now); let ebpf_event_ipv4: EbpfEventIpv4 = unsafe { std::ptr::read(event.as_ptr() as *const _) }; let packet_features = PacketFeatures::from_ebpf_event_ipv4(&ebpf_event_ipv4, realtime_offset_us); let flow_key = packet_features.biflow_key_value(); let shard_index = compute_shard_index(&flow_key, num_threads); + if let (Some(stats), Some(decode_start)) = (&stats, decode_start) { + stats.add_decode_and_shard_ns(elapsed_ns(decode_start)); + } + pending_batches[shard_index].push(packet_features); + + if pending_batches[shard_index].len() >= SHARD_BATCH_SIZE { + flush_shard_batch( + &shard_senders_clone[shard_index], + &mut pending_batches[shard_index], + stats.as_ref(), + shard_index, + ) + .await; + } - if let Err(e) = shard_senders_clone[shard_index].send(packet_features).await { - error!( - "Failed to send packet_features to shard {}: {}", - shard_index, e - ); + if let Some(stats) = &stats { + stats.increment_events(); + if let Some(event_start) = event_start { + stats.add_total_event_ns(elapsed_ns(event_start)); + } } } + flush_pending_batches(&shard_senders_clone, &mut pending_batches, stats.as_ref()) + .await; + // Clear the 
readiness state for the next iteration guard.clear_ready(); } }); } - for ebpf_event_source in event_sources_v6 { + for (label, ebpf_event_source) in event_sources_v6 { let shard_senders_clone = shard_senders.clone(); let packet_graph = packet_graph.clone(); + let stats = enable_source_stats.then(|| Arc::new(RealtimeSourceStats::default())); + source_stats.push((label, stats.clone())); handle_set.spawn(async move { // Wrap the RingBuf in AsyncFd to poll it with tokio let mut async_ring_buf = AsyncFd::new(ebpf_event_source).unwrap(); + let mut pending_batches = create_pending_batches(num_threads as usize); loop { // Wait for data to be available in the ring buffer @@ -185,24 +298,49 @@ where let ring_buf = guard.get_inner_mut(); while let Some(event) = ring_buf.next() { + let event_start = enable_source_stats.then(Instant::now); if let Some(packet_graph) = &packet_graph { + let packet_graph_start = enable_source_stats.then(Instant::now); packet_graph.record_packet().await; + if let (Some(stats), Some(packet_graph_start)) = + (&stats, packet_graph_start) + { + stats.add_packet_graph_ns(elapsed_ns(packet_graph_start)); + } } + let decode_start = enable_source_stats.then(Instant::now); let ebpf_event_ipv6: EbpfEventIpv6 = unsafe { std::ptr::read(event.as_ptr() as *const _) }; let packet_features = PacketFeatures::from_ebpf_event_ipv6(&ebpf_event_ipv6, realtime_offset_us); let flow_key = packet_features.biflow_key_value(); let shard_index = compute_shard_index(&flow_key, num_threads); + if let (Some(stats), Some(decode_start)) = (&stats, decode_start) { + stats.add_decode_and_shard_ns(elapsed_ns(decode_start)); + } + pending_batches[shard_index].push(packet_features); + + if pending_batches[shard_index].len() >= SHARD_BATCH_SIZE { + flush_shard_batch( + &shard_senders_clone[shard_index], + &mut pending_batches[shard_index], + stats.as_ref(), + shard_index, + ) + .await; + } - if let Err(e) = shard_senders_clone[shard_index].send(packet_features).await { - error!( - 
"Failed to send packet_features to shard {}: {}", - shard_index, e - ); + if let Some(stats) = &stats { + stats.increment_events(); + if let Some(event_start) = event_start { + stats.add_total_event_ns(elapsed_ns(event_start)); + } } } + flush_pending_batches(&shard_senders_clone, &mut pending_batches, stats.as_ref()) + .await; + // Clear the readiness state for the next iteration guard.clear_ready(); } @@ -234,6 +372,11 @@ where } info!("Total dropped packets before exit: {}", total_dropped); + for (label, stats) in &source_stats { + if let Some(stats) = stats.as_ref() { + log_source_stats(label, stats); + } + } // Cancel the tasks reading ebpf events handle_set.abort_all(); @@ -259,6 +402,54 @@ where Ok(total_dropped) } +fn create_pending_batches(num_shards: usize) -> Vec> { + std::iter::repeat_with(|| Vec::with_capacity(SHARD_BATCH_SIZE)) + .take(num_shards) + .collect() +} + +async fn flush_pending_batches( + shard_senders: &[Sender>], + pending_batches: &mut [Vec], + stats: Option<&Arc>, +) { + for (shard_index, pending_batch) in pending_batches.iter_mut().enumerate() { + flush_shard_batch( + &shard_senders[shard_index], + pending_batch, + stats, + shard_index, + ) + .await; + } +} + +async fn flush_shard_batch( + shard_sender: &Sender>, + pending_batch: &mut Vec, + stats: Option<&Arc>, + shard_index: usize, +) { + if pending_batch.is_empty() { + return; + } + + let batch = std::mem::replace(pending_batch, Vec::with_capacity(SHARD_BATCH_SIZE)); + let send_start = stats.as_ref().map(|_| Instant::now()); + if let Err(e) = shard_sender.send(batch).await { + if let Some(stats) = stats { + stats.increment_send_errors(); + } + error!( + "Failed to send packet batch to shard {}: {}", + shard_index, e + ); + } + if let (Some(stats), Some(send_start)) = (stats, send_start) { + stats.add_send_wait_ns(elapsed_ns(send_start)); + } +} + #[derive(Clone)] struct PacketGraphState { packet_counter: Arc>, @@ -288,6 +479,52 @@ fn compute_realtime_offset_us() -> Result { 
Ok(realtime_us - monotonic_us) } +fn labeled_ringbuf_sources( + label_prefix: &'static str, + ring_bufs: Vec>, +) -> Vec<(&'static str, RingBuf)> { + ring_bufs + .into_iter() + .enumerate() + .map(|(index, ring_buf)| (queue_label(label_prefix, index), ring_buf)) + .collect() +} + +fn queue_label(label_prefix: &'static str, index: usize) -> &'static str { + match (label_prefix, index) { + ("ingress-ipv4", 0) => "ingress-ipv4-q0", + ("ingress-ipv4", 1) => "ingress-ipv4-q1", + ("ingress-ipv4", 2) => "ingress-ipv4-q2", + ("ingress-ipv4", 3) => "ingress-ipv4-q3", + ("egress-ipv4", 0) => "egress-ipv4-q0", + ("egress-ipv4", 1) => "egress-ipv4-q1", + ("egress-ipv4", 2) => "egress-ipv4-q2", + ("egress-ipv4", 3) => "egress-ipv4-q3", + _ => panic!( + "unexpected realtime queue label: {}-{}", + label_prefix, index + ), + } +} + +fn take_ring_buf_maps( + bpf: &mut Ebpf, + base_name: &str, +) -> Result>, anyhow::Error> { + let mut ring_bufs = Vec::with_capacity(REALTIME_EVENT_QUEUE_COUNT); + + for index in 0..REALTIME_EVENT_QUEUE_COUNT { + let map_name = format!("{}_{}", base_name, index); + let ring_buf = RingBuf::try_from( + bpf.take_map(&map_name) + .with_context(|| format!("missing ring buffer map {}", map_name))?, + )?; + ring_bufs.push(ring_buf); + } + + Ok(ring_bufs) +} + fn read_clock_us(clock_id: libc::clockid_t) -> Result { let mut ts = libc::timespec { tv_sec: 0, diff --git a/scripts/realtime_container_stress.sh b/scripts/realtime_container_stress.sh new file mode 100755 index 00000000..611f5846 --- /dev/null +++ b/scripts/realtime_container_stress.sh @@ -0,0 +1,183 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: scripts/realtime_container_stress.sh [OPTIONS] + +Run the local containerized RustiFlow realtime stress harness on rgbcore. 
+ +This script assumes: +- the persistent local test harness exists +- rustiflow-t0 is the host-side capture interface +- an iperf3 server is already running inside rustiflow-peer on 10.203.0.2:5201 + +Options: + --image NAME Docker image to run (default: rustiflow:test-slim) + --container NAME Container name prefix (default: rustiflow-stress) + --features NAME Flow type to export (default: rustiflow) + --interface NAME Capture interface (default: rustiflow-t0) + --threads N RustiFlow worker threads (default: 4) + --bitrate RATE iperf3 UDP target bitrate (default: 2.5G) + --length BYTES iperf3 UDP payload length (default: 1400) + --parallel N iperf3 parallel streams (default: 1) + --duration SEC iperf3 run duration in seconds (default: 15) + --export-path PATH CSV export path inside the host filesystem + (default: target/realtime-stress/rustiflow-stress.csv) + -h, --help Show this help text + +Example: + scripts/realtime_container_stress.sh --threads 4 --bitrate 2.5G --parallel 1 +EOF +} + +image="rustiflow:test-slim" +container_prefix="rustiflow-stress" +features="rustiflow" +interface="rustiflow-t0" +threads=4 +bitrate="2.5G" +length=1400 +parallel=1 +duration=15 +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +repo_root="$(cd "$script_dir/.." && pwd)" +export_path="$repo_root/target/realtime-stress/rustiflow-stress.csv" + +while [[ $# -gt 0 ]]; do + case "$1" in + --image) + image="$2" + shift 2 + ;; + --container) + container_prefix="$2" + shift 2 + ;; + --features) + features="$2" + shift 2 + ;; + --interface) + interface="$2" + shift 2 + ;; + --threads) + threads="$2" + shift 2 + ;; + --bitrate) + bitrate="$2" + shift 2 + ;; + --length) + length="$2" + shift 2 + ;; + --parallel) + parallel="$2" + shift 2 + ;; + --duration) + duration="$2" + shift 2 + ;; + --export-path) + export_path="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "error: unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done + +if ! 
command -v docker >/dev/null 2>&1; then + echo "error: docker is required" >&2 + exit 1 +fi + +if ! command -v iperf3 >/dev/null 2>&1; then + echo "error: iperf3 is required" >&2 + exit 1 +fi + +if ! docker image inspect "$image" >/dev/null 2>&1; then + echo "error: docker image not found: $image" >&2 + exit 1 +fi + +container_name="${container_prefix}-${threads}t-${parallel}p" +if [[ "$export_path" != /* ]]; then + export_path="$repo_root/$export_path" +fi +export_dir="$(dirname "$export_path")" + +cleanup() { + docker rm -f "$container_name" >/dev/null 2>&1 || true +} +trap cleanup EXIT + +rm -f "$export_path" +mkdir -p "$export_dir" +docker rm -f "$container_name" >/dev/null 2>&1 || true + +docker run -d \ + --name "$container_name" \ + --privileged \ + --network host \ + -v "$export_dir:/tmp" \ + "$image" \ + -f "$features" \ + -o csv \ + --header \ + --export-path "/tmp/$(basename "$export_path")" \ + --performance-mode \ + --threads "$threads" \ + --early-export 5 \ + realtime "$interface" --ingress-only >/dev/null + +sleep 2 + +iperf_output="$( + iperf3 \ + -c 10.203.0.2 \ + -B 10.203.0.1 \ + -u \ + -b "$bitrate" \ + -l "$length" \ + -P "$parallel" \ + -t "$duration" \ + -R +)" + +docker kill -s INT "$container_name" >/dev/null +sleep 1 +logs="$(docker logs --tail 100 "$container_name" 2>&1)" + +if [[ "$parallel" -eq 1 ]]; then + receiver_line="$(printf '%s\n' "$iperf_output" | awk '/receiver$/ {line=$0} END {print line}')" + receiver_bitrate="$(printf '%s\n' "$receiver_line" | awk '{print $7 " " $8}')" +else + receiver_line="$(printf '%s\n' "$iperf_output" | awk '/SUM.*receiver$/ {line=$0} END {print line}')" + receiver_bitrate="$(printf '%s\n' "$receiver_line" | awk '{print $6 " " $7}')" +fi + +dropped_packets="$(printf '%s\n' "$logs" | sed -n 's/.*Total dropped packets before exit: //p' | tail -n1)" + +printf 'image: %s\n' "$image" +printf 'interface: %s\n' "$interface" +printf 'threads: %s\n' "$threads" +printf 'bitrate_target: %s\n' "$bitrate" +printf 
'parallel_streams: %s\n' "$parallel" +printf 'udp_length: %s\n' "$length" +printf 'duration_s: %s\n' "$duration" +printf 'receiver_bitrate: %s\n' "${receiver_bitrate:-missing}" +printf 'dropped_packets: %s\n' "${dropped_packets:-missing}" +printf 'export_path: %s\n' "$export_path" From c230b7d92c96cd1de4604e662f3cff74d4e0f588 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:07:35 +0200 Subject: [PATCH 02/23] Tune realtime throughput defaults --- README.md | 2 +- docs/engineering-notes.md | 79 ++++++++++++++++++++++++++++ rustiflow/src/args.rs | 4 +- rustiflow/src/main.rs | 23 +++++++- rustiflow/src/realtime.rs | 40 ++++++++++---- scripts/realtime_container_stress.sh | 40 +++++++++++--- 6 files changed, 165 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index e1ee1c89..d05666cc 100644 --- a/README.md +++ b/README.md @@ -259,7 +259,7 @@ Options: [default: 60] --threads - The numbers of threads to use for processing packets (optional) (default: 5, maximum number of logical CPUs) + The numbers of threads to use for processing packets (optional) (default: realtime uses 12, capped at the number of logical CPUs; pcap uses 5; maximum number of logical CPUs) -o, --output Output method (required if no config file is provided) diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index dcb15304..1fb7a16b 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -264,3 +264,82 @@ This file keeps short-lived design choices and execution notes that would make - distribution across the four queues is not perfectly even, so there is still room to tune the fanout function or escalate to the Option 2 transport rewrite later if needed +- Follow-up adversarial 10 Gbit/s work on the same slim-container IPv4 path + now shows a practical zero-drop operating point for larger packets: + - `8 x 1.25G`, `1400`-byte UDP, `--threads 4`: about `9.99 Gbit/s`, + `275442` dropped packets + - 
`8 x 1.25G`, `1400`-byte UDP, `--threads 8`: about `9.99 Gbit/s`, + `178257` dropped packets + - `8 x 1.25G`, `1400`-byte UDP, `--threads 10`: about `9.99 Gbit/s`, + `229803` dropped packets + - `8 x 1.25G`, `1400`-byte UDP, `--threads 11`: repeated `9.99 Gbit/s`, + `0` dropped packets + - `8 x 1.25G`, `1400`-byte UDP, `--threads 12`: `9.99 Gbit/s`, + `0` dropped packets + - `8 x 1.25G`, `1400`-byte UDP, `--threads 16`: `9.98 Gbit/s`, + `0` dropped packets +- More adversarial 10 Gbit/s shapes with `--threads 11`: + - `16 x 625M`, `1400`-byte UDP: `10.0 Gbit/s`, `0` dropped packets + - `8 x 1.25G`, `1024`-byte UDP: `9.90 Gbit/s`, `0` dropped packets + - `8 x 1.25G`, `512`-byte UDP: only about `5.77 Gbit/s` achieved and + `1171091` dropped packets + - the same `512`-byte case with `--threads 16` still only reached about + `5.86 Gbit/s`, though dropped packets fell to `489178` +- Current interpretation: + - after Option 1, the IPv4 realtime path can now sustain about `10 Gbit/s` + without internal drops for larger-packet UDP workloads when given at least + `11` worker threads in this local harness + - the next remaining pressure point is packet-per-second intensity rather + than only aggregate bitrate; the `512`-byte case is still a clear failure + mode even with more threads +- Additional hot-path knob ranking on the improved IPv4 ingress path: + - dominant runtime knobs: + - worker thread count + - feature/export cost (`basic` vs `rustiflow`) + - early export cadence + - secondary userspace knobs: + - shard batch size + - shard queue capacity + - already-proven eBPF / transport knobs from earlier work: + - ring-buffer count / parallel ingress queues + - ring-buffer byte size +- Measured on the `8 x 1.25G`, `1400`-byte UDP, `15s` ingress case: + - `rustiflow`, `--threads 10`, `--early-export 5`, current defaults: + about `9.98 Gbit/s`, `3190699` dropped packets + - same traffic, `--early-export 0`: + about `9.99 Gbit/s`, `0` dropped packets + - same traffic, `basic`, 
`--threads 11`, `--early-export 5`: + about `9.99 Gbit/s`, `0` dropped packets + - same traffic, `rustiflow`, `--threads 11`, `--early-export 5`: + still vulnerable to multi-million drops on longer runs +- Runtime batch / queue-depth experiments on the same stressed + `rustiflow`, `--threads 10`, `--early-export 5`, `15s` case: + - default batch `128`, queue capacity `512`: `3190699` drops + - batch `32`: `2821050` drops + - batch `256`: `2723922` drops + - queue capacity `2048`: `2670573` drops + - batch `256` plus queue capacity `2048`: `1737472` drops +- Current interpretation of the knob sweep: + - worker count and early-export behavior dominate the outcome much more than + local batching tweaks + - larger batches and deeper shard queues help, but they do not rescue a + configuration that is already overloaded by richer flow work plus frequent + early export + - `--threads 5` is not a sensible realtime high-throughput default for the + current architecture; it is too low relative to the measured `10 Gbit/s` + operating point + - the realtime default thread policy is now `12`, still capped at the + number of logical CPUs, while offline pcap keeps the historical default of + `5` + - validation of the new realtime default on `rustiflow:test-slim` with no + explicit `--threads`, `8 x 1.25G`, `1400`-byte UDP, `15s`, ingress: + `9.99 Gbit/s`, `0` dropped packets + - using all logical CPUs by default was unnecessary for the current `10G` + target in this harness; a bounded default keeps the out-of-box realtime + behavior aligned with the proven zero-drop operating point without jumping + straight to the machine maximum + - `early_export = None` remains the sane throughput default; short periodic + export intervals should be treated as an observability tradeoff, not as a + neutral setting + - shard batch size and queue capacity are worth keeping tunable as advanced + knobs, but they are second-order compared with threads and export cadence diff --git 
a/rustiflow/src/args.rs b/rustiflow/src/args.rs index 074c0bc2..b8e6a373 100644 --- a/rustiflow/src/args.rs +++ b/rustiflow/src/args.rs @@ -33,7 +33,7 @@ pub struct Cli { pub expiration_check_interval: u64, /// The numbers of threads to use for processing packets (optional) - /// (default: 5, maximum: number of logical CPUs) + /// (default: realtime uses 12, capped at the number of logical CPUs; pcap uses 5; maximum: number of logical CPUs) #[clap(long, group = "cli_group")] pub threads: Option, @@ -120,7 +120,7 @@ pub struct ExportConfig { pub expiration_check_interval: u64, /// The numbers of threads to use for processing packets (optional) - /// (default: 5, maximum: number of logical CPUs) + /// (default: realtime uses 12, capped at the number of logical CPUs; pcap uses 5; maximum: number of logical CPUs) #[clap(short, long)] pub threads: Option, } diff --git a/rustiflow/src/main.rs b/rustiflow/src/main.rs index dbd28108..954cb29f 100644 --- a/rustiflow/src/main.rs +++ b/rustiflow/src/main.rs @@ -34,6 +34,9 @@ use std::time::Instant; use tokio::sync::mpsc; use tui::{launch_tui, Config}; +const DEFAULT_OFFLINE_THREADS: u8 = 5; +const DEFAULT_REALTIME_THREADS: u8 = 12; + #[tokio::main] async fn main() { env_logger::init(); @@ -139,7 +142,7 @@ async fn run_with_config(config: Config) { let result = handle_realtime::<$flow_ty>( &interface, sender, - std::cmp::min(config.config.threads.unwrap_or(5), num_cpus::get() as u8), + resolve_realtime_threads(config.config.threads), config.config.active_timeout, config.config.idle_timeout, config.config.early_export, @@ -220,7 +223,7 @@ async fn run_with_config(config: Config) { if let Err(err) = read_pcap_file::<$flow_ty>( &path, sender, - std::cmp::min(config.config.threads.unwrap_or(5), num_cpus::get() as u8), + resolve_offline_threads(config.config.threads), config.config.active_timeout, config.config.idle_timeout, config.config.early_export, @@ -255,3 +258,19 @@ async fn run_with_config(config: Config) { } } } + +fn 
resolve_realtime_threads(config_threads: Option) -> u8 { + let logical_cpus = num_cpus::get() as u8; + std::cmp::min( + config_threads.unwrap_or(DEFAULT_REALTIME_THREADS), + logical_cpus, + ) +} + +fn resolve_offline_threads(config_threads: Option) -> u8 { + let logical_cpus = num_cpus::get() as u8; + std::cmp::min( + config_threads.unwrap_or(DEFAULT_OFFLINE_THREADS), + logical_cpus, + ) +} diff --git a/rustiflow/src/realtime.rs b/rustiflow/src/realtime.rs index f7eae575..8e08f483 100644 --- a/rustiflow/src/realtime.rs +++ b/rustiflow/src/realtime.rs @@ -97,8 +97,8 @@ fn log_source_stats(label: &str, stats: &RealtimeSourceStats) { ); } -const SHARD_BATCH_SIZE: usize = 128; -const SHARD_QUEUE_CAPACITY: usize = 512; +const DEFAULT_SHARD_BATCH_SIZE: usize = 128; +const DEFAULT_SHARD_QUEUE_CAPACITY: usize = 512; /// Starts the realtime processing of packets on the given interface. /// The function will return the number of packets dropped by the eBPF program. @@ -117,6 +117,15 @@ pub async fn handle_realtime( where T: Flow, { + let shard_batch_size = read_env_usize( + "RUSTIFLOW_REALTIME_SHARD_BATCH_SIZE", + DEFAULT_SHARD_BATCH_SIZE, + ); + let shard_queue_capacity = read_env_usize( + "RUSTIFLOW_REALTIME_SHARD_QUEUE_CAPACITY", + DEFAULT_SHARD_QUEUE_CAPACITY, + ); + // Needed for older kernels bump_memlock_rlimit(); @@ -183,7 +192,7 @@ where debug!("Creating {} sharded FlowTables...", num_threads); for _ in 0..num_threads { - let (tx, mut rx) = mpsc::channel::>(SHARD_QUEUE_CAPACITY); + let (tx, mut rx) = mpsc::channel::>(shard_queue_capacity); let mut flow_table = FlowTable::new( active_timeout, idle_timeout, @@ -224,7 +233,8 @@ where handle_set.spawn(async move { // Wrap the RingBuf in AsyncFd to poll it with tokio let mut async_ring_buf = AsyncFd::new(ebpf_event_source).unwrap(); - let mut pending_batches = create_pending_batches(num_threads as usize); + let mut pending_batches = + create_pending_batches(num_threads as usize, shard_batch_size); loop { // Wait for data 
to be available in the ring buffer @@ -254,7 +264,7 @@ where } pending_batches[shard_index].push(packet_features); - if pending_batches[shard_index].len() >= SHARD_BATCH_SIZE { + if pending_batches[shard_index].len() >= shard_batch_size { flush_shard_batch( &shard_senders_clone[shard_index], &mut pending_batches[shard_index], @@ -290,7 +300,8 @@ where handle_set.spawn(async move { // Wrap the RingBuf in AsyncFd to poll it with tokio let mut async_ring_buf = AsyncFd::new(ebpf_event_source).unwrap(); - let mut pending_batches = create_pending_batches(num_threads as usize); + let mut pending_batches = + create_pending_batches(num_threads as usize, shard_batch_size); loop { // Wait for data to be available in the ring buffer @@ -320,7 +331,7 @@ where } pending_batches[shard_index].push(packet_features); - if pending_batches[shard_index].len() >= SHARD_BATCH_SIZE { + if pending_batches[shard_index].len() >= shard_batch_size { flush_shard_batch( &shard_senders_clone[shard_index], &mut pending_batches[shard_index], @@ -402,8 +413,8 @@ where Ok(total_dropped) } -fn create_pending_batches(num_shards: usize) -> Vec> { - std::iter::repeat_with(|| Vec::with_capacity(SHARD_BATCH_SIZE)) +fn create_pending_batches(num_shards: usize, shard_batch_size: usize) -> Vec> { + std::iter::repeat_with(|| Vec::with_capacity(shard_batch_size)) .take(num_shards) .collect() } @@ -434,7 +445,8 @@ async fn flush_shard_batch( return; } - let batch = std::mem::replace(pending_batch, Vec::with_capacity(SHARD_BATCH_SIZE)); + let next_capacity = pending_batch.capacity().max(1); + let batch = std::mem::replace(pending_batch, Vec::with_capacity(next_capacity)); let send_start = stats.as_ref().map(|_| Instant::now()); if let Err(e) = shard_sender.send(batch).await { if let Some(stats) = stats { @@ -450,6 +462,14 @@ async fn flush_shard_batch( } } +fn read_env_usize(var_name: &str, default_value: usize) -> usize { + std::env::var(var_name) + .ok() + .and_then(|value| value.parse::().ok()) + 
.filter(|value| *value > 0) + .unwrap_or(default_value) +} + #[derive(Clone)] struct PacketGraphState { packet_counter: Arc>, diff --git a/scripts/realtime_container_stress.sh b/scripts/realtime_container_stress.sh index 611f5846..12069ae2 100755 --- a/scripts/realtime_container_stress.sh +++ b/scripts/realtime_container_stress.sh @@ -22,6 +22,10 @@ Options: --length BYTES iperf3 UDP payload length (default: 1400) --parallel N iperf3 parallel streams (default: 1) --duration SEC iperf3 run duration in seconds (default: 15) + --early-export SEC RustiFlow early-export interval in seconds + (default: 5, use 0 to disable) + --env KEY=VALUE Extra environment variable for the RustiFlow container + (repeatable) --export-path PATH CSV export path inside the host filesystem (default: target/realtime-stress/rustiflow-stress.csv) -h, --help Show this help text @@ -40,9 +44,11 @@ bitrate="2.5G" length=1400 parallel=1 duration=15 +early_export=5 script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" repo_root="$(cd "$script_dir/.." 
&& pwd)" export_path="$repo_root/target/realtime-stress/rustiflow-stress.csv" +container_env=() while [[ $# -gt 0 ]]; do case "$1" in @@ -82,6 +88,14 @@ while [[ $# -gt 0 ]]; do duration="$2" shift 2 ;; + --early-export) + early_export="$2" + shift 2 + ;; + --env) + container_env+=(-e "$2") + shift 2 + ;; --export-path) export_path="$2" shift 2 @@ -128,20 +142,29 @@ rm -f "$export_path" mkdir -p "$export_dir" docker rm -f "$container_name" >/dev/null 2>&1 || true +rustiflow_args=( + -f "$features" + -o csv + --header + --export-path "/tmp/$(basename "$export_path")" + --performance-mode + --threads "$threads" +) + +if [[ "$early_export" != "0" ]]; then + rustiflow_args+=(--early-export "$early_export") +fi + +rustiflow_args+=(realtime "$interface" --ingress-only) + docker run -d \ --name "$container_name" \ --privileged \ --network host \ -v "$export_dir:/tmp" \ + "${container_env[@]}" \ "$image" \ - -f "$features" \ - -o csv \ - --header \ - --export-path "/tmp/$(basename "$export_path")" \ - --performance-mode \ - --threads "$threads" \ - --early-export 5 \ - realtime "$interface" --ingress-only >/dev/null + "${rustiflow_args[@]}" >/dev/null sleep 2 @@ -178,6 +201,7 @@ printf 'bitrate_target: %s\n' "$bitrate" printf 'parallel_streams: %s\n' "$parallel" printf 'udp_length: %s\n' "$length" printf 'duration_s: %s\n' "$duration" +printf 'early_export_s: %s\n' "$early_export" printf 'receiver_bitrate: %s\n' "${receiver_bitrate:-missing}" printf 'dropped_packets: %s\n' "${dropped_packets:-missing}" printf 'export_path: %s\n' "$export_path" From fea8284b2a28c0a3ccc138d8f0635d703fffd9be Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:53:56 +0200 Subject: [PATCH 03/23] Fix realtime tc attach and validate IPv6 ingress --- AGENTS.md | 8 +- docs/engineering-notes.md | 29 ++++++ ebpf-ipv4/src/main.rs | 16 ++- ebpf-ipv6/src/main.rs | 118 ++++++++++++++++++++-- rustiflow/src/realtime.rs | 207 
++++++++++++++++++++++++++++++++------ 5 files changed, 335 insertions(+), 43 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index e1c5d873..43ab40c4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,6 +13,9 @@ This repository is a Rust workspace for a network flow extractor. The main crate - Linux is the source of truth for build, runtime, and performance validation. - Do not assume that successful non-Linux builds imply realtime correctness. - When touching `aya`/eBPF/realtime code, prefer validating on Linux. +- On the local Arch `rustiflow-t0` veth harness, legacy netlink tc attach is + currently more reliable than `aya`'s automatic TCX attach path for realtime + validation. ## Local Test Network @@ -21,7 +24,8 @@ This repository is a Rust workspace for a network flow extractor. The main crate - host namespace capture side: `rustiflow-t0` - peer namespace side: `rustiflow-p0` - peer namespace: `rustiflow-peer` - - addressing: `10.203.0.1/30` on `rustiflow-t0`, `10.203.0.2/30` on `rustiflow-p0` + - IPv4 addressing: `10.203.0.1/30` on `rustiflow-t0`, `10.203.0.2/30` on `rustiflow-p0` + - IPv6 addressing: `fd42:203::1/64` on `rustiflow-t0`, `fd42:203::2/64` on `rustiflow-p0` - This setup is intended to stress the RustiFlow software path without depending on the physical LAN. - Treat it as a high-throughput local test harness, not as a substitute for true physical wire-rate validation. @@ -136,7 +140,7 @@ in `docs/engineering-notes.md`. - [x] Treat the redesign as successful only when the verification data improves: fewer dropped packets on the single-flow `2.5G` case and materially better behavior on the `-P 8` multi-flow ingress case. -- [ ] Decide whether the current multi-queue ring-buffer design should also be +- [x] Decide whether the current multi-queue ring-buffer design should also be extended to IPv6, or whether the next step should be the more invasive transport rewrite captured as Option 2 in `docs/engineering-notes.md`. 
diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index 1fb7a16b..c3e8b644 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -278,6 +278,35 @@ This file keeps short-lived design choices and execution notes that would make `0` dropped packets - `8 x 1.25G`, `1400`-byte UDP, `--threads 16`: `9.98 Gbit/s`, `0` dropped packets +- IPv6 now mirrors the bounded Option 1 ingress design as well: + - the IPv6 eBPF program emits into four fixed ring buffers using the same + canonical biflow-style queue selection pattern as IPv4 + - userspace drains those four IPv6 queues in parallel before handing work to + the existing shard workers +- Realtime attach debugging on the local Arch `rustiflow-t0` harness found a + separate issue from the queue design: + - `aya`'s automatic `SchedClassifier::attach()` path reported success but, on + this kernel and veth setup, the TCX-attached programs never executed + - the added per-CPU eBPF counters (`matched_packets`, `submitted_events`, + `dropped_packets`) made this visible immediately because both IPv4 and IPv6 + stayed at `0` despite successful traffic on the harness + - forcing legacy netlink tc attach from userspace restored execution on the + local harness, after which the IPv6 counters tracked the live traffic as + expected +- IPv6 validation after forcing legacy netlink tc attach on the same slim + container workflow: + - `8 x 1.25G`, `1400`-byte UDP, `10s`, `--threads 4`, reverse IPv6 traffic: + `9.99 Gbit/s`, `0` RustiFlow drops + - `8 x 1.25G`, `1400`-byte UDP, `10s`, `--threads 12`, reverse IPv6 traffic: + `9.99 Gbit/s`, `0` RustiFlow drops + - `8 x 1.25G`, `512`-byte UDP, `10s`, `--threads 12`, reverse IPv6 traffic: + about `5.83 Gbit/s`, `0` RustiFlow drops +- Current interpretation of the IPv6 result: + - the bounded multi-queue ingress design now holds for both IPv4 and IPv6 on + the local software-path harness + - the local `512`-byte IPv6 case is not presently exposing a RustiFlow 
drop + point; the traffic generator or receive path gives out first while + RustiFlow still reports `0` drops - More adversarial 10 Gbit/s shapes with `--threads 11`: - `16 x 625M`, `1400`-byte UDP: `10.0 Gbit/s`, `0` dropped packets - `8 x 1.25G`, `1024`-byte UDP: `9.90 Gbit/s`, `0` dropped packets diff --git a/ebpf-ipv4/src/main.rs b/ebpf-ipv4/src/main.rs index cee9471e..208709cb 100644 --- a/ebpf-ipv4/src/main.rs +++ b/ebpf-ipv4/src/main.rs @@ -30,6 +30,12 @@ fn panic(_info: &core::panic::PanicInfo) -> ! { #[map] static DROPPED_PACKETS: PerCpuArray = PerCpuArray::with_max_entries(1, 0); +#[map] +static MATCHED_PACKETS: PerCpuArray = PerCpuArray::with_max_entries(1, 0); + +#[map] +static SUBMITTED_EVENTS: PerCpuArray = PerCpuArray::with_max_entries(1, 0); + #[map] static EVENTS_IPV4_0: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); @@ -89,15 +95,23 @@ fn reserve_ipv4_event(queue: &RingBuf, event: EbpfEventIpv4) -> bool { if let Some(mut entry) = queue.reserve::(0) { *entry = core::mem::MaybeUninit::new(event); entry.submit(0); + increment_counter(&MATCHED_PACKETS); + increment_counter(&SUBMITTED_EVENTS); true } else { + increment_counter(&MATCHED_PACKETS); false } } #[inline(always)] fn increment_dropped_packets() { - if let Some(counter) = DROPPED_PACKETS.get_ptr_mut(0) { + increment_counter(&DROPPED_PACKETS); +} + +#[inline(always)] +fn increment_counter(counter_array: &PerCpuArray) { + if let Some(counter) = counter_array.get_ptr_mut(0) { unsafe { *counter += 1 }; } } diff --git a/ebpf-ipv6/src/main.rs b/ebpf-ipv6/src/main.rs index 4620d8f6..361c0b76 100644 --- a/ebpf-ipv6/src/main.rs +++ b/ebpf-ipv6/src/main.rs @@ -11,7 +11,9 @@ use aya_ebpf::{ }; use aya_log_ebpf::debug; -use common::{EbpfEventIpv6, IcmpHdr, NetworkHeader, TcpHdr, UdpHdr}; +use common::{ + EbpfEventIpv6, IcmpHdr, NetworkHeader, TcpHdr, UdpHdr, REALTIME_EVENT_RINGBUF_BYTES, +}; use network_types::{ eth::{EthHdr, EtherType}, ip::{IpProto, Ipv6Hdr}, @@ -26,7 +28,22 @@ fn 
panic(_info: &core::panic::PanicInfo) -> ! { static DROPPED_PACKETS: PerCpuArray = PerCpuArray::with_max_entries(1, 0); #[map] -static EVENTS_IPV6: RingBuf = RingBuf::with_byte_size(1024 * 1024 * 64, 0); // 64 MB +static MATCHED_PACKETS: PerCpuArray = PerCpuArray::with_max_entries(1, 0); + +#[map] +static SUBMITTED_EVENTS: PerCpuArray = PerCpuArray::with_max_entries(1, 0); + +#[map] +static EVENTS_IPV6_0: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV6_1: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV6_2: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV6_3: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); #[classifier] pub fn tc_flow_track(ctx: TcContext) -> i32 { @@ -114,18 +131,100 @@ fn process_packet(ctx: &TcContext) -> Result { } #[inline(always)] -fn submit_ipv6_event(ctx: &TcContext, event: EbpfEventIpv6) { - if let Some(mut entry) = EVENTS_IPV6.reserve::(0) { +fn submit_ipv6_event(ctx: &TcContext, event: EbpfEventIpv6, queue_index: u32) { + let reserved = match queue_index { + 0 => reserve_ipv6_event(&EVENTS_IPV6_0, event), + 1 => reserve_ipv6_event(&EVENTS_IPV6_1, event), + 2 => reserve_ipv6_event(&EVENTS_IPV6_2, event), + _ => reserve_ipv6_event(&EVENTS_IPV6_3, event), + }; + + if !reserved { + increment_dropped_packets(); + debug!(ctx, "Failed to reserve entry in ring buffer."); + } +} + +#[inline(always)] +fn reserve_ipv6_event(queue: &RingBuf, event: EbpfEventIpv6) -> bool { + if let Some(mut entry) = queue.reserve::(0) { *entry = core::mem::MaybeUninit::new(event); entry.submit(0); + increment_counter(&MATCHED_PACKETS); + increment_counter(&SUBMITTED_EVENTS); + true } else { - if let Some(counter) = DROPPED_PACKETS.get_ptr_mut(0) { - unsafe { *counter += 1 }; - } - debug!(ctx, "Failed to reserve entry in ring buffer."); + increment_counter(&MATCHED_PACKETS); + false } } 
+#[inline(always)] +fn increment_dropped_packets() { + increment_counter(&DROPPED_PACKETS); +} + +#[inline(always)] +fn increment_counter(counter_array: &PerCpuArray) { + if let Some(counter) = counter_array.get_ptr_mut(0) { + unsafe { *counter += 1 }; + } +} + +#[inline(always)] +fn queue_index_ipv6(packet_info: &PacketInfo, header: &impl NetworkHeader) -> u32 { + let (first_ip, first_port, second_ip, second_port) = canonical_ipv6_endpoints( + packet_info.ipv6_source, + header.source_port(), + packet_info.ipv6_destination, + header.destination_port(), + ); + let hash = mix_u128(first_ip) + ^ mix_u128(second_ip).rotate_left(11) + ^ mix_u16(first_port).rotate_left(17) + ^ mix_u16(second_port).rotate_left(23) + ^ u32::from(packet_info.protocol).rotate_left(29); + hash & 0b11 +} + +#[inline(always)] +fn canonical_ipv6_endpoints( + source_ip: u128, + source_port: u16, + destination_ip: u128, + destination_port: u16, +) -> (u128, u16, u128, u16) { + if source_ip < destination_ip + || (source_ip == destination_ip && source_port <= destination_port) + { + (source_ip, source_port, destination_ip, destination_port) + } else { + (destination_ip, destination_port, source_ip, source_port) + } +} + +#[inline(always)] +fn mix_u128(value: u128) -> u32 { + let lower = value as u64; + let upper = (value >> 64) as u64; + mix_u64(lower) ^ mix_u64(upper).rotate_left(13) +} + +#[inline(always)] +fn mix_u64(mut value: u64) -> u32 { + value ^= value >> 30; + value = value.wrapping_mul(0xbf58_476d_1ce4_e5b9); + value ^= value >> 27; + value = value.wrapping_mul(0x94d0_49bb_1331_11eb); + let mixed = value ^ (value >> 31); + mixed as u32 ^ (mixed >> 32) as u32 +} + +#[inline(always)] +fn mix_u16(value: u16) -> u32 { + mix_u64(u64::from(value)) +} + fn process_transport_packet( ctx: &TcContext, packet_info: &PacketInfo, @@ -133,8 +232,9 @@ fn process_transport_packet( ) -> Result { let hdr = ctx.load::(transport_offset).map_err(|_| ())?; let packet_log = packet_info.to_packet_log(&hdr); + 
let queue_index = queue_index_ipv6(packet_info, &hdr); - submit_ipv6_event(ctx, packet_log); + submit_ipv6_event(ctx, packet_log, queue_index); Ok(TC_ACT_PIPE) } diff --git a/rustiflow/src/realtime.rs b/rustiflow/src/realtime.rs index 8e08f483..283e6419 100644 --- a/rustiflow/src/realtime.rs +++ b/rustiflow/src/realtime.rs @@ -15,7 +15,11 @@ use crate::{flow_table::FlowTable, flows::flow::Flow, packet_features::PacketFea use anyhow::Context; use aya::{ maps::{MapData, PerCpuArray, RingBuf}, - programs::{tc, SchedClassifier, TcAttachType}, + programs::{ + tc, + tc::{NlOptions, TcAttachOptions}, + SchedClassifier, TcAttachType, + }, Ebpf, }; use aya_log::EbpfLogger; @@ -40,6 +44,13 @@ struct RealtimeSourceStats { send_errors: AtomicU64, } +struct RealtimeEbpfCounters { + label: &'static str, + dropped_packets: PerCpuArray, + matched_packets: PerCpuArray, + submitted_events: PerCpuArray, +} + impl RealtimeSourceStats { fn add_decode_and_shard_ns(&self, value: u64) { self.decode_and_shard_ns.fetch_add(value, Ordering::Relaxed); @@ -137,12 +148,20 @@ where let events_ingress_ipv4 = take_ring_buf_maps(&mut bpf_ingress_ipv4, "EVENTS_IPV4")?; let dropped_packets_ingress_ipv4: PerCpuArray<_, u64> = PerCpuArray::try_from(bpf_ingress_ipv4.take_map("DROPPED_PACKETS").unwrap())?; - let events_ingress_ipv6 = RingBuf::try_from(bpf_ingress_ipv6.take_map("EVENTS_IPV6").unwrap())?; + let matched_packets_ingress_ipv4: PerCpuArray<_, u64> = + PerCpuArray::try_from(bpf_ingress_ipv4.take_map("MATCHED_PACKETS").unwrap())?; + let submitted_events_ingress_ipv4: PerCpuArray<_, u64> = + PerCpuArray::try_from(bpf_ingress_ipv4.take_map("SUBMITTED_EVENTS").unwrap())?; + let events_ingress_ipv6 = take_ring_buf_maps(&mut bpf_ingress_ipv6, "EVENTS_IPV6")?; let dropped_packets_ingress_ipv6: PerCpuArray<_, u64> = PerCpuArray::try_from(bpf_ingress_ipv6.take_map("DROPPED_PACKETS").unwrap())?; + let matched_packets_ingress_ipv6: PerCpuArray<_, u64> = + 
PerCpuArray::try_from(bpf_ingress_ipv6.take_map("MATCHED_PACKETS").unwrap())?; + let submitted_events_ingress_ipv6: PerCpuArray<_, u64> = + PerCpuArray::try_from(bpf_ingress_ipv6.take_map("SUBMITTED_EVENTS").unwrap())?; let event_sources_v4; let event_sources_v6; - let dropped_packet_counters; + let ebpf_counters; if !ingress_only { let mut bpf_egress_ipv4 = load_ebpf_ipv4(interface, TcAttachType::Egress)?; @@ -150,28 +169,68 @@ where let events_egress_ipv4 = take_ring_buf_maps(&mut bpf_egress_ipv4, "EVENTS_IPV4")?; let dropped_packets_egress_ipv4: PerCpuArray<_, u64> = PerCpuArray::try_from(bpf_egress_ipv4.take_map("DROPPED_PACKETS").unwrap())?; - let events_egress_ipv6 = - RingBuf::try_from(bpf_egress_ipv6.take_map("EVENTS_IPV6").unwrap())?; + let matched_packets_egress_ipv4: PerCpuArray<_, u64> = + PerCpuArray::try_from(bpf_egress_ipv4.take_map("MATCHED_PACKETS").unwrap())?; + let submitted_events_egress_ipv4: PerCpuArray<_, u64> = + PerCpuArray::try_from(bpf_egress_ipv4.take_map("SUBMITTED_EVENTS").unwrap())?; + let events_egress_ipv6 = take_ring_buf_maps(&mut bpf_egress_ipv6, "EVENTS_IPV6")?; let dropped_packets_egress_ipv6: PerCpuArray<_, u64> = PerCpuArray::try_from(bpf_egress_ipv6.take_map("DROPPED_PACKETS").unwrap())?; + let matched_packets_egress_ipv6: PerCpuArray<_, u64> = + PerCpuArray::try_from(bpf_egress_ipv6.take_map("MATCHED_PACKETS").unwrap())?; + let submitted_events_egress_ipv6: PerCpuArray<_, u64> = + PerCpuArray::try_from(bpf_egress_ipv6.take_map("SUBMITTED_EVENTS").unwrap())?; event_sources_v4 = labeled_ringbuf_sources("egress-ipv4", events_egress_ipv4) .into_iter() .chain(labeled_ringbuf_sources("ingress-ipv4", events_ingress_ipv4)) .collect(); - event_sources_v6 = vec![ - ("egress-ipv6", events_egress_ipv6), - ("ingress-ipv6", events_ingress_ipv6), - ]; - dropped_packet_counters = vec![ - dropped_packets_egress_ipv4, - dropped_packets_ingress_ipv4, - dropped_packets_egress_ipv6, - dropped_packets_ingress_ipv6, + event_sources_v6 = 
labeled_ringbuf_sources("egress-ipv6", events_egress_ipv6) + .into_iter() + .chain(labeled_ringbuf_sources("ingress-ipv6", events_ingress_ipv6)) + .collect(); + ebpf_counters = vec![ + RealtimeEbpfCounters { + label: "egress-ipv4", + dropped_packets: dropped_packets_egress_ipv4, + matched_packets: matched_packets_egress_ipv4, + submitted_events: submitted_events_egress_ipv4, + }, + RealtimeEbpfCounters { + label: "ingress-ipv4", + dropped_packets: dropped_packets_ingress_ipv4, + matched_packets: matched_packets_ingress_ipv4, + submitted_events: submitted_events_ingress_ipv4, + }, + RealtimeEbpfCounters { + label: "egress-ipv6", + dropped_packets: dropped_packets_egress_ipv6, + matched_packets: matched_packets_egress_ipv6, + submitted_events: submitted_events_egress_ipv6, + }, + RealtimeEbpfCounters { + label: "ingress-ipv6", + dropped_packets: dropped_packets_ingress_ipv6, + matched_packets: matched_packets_ingress_ipv6, + submitted_events: submitted_events_ingress_ipv6, + }, ]; } else { event_sources_v4 = labeled_ringbuf_sources("ingress-ipv4", events_ingress_ipv4); - event_sources_v6 = vec![("ingress-ipv6", events_ingress_ipv6)]; - dropped_packet_counters = vec![dropped_packets_ingress_ipv4, dropped_packets_ingress_ipv6]; + event_sources_v6 = labeled_ringbuf_sources("ingress-ipv6", events_ingress_ipv6); + ebpf_counters = vec![ + RealtimeEbpfCounters { + label: "ingress-ipv4", + dropped_packets: dropped_packets_ingress_ipv4, + matched_packets: matched_packets_ingress_ipv4, + submitted_events: submitted_events_ingress_ipv4, + }, + RealtimeEbpfCounters { + label: "ingress-ipv6", + dropped_packets: dropped_packets_ingress_ipv6, + matched_packets: matched_packets_ingress_ipv6, + submitted_events: submitted_events_ingress_ipv6, + }, + ]; } let mut shard_senders = Vec::with_capacity(num_threads as usize); @@ -369,17 +428,18 @@ where // Fetch dropped packets counter from eBPF program before terminating info!("Fetching dropped packet counters before exiting..."); let mut 
total_dropped = 0; - for dropped_packets_array in dropped_packet_counters { - match dropped_packets_array.get(&0, 0) { - Ok(values) => { - for cpu_val in values.iter() { - total_dropped += *cpu_val; - } - } - Err(e) => { - error!("Failed to read dropped packets counter: {:?}", e); - } - } + for counters in &ebpf_counters { + let dropped_packets = + read_per_cpu_counter(&counters.dropped_packets, counters.label, "dropped"); + let matched_packets = + read_per_cpu_counter(&counters.matched_packets, counters.label, "matched"); + let submitted_events = + read_per_cpu_counter(&counters.submitted_events, counters.label, "submitted"); + total_dropped += dropped_packets; + info!( + "eBPF counters {}: matched_packets={}, submitted_events={}, dropped_packets={}", + counters.label, matched_packets, submitted_events, dropped_packets + ); } info!("Total dropped packets before exit: {}", total_dropped); @@ -413,6 +473,23 @@ where Ok(total_dropped) } +fn read_per_cpu_counter( + counter_array: &PerCpuArray, + label: &str, + counter_name: &str, +) -> u64 { + match counter_array.get(&0, 0) { + Ok(values) => values.iter().sum(), + Err(e) => { + error!( + "Failed to read {} counter for {}: {:?}", + counter_name, label, e + ); + 0 + } + } +} + fn create_pending_batches(num_shards: usize, shard_batch_size: usize) -> Vec> { std::iter::repeat_with(|| Vec::with_capacity(shard_batch_size)) .take(num_shards) @@ -520,6 +597,14 @@ fn queue_label(label_prefix: &'static str, index: usize) -> &'static str { ("egress-ipv4", 1) => "egress-ipv4-q1", ("egress-ipv4", 2) => "egress-ipv4-q2", ("egress-ipv4", 3) => "egress-ipv4-q3", + ("ingress-ipv6", 0) => "ingress-ipv6-q0", + ("ingress-ipv6", 1) => "ingress-ipv6-q1", + ("ingress-ipv6", 2) => "ingress-ipv6-q2", + ("ingress-ipv6", 3) => "ingress-ipv6-q3", + ("egress-ipv6", 0) => "egress-ipv6-q0", + ("egress-ipv6", 1) => "egress-ipv6-q1", + ("egress-ipv6", 2) => "egress-ipv6-q2", + ("egress-ipv6", 3) => "egress-ipv6-q3", _ => panic!( "unexpected realtime 
queue label: {}-{}", label_prefix, index @@ -587,8 +672,23 @@ fn ebpf_binary_path(program_name: &str) -> PathBuf { .join(program_name) } +fn tc_attach_type_label(tc_attach_type: TcAttachType) -> &'static str { + match tc_attach_type { + TcAttachType::Ingress => "ingress", + TcAttachType::Egress => "egress", + TcAttachType::Custom(_) => "custom", + } +} + fn load_ebpf_ipv4(interface: &str, tc_attach_type: TcAttachType) -> Result { let binary_path = ebpf_binary_path("rustiflow-ebpf-ipv4"); + let attach_label = tc_attach_type_label(tc_attach_type); + info!( + "Loading IPv4 eBPF binary {} for {} on {}", + binary_path.display(), + attach_label, + interface + ); let mut bpf_ipv4 = Ebpf::load_file(&binary_path).with_context(|| { format!( "Failed to load eBPF IPv4 binary from {}. Build it first with `cargo xtask ebpf-ipv4`.", @@ -598,26 +698,52 @@ fn load_ebpf_ipv4(interface: &str, tc_attach_type: TcAttachType) -> Result info!("Ensured clsact qdisc on {}", interface), + Err(e) => debug!("qdisc_add_clsact({}): {:?}", interface, e), + } let program_egress_ipv4: &mut SchedClassifier = bpf_ipv4.program_mut("tc_flow_track").unwrap().try_into()?; + info!( + "Loading IPv4 tc classifier for {} on {}", + attach_label, interface + ); program_egress_ipv4.load().map_err(|e| { error!("Failed to load eBPF program: {:?}", e); e })?; + info!( + "Attaching IPv4 tc classifier to {} on {}", + attach_label, interface + ); program_egress_ipv4 - .attach(interface, tc_attach_type) + .attach_with_options( + interface, + tc_attach_type, + TcAttachOptions::Netlink(NlOptions::default()), + ) .map_err(|e| { error!("Failed to attach eBPF program: {:?}", e); e })?; + info!( + "Attached IPv4 tc classifier to {} on {}", + attach_label, interface + ); Ok(bpf_ipv4) } fn load_ebpf_ipv6(interface: &str, tc_attach_type: TcAttachType) -> Result { let binary_path = ebpf_binary_path("rustiflow-ebpf-ipv6"); + let attach_label = tc_attach_type_label(tc_attach_type); + info!( + "Loading IPv6 eBPF binary {} for {} 
on {}", + binary_path.display(), + attach_label, + interface + ); let mut bpf_ipv6 = Ebpf::load_file(&binary_path).with_context(|| { format!( "Failed to load eBPF IPv6 binary from {}. Build it first with `cargo xtask ebpf-ipv6`.", @@ -627,12 +753,31 @@ fn load_ebpf_ipv6(interface: &str, tc_attach_type: TcAttachType) -> Result info!("Ensured clsact qdisc on {}", interface), + Err(e) => debug!("qdisc_add_clsact({}): {:?}", interface, e), + } let program_egress_ipv6: &mut SchedClassifier = bpf_ipv6.program_mut("tc_flow_track").unwrap().try_into()?; + info!( + "Loading IPv6 tc classifier for {} on {}", + attach_label, interface + ); program_egress_ipv6.load()?; - program_egress_ipv6.attach(interface, tc_attach_type)?; + info!( + "Attaching IPv6 tc classifier to {} on {}", + attach_label, interface + ); + program_egress_ipv6.attach_with_options( + interface, + tc_attach_type, + TcAttachOptions::Netlink(NlOptions::default()), + )?; + info!( + "Attached IPv6 tc classifier to {} on {}", + attach_label, interface + ); Ok(bpf_ipv6) } From 13e833319b6bf6df8ce3083f5ccf17bc61ded1c7 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:25:55 +0200 Subject: [PATCH 04/23] Add offline and realtime parity tests --- AGENTS.md | 24 +-- docs/engineering-notes.md | 16 ++ rustiflow/src/tests/flows/flow_table_test.rs | 184 +++++++++++++++++- .../src/tests/flows/packet_features_test.rs | 122 +++++++++++- 4 files changed, 322 insertions(+), 24 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 43ab40c4..0304abd9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -121,28 +121,8 @@ in `docs/engineering-notes.md`. ### Current Focus -- [x] Keep the `rustiflow-t0` / `rustiflow-peer` container harness green as the - realtime throughput baseline: - `docker run --privileged --network host ... realtime rustiflow-t0 --ingress-only` - plus `iperf3 -c 10.203.0.2 -B 10.203.0.1 -u -b 2.5G -l 1400 -R`. 
-- [x] Prove where the current realtime bottleneck lives before redesigning it: - ring-buffer capacity, single-source drain task, shard channel backpressure, - or flow-table work. -- [x] Restructure realtime ingestion so more than one userspace task can drain - packet events in parallel instead of funnelling all ingress traffic through - one hot path in `rustiflow/src/realtime.rs`. -- [ ] Preserve semantic parity with offline mode while changing ingestion - structure: timestamps, packet lengths, biflow direction, expiration, and - export contents must remain aligned. -- [x] Add a repeatable throughput comparison after each structural change: - same `iperf3` command, same interface, same export mode, and explicit - `Total dropped packets before exit` capture. -- [x] Treat the redesign as successful only when the verification data improves: - fewer dropped packets on the single-flow `2.5G` case and materially better - behavior on the `-P 8` multi-flow ingress case. -- [x] Decide whether the current multi-queue ring-buffer design should also be - extended to IPv6, or whether the next step should be the more invasive - transport rewrite captured as Option 2 in `docs/engineering-notes.md`. +None right now. Keep this section empty when the current phase is complete and +move completed work into `docs/engineering-notes.md`. 
Primary files: diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index c3e8b644..f9a37613 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -372,3 +372,19 @@ This file keeps short-lived design choices and execution notes that would make neutral setting - shard batch size and queue capacity are worth keeping tunable as advanced knobs, but they are second-order compared with threads and export cadence +- Semantic parity between offline and realtime ingestion now has explicit test + coverage for the changed structure: + - constructor-level parity tests compare `PacketFeatures` built from parsed + IPv4/IPv6 packets against `PacketFeatures` built from equivalent eBPF + events, including timestamps, packet lengths, flags, sequence numbers, and + biflow-defining endpoint fields + - flow-level parity tests feed equivalent offline and realtime packet + sequences into `FlowTable` and verify matching bidirectional exports and + matching idle-timeout expiration behavior + - this keeps the ingestion redesign grounded in the invariant that, once a + packet is normalized into `PacketFeatures`, flow ownership, expiration, and + exporter output stay aligned across both ingestion modes +- Narrow validation for that semantic-parity work: + - `cargo test -p rustiflow packet_features_test -- --nocapture` + - `cargo test -p rustiflow flow_table_test -- --nocapture` + - `cargo check -p rustiflow` diff --git a/rustiflow/src/tests/flows/flow_table_test.rs b/rustiflow/src/tests/flows/flow_table_test.rs index 9aff7908..6bfba700 100644 --- a/rustiflow/src/tests/flows/flow_table_test.rs +++ b/rustiflow/src/tests/flows/flow_table_test.rs @@ -2,11 +2,16 @@ mod tests { use std::net::{IpAddr, Ipv4Addr}; + #[cfg(target_os = "linux")] + use common::EbpfEventIpv4; use tokio::sync::mpsc; use crate::{ flow_table::FlowTable, - flows::{basic_flow::BasicFlow, cidds_flow::CiddsFlow, util::FlowExpireCause}, + flows::{ + basic_flow::BasicFlow, cidds_flow::CiddsFlow, 
flow::Flow, rusti_flow::RustiFlow, + util::FlowExpireCause, + }, packet_features::PacketFeatures, }; @@ -22,6 +27,39 @@ mod tests { } } + #[cfg(target_os = "linux")] + fn build_realtime_packet( + source_ip: Ipv4Addr, + source_port: u16, + destination_ip: Ipv4Addr, + destination_port: u16, + timestamp_us: i64, + flags: u8, + sequence_number: u32, + sequence_number_ack: u32, + data_length: u16, + ) -> PacketFeatures { + let realtime_offset_us = 1_000_000; + let event = EbpfEventIpv4::new( + (timestamp_us - realtime_offset_us) as u64 * 1_000, + u32::from(destination_ip).to_be(), + u32::from(source_ip).to_be(), + destination_port, + source_port, + data_length, + 40 + data_length, + 4096, + flags, + 6, + 20, + sequence_number, + sequence_number_ack, + 0, + 0, + ); + PacketFeatures::from_ebpf_event_ipv4(&event, realtime_offset_us) + } + #[tokio::test] async fn exports_idle_timed_out_flow_with_idle_timeout_cause() { let (tx, mut rx) = mpsc::channel::(4); @@ -164,4 +202,148 @@ mod tests { assert_eq!(final_export.last_timestamp_us, 3_000_001); assert!(rx.try_recv().is_err()); } + + #[cfg(target_os = "linux")] + #[tokio::test] + async fn offline_and_realtime_bidirectional_exports_match() { + let (offline_tx, mut offline_rx) = mpsc::channel::(4); + let (realtime_tx, mut realtime_rx) = mpsc::channel::(4); + let mut offline_table = FlowTable::new(3600, 120, None, offline_tx, 60); + let mut realtime_table = FlowTable::new(3600, 120, None, realtime_tx, 60); + + let client_ip = Ipv4Addr::new(192, 168, 1, 1); + let server_ip = Ipv4Addr::new(192, 168, 1, 2); + + let mut offline_syn = build_packet(1_000_000); + offline_syn.syn_flag = 1; + offline_syn.flags = 0x02; + offline_syn.length = 40; + offline_syn.header_length = 20; + offline_syn.window_size = 4096; + offline_syn.sequence_number = 100; + + let offline_syn_ack = PacketFeatures { + source_ip: IpAddr::V4(server_ip), + destination_ip: IpAddr::V4(client_ip), + source_port: 443, + destination_port: 12345, + protocol: 6, + 
timestamp_us: 1_000_100, + syn_flag: 1, + ack_flag: 1, + flags: 0x12, + header_length: 20, + length: 40, + window_size: 4096, + sequence_number: 200, + sequence_number_ack: 101, + ..Default::default() + }; + + let mut offline_ack = build_packet(1_000_200); + offline_ack.ack_flag = 1; + offline_ack.flags = 0x10; + offline_ack.length = 40; + offline_ack.header_length = 20; + offline_ack.window_size = 4096; + offline_ack.sequence_number = 101; + offline_ack.sequence_number_ack = 201; + + let mut offline_payload = build_packet(1_000_300); + offline_payload.ack_flag = 1; + offline_payload.psh_flag = 1; + offline_payload.flags = 0x18; + offline_payload.header_length = 20; + offline_payload.data_length = 64; + offline_payload.length = 104; + offline_payload.window_size = 4096; + offline_payload.sequence_number = 101; + offline_payload.sequence_number_ack = 201; + + let realtime_syn = + build_realtime_packet(client_ip, 12345, server_ip, 443, 1_000_000, 0x02, 100, 0, 0); + let realtime_syn_ack = build_realtime_packet( + server_ip, 443, client_ip, 12345, 1_000_100, 0x12, 200, 101, 0, + ); + let realtime_ack = build_realtime_packet( + client_ip, 12345, server_ip, 443, 1_000_200, 0x10, 101, 201, 0, + ); + let realtime_payload = build_realtime_packet( + client_ip, 12345, server_ip, 443, 1_000_300, 0x18, 101, 201, 64, + ); + + for packet in [ + &offline_syn, + &offline_syn_ack, + &offline_ack, + &offline_payload, + ] { + offline_table.process_packet(packet).await; + } + for packet in [ + &realtime_syn, + &realtime_syn_ack, + &realtime_ack, + &realtime_payload, + ] { + realtime_table.process_packet(packet).await; + } + + offline_table.export_all_flows(2_000_000).await; + realtime_table.export_all_flows(2_000_000).await; + + let offline_export = offline_rx.recv().await.expect("expected offline export"); + let realtime_export = realtime_rx.recv().await.expect("expected realtime export"); + + assert_eq!(offline_export.dump(), realtime_export.dump()); + assert_eq!( + 
offline_export.dump_without_contamination(), + realtime_export.dump_without_contamination() + ); + assert!(offline_rx.try_recv().is_err()); + assert!(realtime_rx.try_recv().is_err()); + } + + #[cfg(target_os = "linux")] + #[tokio::test] + async fn offline_and_realtime_idle_expiration_match() { + let (offline_tx, mut offline_rx) = mpsc::channel::(4); + let (realtime_tx, mut realtime_rx) = mpsc::channel::(4); + let mut offline_table = FlowTable::new(3600, 1, None, offline_tx, 60); + let mut realtime_table = FlowTable::new(3600, 1, None, realtime_tx, 60); + + let offline_packet = build_packet(1_000_000); + let realtime_packet = build_realtime_packet( + Ipv4Addr::new(192, 168, 1, 1), + 12345, + Ipv4Addr::new(192, 168, 1, 2), + 443, + 1_000_000, + 0, + 0, + 0, + 0, + ); + + offline_table.process_packet(&offline_packet).await; + realtime_table.process_packet(&realtime_packet).await; + + offline_table.export_expired_flows(3_000_000).await; + realtime_table.export_expired_flows(3_000_000).await; + + let offline_export = offline_rx.recv().await.expect("expected offline export"); + let realtime_export = realtime_rx.recv().await.expect("expected realtime export"); + + assert_eq!(offline_export.dump(), realtime_export.dump()); + assert_eq!( + offline_export.flow_expire_cause, + FlowExpireCause::IdleTimeout + ); + assert_eq!( + realtime_export.flow_expire_cause, + FlowExpireCause::IdleTimeout + ); + assert!(offline_rx.try_recv().is_err()); + assert!(realtime_rx.try_recv().is_err()); + } } diff --git a/rustiflow/src/tests/flows/packet_features_test.rs b/rustiflow/src/tests/flows/packet_features_test.rs index ee86c6d0..cb853b36 100644 --- a/rustiflow/src/tests/flows/packet_features_test.rs +++ b/rustiflow/src/tests/flows/packet_features_test.rs @@ -1,7 +1,9 @@ #[cfg(test)] mod tests { + #[cfg(target_os = "linux")] + use common::{EbpfEventIpv4, EbpfEventIpv6}; use pnet::packet::{ip::IpNextHeaderProtocols, ipv4::Ipv4Packet, ipv6::Ipv6Packet}; - use std::net::{IpAddr, Ipv4Addr}; + 
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use crate::packet_features::PacketFeatures; @@ -72,6 +74,46 @@ mod tests { packet } + #[cfg(target_os = "linux")] + fn assert_packet_features_match( + parsed_packet: &PacketFeatures, + realtime_packet: &PacketFeatures, + expected_timestamp_us: i64, + ) { + assert_eq!(realtime_packet.source_ip, parsed_packet.source_ip); + assert_eq!(realtime_packet.destination_ip, parsed_packet.destination_ip); + assert_eq!(realtime_packet.source_port, parsed_packet.source_port); + assert_eq!( + realtime_packet.destination_port, + parsed_packet.destination_port + ); + assert_eq!(realtime_packet.protocol, parsed_packet.protocol); + assert_eq!(realtime_packet.timestamp_us, expected_timestamp_us); + assert_eq!(realtime_packet.fin_flag, parsed_packet.fin_flag); + assert_eq!(realtime_packet.syn_flag, parsed_packet.syn_flag); + assert_eq!(realtime_packet.rst_flag, parsed_packet.rst_flag); + assert_eq!(realtime_packet.psh_flag, parsed_packet.psh_flag); + assert_eq!(realtime_packet.ack_flag, parsed_packet.ack_flag); + assert_eq!(realtime_packet.urg_flag, parsed_packet.urg_flag); + assert_eq!(realtime_packet.cwr_flag, parsed_packet.cwr_flag); + assert_eq!(realtime_packet.ece_flag, parsed_packet.ece_flag); + assert_eq!(realtime_packet.data_length, parsed_packet.data_length); + assert_eq!(realtime_packet.header_length, parsed_packet.header_length); + assert_eq!(realtime_packet.length, parsed_packet.length); + assert_eq!(realtime_packet.window_size, parsed_packet.window_size); + assert_eq!( + realtime_packet.sequence_number, + parsed_packet.sequence_number + ); + assert_eq!( + realtime_packet.sequence_number_ack, + parsed_packet.sequence_number_ack + ); + assert_eq!(realtime_packet.icmp_type, parsed_packet.icmp_type); + assert_eq!(realtime_packet.icmp_code, parsed_packet.icmp_code); + assert_eq!(realtime_packet.flags, parsed_packet.flags); + } + #[test] fn ipv6_hop_by_hop_extension_is_skipped_before_tcp_parse() { let mut payload = vec![0_u8; 8 + 20]; 
@@ -184,4 +226,82 @@ mod tests { assert_eq!(features.source_port, 5353); assert_eq!(features.destination_port, 53); } + + #[cfg(target_os = "linux")] + #[test] + fn ipv4_tcp_packet_and_ebpf_event_produce_matching_features() { + let payload = b"rust"; + let mut tcp = vec![0_u8; 20 + payload.len()]; + tcp[0..2].copy_from_slice(&12345_u16.to_be_bytes()); + tcp[2..4].copy_from_slice(&443_u16.to_be_bytes()); + tcp[4..8].copy_from_slice(&0x0102_0304_u32.to_be_bytes()); + tcp[8..12].copy_from_slice(&0x0506_0708_u32.to_be_bytes()); + tcp[12] = 0x50; + tcp[13] = 0x1a; + tcp[14..16].copy_from_slice(&4096_u16.to_be_bytes()); + tcp[20..].copy_from_slice(payload); + + let bytes = build_ipv4_packet(IpNextHeaderProtocols::Tcp.0, 0, &tcp); + let packet = Ipv4Packet::new(&bytes).unwrap(); + let parsed_packet = PacketFeatures::from_ipv4_packet(&packet, 1_234_567).unwrap(); + + let realtime_offset_us = 1_000_000; + let event = EbpfEventIpv4::new( + (1_234_567 - realtime_offset_us) as u64 * 1_000, + u32::from(Ipv4Addr::new(192, 0, 2, 2)).to_be(), + u32::from(Ipv4Addr::new(192, 0, 2, 1)).to_be(), + 443, + 12345, + payload.len() as u16, + 44, + 4096, + 0x1a, + IpNextHeaderProtocols::Tcp.0, + 20, + 0x0102_0304, + 0x0506_0708, + 0, + 0, + ); + let realtime_packet = PacketFeatures::from_ebpf_event_ipv4(&event, realtime_offset_us); + + assert_packet_features_match(&parsed_packet, &realtime_packet, 1_234_567); + } + + #[cfg(target_os = "linux")] + #[test] + fn ipv6_udp_packet_and_ebpf_event_produce_matching_features() { + let payload = b"flow"; + let mut udp = vec![0_u8; 8 + payload.len()]; + udp[0..2].copy_from_slice(&5353_u16.to_be_bytes()); + udp[2..4].copy_from_slice(&53_u16.to_be_bytes()); + udp[4..6].copy_from_slice(&((8 + payload.len()) as u16).to_be_bytes()); + udp[8..].copy_from_slice(payload); + + let bytes = build_ipv6_packet(IpNextHeaderProtocols::Udp.0, &udp); + let packet = Ipv6Packet::new(&bytes).unwrap(); + let parsed_packet = PacketFeatures::from_ipv6_packet(&packet, 
2_345_678).unwrap(); + + let realtime_offset_us = 2_000_000; + let event = EbpfEventIpv6::new( + (2_345_678 - realtime_offset_us) as u64 * 1_000, + u128::from(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 2)).to_be(), + u128::from(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1)).to_be(), + 53, + 5353, + payload.len() as u16, + 52, + 0, + 0, + IpNextHeaderProtocols::Udp.0, + 8, + 0, + 0, + 0, + 0, + ); + let realtime_packet = PacketFeatures::from_ebpf_event_ipv6(&event, realtime_offset_us); + + assert_packet_features_match(&parsed_packet, &realtime_packet, 2_345_678); + } } From 575e55d43982cdda4e543b6b4f862d02335ed7dc Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:02:21 +0200 Subject: [PATCH 05/23] Extend realtime ingress parallelism --- AGENTS.md | 3 +- common/src/lib.rs | 2 +- docs/engineering-notes.md | 115 +++++++++++ ebpf-ipv4/src/main.rs | 50 ++++- ebpf-ipv6/src/main.rs | 52 ++++- rustiflow/src/realtime.rs | 148 ++++++++----- rustiflow/src/tests/flows/flow_table_test.rs | 207 ++++++++++++++++++- 7 files changed, 501 insertions(+), 76 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0304abd9..1103b4bf 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -121,8 +121,7 @@ in `docs/engineering-notes.md`. ### Current Focus -None right now. Keep this section empty when the current phase is complete and -move completed work into `docs/engineering-notes.md`. +None currently. Add only the next active bounded engineering phase here. Primary files: diff --git a/common/src/lib.rs b/common/src/lib.rs index 52378206..1f3f7462 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -2,7 +2,7 @@ pub use network_types::{icmp::IcmpHdr, tcp::TcpHdr, udp::UdpHdr}; -pub const REALTIME_EVENT_QUEUE_COUNT: usize = 4; +pub const REALTIME_EVENT_QUEUE_COUNT: usize = 8; pub const REALTIME_EVENT_RINGBUF_BYTES: u32 = 1024 * 1024 * 64; /// BasicFeaturesIpv4 is a struct collection all ipv4 traffic data. 
diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index f9a37613..2e5e48f9 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -388,3 +388,118 @@ This file keeps short-lived design choices and execution notes that would make - `cargo test -p rustiflow packet_features_test -- --nocapture` - `cargo test -p rustiflow flow_table_test -- --nocapture` - `cargo check -p rustiflow` +- First bounded follow-up on the new parallelization phase: + - added a per-source dispatch stage in `rustiflow/src/realtime.rs` so the + ring-buffer drain tasks no longer await shard-channel sends directly + - each realtime source now batches packets, enqueues shard work into a + bounded per-source dispatch queue, and a separate dispatcher task performs + the shard-channel `send().await` +- Immediate validation on the local slim-container harness: + - `10G`, `1400`-byte UDP, `-P 8`, `--threads 12`, `--early-export 0`: + about `12.7 Gbit/s`, `0` RustiFlow drops + - overloaded ingress case, `2.5G`, `1400`-byte UDP, `-P 8`, + `--threads 4`, `--early-export 5`: about `13.2 Gbit/s`, + `608731` RustiFlow drops +- Refined source stats on a short stats-enabled overloaded run show the first + checklist item is materially complete: + - `ingress-ipv4-q0`: `avg_event_us=0.209`, + `avg_enqueue_wait_us=0.059`, `avg_shard_send_wait_us=0.148` + - `ingress-ipv4-q2`: `avg_event_us=0.218`, + `avg_enqueue_wait_us=0.085`, `avg_shard_send_wait_us=0.080` + - `ingress-ipv4-q3`: `avg_event_us=0.222`, + `avg_enqueue_wait_us=0.117`, `avg_shard_send_wait_us=0.233` +- Current interpretation: + - the source tasks are now spending much less time blocked on downstream + backpressure than in the earlier inline-send design + - the remaining hot wait has shifted into the dispatcher-to-shard stage, + which is exactly the next place to optimize + - queue balance is still imperfect under some `iperf3` multi-flow shapes, so + queue-count and fanout tuning remain worthwhile next experiments +- 
Follow-up answer to the next checklist item, using the same slim-container + harness and hot workloads: + - clean `10G` case, `1400`-byte UDP, `-P 8`, `--threads 12`, + `--early-export 0`: + - repeated runs stayed at `10.9` to `12.7 Gbit/s` + - RustiFlow still reported `0` dropped packets + - overloaded ingress case, target `2.5G` per stream, `1400`-byte UDP, + `-P 8`, `--threads 4`, `--early-export 5`: + - recent runs landed between about `11.0` and `13.2 Gbit/s` + - RustiFlow drops fell between `0` and `608731` + - compared with the earlier pre-dispatch-decoupling baseline on the same + general shape (`641688` to `1233317` drops, but at higher achieved + bitrate around `15.7` to `16.3 Gbit/s`), the drop count generally + improved, but the load generator no longer drove quite as much traffic +- Current interpretation of the hot-case comparison: + - the drain/dispatch split is not a clean across-the-board throughput win + - it does preserve the proven zero-drop operating point for the clean `10G` + case + - on the overloaded multi-flow case it appears to trade lower drop counts for + somewhat lower achieved bitrate, so the next experiments need to determine + whether that reflects healthier backpressure or simply a different upstream + bottleneck +- Re-evaluated queue parallelism by increasing + `REALTIME_EVENT_QUEUE_COUNT` from `4` to `8` for both IPv4 and IPv6 ringbuf + sources: + - clean `10G` case, `1400`-byte UDP, `-P 8`, `--threads 12`, + `--early-export 0`: `9.96 Gbit/s`, `0` dropped packets + - overloaded ingress case, target `2.5G` per stream, `1400`-byte UDP, + `-P 8`, `--threads 4`, `--early-export 5`: `10.3 Gbit/s`, + `0` dropped packets + - relative to the earlier `4`-queue version on the same overloaded shape, + the `8`-queue version bought real headroom: the same workload no longer + overran RustiFlow internally +- Short stats-enabled run on the `8`-queue build: + - `ingress-ipv4-q0`: `1114790` events + - `ingress-ipv4-q1`: `1733000` events + - 
`ingress-ipv4-q5`: `1136284` events + - `ingress-ipv4-q6`: `565056` events + - `ingress-ipv4-q2`, `q3`, `q4`, and `q7`: `no events drained` +- Current interpretation of the `8`-queue result: + - increasing queue count beyond `4` does buy real headroom on the current + architecture and was worth doing before considering a more invasive + transport rewrite + - queue usage is still visibly skewed under the existing `iperf3` multi-flow + stress case, so fanout quality remains an active bottleneck even after the + headroom gain from `8` queues +- Explicit queue-balance follow-up after the `8`-queue change: + - switched the queue-selection hash in both eBPF programs from the earlier + XOR-and-rotate tuple combiner to a stronger canonical-tuple + `hash_combine` + `fmix32` style finalization + - this kept biflow-stable queue selection but improved spread for the + observed reverse-UDP `iperf3` flow set +- Short stats-enabled overloaded run with the revised fanout: + - `ingress-ipv4-q1`: `1174440` events + - `ingress-ipv4-q2`: `562581` events + - `ingress-ipv4-q3`: `566039` events + - `ingress-ipv4-q4`: `550207` events + - `ingress-ipv4-q5`: `566313` events + - `ingress-ipv4-q6`: `1135669` events + - only `q0` and `q7` stayed idle on this run, versus four idle queues under + the previous fanout +- Hot-case check after the fanout revision: + - overloaded ingress case, target `2.5G` per stream, `1400`-byte UDP, + `-P 8`, `--threads 4`, `--early-export 5`: `9.77 Gbit/s`, + `0` dropped packets + - clean `10G` case, `1400`-byte UDP, `-P 8`, `--threads 12`, + `--early-export 0`: `9.67 Gbit/s`, `0` dropped packets +- Current interpretation of the fanout experiment: + - queue-balance quality improved materially and no longer shows the earlier + obvious four-queue skew + - the current architecture now has more usable parallel ingress width before + a transport rewrite is justified + - some skew remains, so fanout quality is not "solved forever", but this + bounded experiment did 
move the real bottleneck forward +- Semantic-parity guard for the newer realtime-only tuning steps: + - existing parity coverage already protected constructor-level normalization + for IPv4 and IPv6, and flow/export parity for IPv4 + - added the missing IPv6 flow-level parity tests so the mirrored IPv6 + multi-queue path is covered at the same level: + - offline/realtime IPv6 bidirectional export parity + - offline/realtime IPv6 idle-expiration parity + - this keeps the newer queue-count, fanout, and dispatch changes grounded in + the rule that realtime parallelization must not change timestamps, packet + lengths, biflow ownership, expiration causes, or exported flow contents +- Narrow validation for the semantic-parity guard: + - `cargo test -p rustiflow packet_features_test -- --nocapture` + - `cargo test -p rustiflow flow_table_test -- --nocapture` + - `cargo check -p rustiflow` diff --git a/ebpf-ipv4/src/main.rs b/ebpf-ipv4/src/main.rs index 208709cb..b26340e0 100644 --- a/ebpf-ipv4/src/main.rs +++ b/ebpf-ipv4/src/main.rs @@ -14,6 +14,7 @@ use aya_log_ebpf::debug; use common::EbpfEventIpv4; use common::IcmpHdr; use common::NetworkHeader; +use common::REALTIME_EVENT_QUEUE_COUNT; use common::REALTIME_EVENT_RINGBUF_BYTES; use common::TcpHdr; use common::UdpHdr; @@ -48,6 +49,18 @@ static EVENTS_IPV4_2: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_B #[map] static EVENTS_IPV4_3: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); +#[map] +static EVENTS_IPV4_4: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV4_5: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV4_6: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV4_7: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + #[classifier] pub fn tc_flow_track(ctx: TcContext) -> i32 { match process_packet(&ctx) { @@ -81,7 +94,11 @@ fn 
submit_ipv4_event(ctx: &TcContext, event: EbpfEventIpv4, queue_index: u32) { 0 => reserve_ipv4_event(&EVENTS_IPV4_0, event), 1 => reserve_ipv4_event(&EVENTS_IPV4_1, event), 2 => reserve_ipv4_event(&EVENTS_IPV4_2, event), - _ => reserve_ipv4_event(&EVENTS_IPV4_3, event), + 3 => reserve_ipv4_event(&EVENTS_IPV4_3, event), + 4 => reserve_ipv4_event(&EVENTS_IPV4_4, event), + 5 => reserve_ipv4_event(&EVENTS_IPV4_5, event), + 6 => reserve_ipv4_event(&EVENTS_IPV4_6, event), + _ => reserve_ipv4_event(&EVENTS_IPV4_7, event), }; if !reserved { @@ -124,12 +141,14 @@ fn queue_index_ipv4(packet_info: &PacketInfo, header: &impl NetworkHeader) -> u3 packet_info.ipv4_destination, header.destination_port(), ); - let hash = mix_u32(first_ip) - ^ mix_u32(second_ip).rotate_left(7) - ^ mix_u16(first_port).rotate_left(13) - ^ mix_u16(second_port).rotate_left(19) - ^ u32::from(packet_info.protocol).rotate_left(27); - hash & 0b11 + let endpoint_ports = (u32::from(first_port) << 16) | u32::from(second_port); + let mut hash = 0x811c_9dc5; + hash = hash_combine(hash, mix_u32(first_ip)); + hash = hash_combine(hash, mix_u32(second_ip)); + hash = hash_combine(hash, endpoint_ports); + hash = hash_combine(hash, u32::from(packet_info.protocol)); + hash = finish_hash32(hash); + hash % REALTIME_EVENT_QUEUE_COUNT as u32 } #[inline(always)] @@ -157,8 +176,21 @@ fn mix_u32(mut value: u32) -> u32 { } #[inline(always)] -fn mix_u16(value: u16) -> u32 { - mix_u32(u32::from(value)) +fn hash_combine(state: u32, value: u32) -> u32 { + state + ^ value + .wrapping_add(0x9e37_79b9) + .wrapping_add(state << 6) + .wrapping_add(state >> 2) +} + +#[inline(always)] +fn finish_hash32(mut value: u32) -> u32 { + value ^= value >> 16; + value = value.wrapping_mul(0x85eb_ca6b); + value ^= value >> 13; + value = value.wrapping_mul(0xc2b2_ae35); + value ^ (value >> 16) } fn process_transport_packet( diff --git a/ebpf-ipv6/src/main.rs b/ebpf-ipv6/src/main.rs index 361c0b76..9c1b6b7c 100644 --- a/ebpf-ipv6/src/main.rs +++ 
b/ebpf-ipv6/src/main.rs @@ -12,7 +12,8 @@ use aya_ebpf::{ use aya_log_ebpf::debug; use common::{ - EbpfEventIpv6, IcmpHdr, NetworkHeader, TcpHdr, UdpHdr, REALTIME_EVENT_RINGBUF_BYTES, + EbpfEventIpv6, IcmpHdr, NetworkHeader, TcpHdr, UdpHdr, REALTIME_EVENT_QUEUE_COUNT, + REALTIME_EVENT_RINGBUF_BYTES, }; use network_types::{ eth::{EthHdr, EtherType}, @@ -45,6 +46,18 @@ static EVENTS_IPV6_2: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_B #[map] static EVENTS_IPV6_3: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); +#[map] +static EVENTS_IPV6_4: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV6_5: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV6_6: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + +#[map] +static EVENTS_IPV6_7: RingBuf = RingBuf::with_byte_size(REALTIME_EVENT_RINGBUF_BYTES, 0); + #[classifier] pub fn tc_flow_track(ctx: TcContext) -> i32 { match process_packet(&ctx) { @@ -136,7 +149,11 @@ fn submit_ipv6_event(ctx: &TcContext, event: EbpfEventIpv6, queue_index: u32) { 0 => reserve_ipv6_event(&EVENTS_IPV6_0, event), 1 => reserve_ipv6_event(&EVENTS_IPV6_1, event), 2 => reserve_ipv6_event(&EVENTS_IPV6_2, event), - _ => reserve_ipv6_event(&EVENTS_IPV6_3, event), + 3 => reserve_ipv6_event(&EVENTS_IPV6_3, event), + 4 => reserve_ipv6_event(&EVENTS_IPV6_4, event), + 5 => reserve_ipv6_event(&EVENTS_IPV6_5, event), + 6 => reserve_ipv6_event(&EVENTS_IPV6_6, event), + _ => reserve_ipv6_event(&EVENTS_IPV6_7, event), }; if !reserved { @@ -179,12 +196,14 @@ fn queue_index_ipv6(packet_info: &PacketInfo, header: &impl NetworkHeader) -> u3 packet_info.ipv6_destination, header.destination_port(), ); - let hash = mix_u128(first_ip) - ^ mix_u128(second_ip).rotate_left(11) - ^ mix_u16(first_port).rotate_left(17) - ^ mix_u16(second_port).rotate_left(23) - ^ u32::from(packet_info.protocol).rotate_left(29); - hash & 0b11 + let 
endpoint_ports = (u32::from(first_port) << 16) | u32::from(second_port); + let mut hash = 0x811c_9dc5; + hash = hash_combine(hash, mix_u128(first_ip)); + hash = hash_combine(hash, mix_u128(second_ip)); + hash = hash_combine(hash, endpoint_ports); + hash = hash_combine(hash, u32::from(packet_info.protocol)); + hash = finish_hash32(hash); + hash % REALTIME_EVENT_QUEUE_COUNT as u32 } #[inline(always)] @@ -221,8 +240,21 @@ fn mix_u64(mut value: u64) -> u32 { } #[inline(always)] -fn mix_u16(value: u16) -> u32 { - mix_u64(u64::from(value)) +fn hash_combine(state: u32, value: u32) -> u32 { + state + ^ value + .wrapping_add(0x9e37_79b9) + .wrapping_add(state << 6) + .wrapping_add(state >> 2) +} + +#[inline(always)] +fn finish_hash32(mut value: u32) -> u32 { + value ^= value >> 16; + value = value.wrapping_mul(0x85eb_ca6b); + value ^= value >> 13; + value = value.wrapping_mul(0xc2b2_ae35); + value ^ (value >> 16) } fn process_transport_packet( diff --git a/rustiflow/src/realtime.rs b/rustiflow/src/realtime.rs index 283e6419..c0022bb8 100644 --- a/rustiflow/src/realtime.rs +++ b/rustiflow/src/realtime.rs @@ -29,7 +29,7 @@ use tokio::sync::watch; use tokio::{ io::unix::AsyncFd, signal, - sync::mpsc::{self, Sender}, + sync::mpsc::{self, Receiver, Sender}, sync::Mutex, task::JoinSet, }; @@ -38,7 +38,8 @@ use tokio::{ struct RealtimeSourceStats { events: AtomicU64, decode_and_shard_ns: AtomicU64, - send_wait_ns: AtomicU64, + dispatch_enqueue_ns: AtomicU64, + shard_send_wait_ns: AtomicU64, packet_graph_ns: AtomicU64, total_event_ns: AtomicU64, send_errors: AtomicU64, @@ -56,8 +57,12 @@ impl RealtimeSourceStats { self.decode_and_shard_ns.fetch_add(value, Ordering::Relaxed); } - fn add_send_wait_ns(&self, value: u64) { - self.send_wait_ns.fetch_add(value, Ordering::Relaxed); + fn add_dispatch_enqueue_ns(&self, value: u64) { + self.dispatch_enqueue_ns.fetch_add(value, Ordering::Relaxed); + } + + fn add_shard_send_wait_ns(&self, value: u64) { + 
self.shard_send_wait_ns.fetch_add(value, Ordering::Relaxed); } fn add_packet_graph_ns(&self, value: u64) { @@ -84,7 +89,8 @@ fn elapsed_ns(start: Instant) -> u64 { fn log_source_stats(label: &str, stats: &RealtimeSourceStats) { let events = stats.events.load(Ordering::Relaxed); let decode_and_shard_ns = stats.decode_and_shard_ns.load(Ordering::Relaxed); - let send_wait_ns = stats.send_wait_ns.load(Ordering::Relaxed); + let dispatch_enqueue_ns = stats.dispatch_enqueue_ns.load(Ordering::Relaxed); + let shard_send_wait_ns = stats.shard_send_wait_ns.load(Ordering::Relaxed); let packet_graph_ns = stats.packet_graph_ns.load(Ordering::Relaxed); let total_event_ns = stats.total_event_ns.load(Ordering::Relaxed); let send_errors = stats.send_errors.load(Ordering::Relaxed); @@ -95,21 +101,24 @@ fn log_source_stats(label: &str, stats: &RealtimeSourceStats) { } info!( - "Realtime source {}: events={} total_ms={:.3} decode_ms={:.3} send_wait_ms={:.3} packet_graph_ms={:.3} avg_event_us={:.3} avg_send_wait_us={:.3} send_errors={}", + "Realtime source {}: events={} total_ms={:.3} decode_ms={:.3} enqueue_wait_ms={:.3} shard_send_wait_ms={:.3} packet_graph_ms={:.3} avg_event_us={:.3} avg_enqueue_wait_us={:.3} avg_shard_send_wait_us={:.3} send_errors={}", label, events, total_event_ns as f64 / 1_000_000.0, decode_and_shard_ns as f64 / 1_000_000.0, - send_wait_ns as f64 / 1_000_000.0, + dispatch_enqueue_ns as f64 / 1_000_000.0, + shard_send_wait_ns as f64 / 1_000_000.0, packet_graph_ns as f64 / 1_000_000.0, total_event_ns as f64 / events as f64 / 1_000.0, - send_wait_ns as f64 / events as f64 / 1_000.0, + dispatch_enqueue_ns as f64 / events as f64 / 1_000.0, + shard_send_wait_ns as f64 / events as f64 / 1_000.0, send_errors, ); } const DEFAULT_SHARD_BATCH_SIZE: usize = 128; const DEFAULT_SHARD_QUEUE_CAPACITY: usize = 512; +const DEFAULT_SOURCE_DISPATCH_QUEUE_CAPACITY: usize = 1024; /// Starts the realtime processing of packets on the given interface. 
/// The function will return the number of packets dropped by the eBPF program. @@ -136,6 +145,10 @@ where "RUSTIFLOW_REALTIME_SHARD_QUEUE_CAPACITY", DEFAULT_SHARD_QUEUE_CAPACITY, ); + let source_dispatch_queue_capacity = read_env_usize( + "RUSTIFLOW_REALTIME_SOURCE_DISPATCH_QUEUE_CAPACITY", + DEFAULT_SOURCE_DISPATCH_QUEUE_CAPACITY, + ); // Needed for older kernels bump_memlock_rlimit(); @@ -288,6 +301,12 @@ where let packet_graph = packet_graph.clone(); let stats = enable_source_stats.then(|| Arc::new(RealtimeSourceStats::default())); source_stats.push((label, stats.clone())); + let dispatch_sender = spawn_source_dispatcher( + &mut handle_set, + shard_senders_clone.clone(), + source_dispatch_queue_capacity, + stats.clone(), + ); handle_set.spawn(async move { // Wrap the RingBuf in AsyncFd to poll it with tokio @@ -324,8 +343,8 @@ where pending_batches[shard_index].push(packet_features); if pending_batches[shard_index].len() >= shard_batch_size { - flush_shard_batch( - &shard_senders_clone[shard_index], + enqueue_shard_batch( + &dispatch_sender, &mut pending_batches[shard_index], stats.as_ref(), shard_index, @@ -341,7 +360,7 @@ where } } - flush_pending_batches(&shard_senders_clone, &mut pending_batches, stats.as_ref()) + enqueue_pending_batches(&dispatch_sender, &mut pending_batches, stats.as_ref()) .await; // Clear the readiness state for the next iteration @@ -355,6 +374,12 @@ where let packet_graph = packet_graph.clone(); let stats = enable_source_stats.then(|| Arc::new(RealtimeSourceStats::default())); source_stats.push((label, stats.clone())); + let dispatch_sender = spawn_source_dispatcher( + &mut handle_set, + shard_senders_clone.clone(), + source_dispatch_queue_capacity, + stats.clone(), + ); handle_set.spawn(async move { // Wrap the RingBuf in AsyncFd to poll it with tokio @@ -391,8 +416,8 @@ where pending_batches[shard_index].push(packet_features); if pending_batches[shard_index].len() >= shard_batch_size { - flush_shard_batch( - 
&shard_senders_clone[shard_index], + enqueue_shard_batch( + &dispatch_sender, &mut pending_batches[shard_index], stats.as_ref(), shard_index, @@ -408,7 +433,7 @@ where } } - flush_pending_batches(&shard_senders_clone, &mut pending_batches, stats.as_ref()) + enqueue_pending_batches(&dispatch_sender, &mut pending_batches, stats.as_ref()) .await; // Clear the readiness state for the next iteration @@ -496,24 +521,61 @@ fn create_pending_batches(num_shards: usize, shard_batch_size: usize) -> Vec>], +async fn enqueue_pending_batches( + dispatch_sender: &Sender, pending_batches: &mut [Vec], stats: Option<&Arc>, ) { for (shard_index, pending_batch) in pending_batches.iter_mut().enumerate() { - flush_shard_batch( - &shard_senders[shard_index], - pending_batch, - stats, - shard_index, - ) - .await; + enqueue_shard_batch(dispatch_sender, pending_batch, stats, shard_index).await; + } +} + +struct ShardDispatchBatch { + shard_index: usize, + batch: Vec, +} + +fn spawn_source_dispatcher( + handle_set: &mut JoinSet<()>, + shard_senders: Vec>>, + source_dispatch_queue_capacity: usize, + stats: Option>, +) -> Sender { + let (dispatch_sender, mut dispatch_receiver) = + mpsc::channel::(source_dispatch_queue_capacity); + + handle_set.spawn(async move { + run_source_dispatcher(&shard_senders, &mut dispatch_receiver, stats.as_ref()).await; + }); + + dispatch_sender +} + +async fn run_source_dispatcher( + shard_senders: &[Sender>], + dispatch_receiver: &mut Receiver, + stats: Option<&Arc>, +) { + while let Some(ShardDispatchBatch { shard_index, batch }) = dispatch_receiver.recv().await { + let send_start = stats.as_ref().map(|_| Instant::now()); + if let Err(e) = shard_senders[shard_index].send(batch).await { + if let Some(stats) = stats { + stats.increment_send_errors(); + } + error!( + "Failed to send packet batch to shard {}: {}", + shard_index, e + ); + } + if let (Some(stats), Some(send_start)) = (stats, send_start) { + stats.add_shard_send_wait_ns(elapsed_ns(send_start)); + } } } 
-async fn flush_shard_batch( - shard_sender: &Sender>, +async fn enqueue_shard_batch( + dispatch_sender: &Sender, pending_batch: &mut Vec, stats: Option<&Arc>, shard_index: usize, @@ -525,17 +587,20 @@ async fn flush_shard_batch( let next_capacity = pending_batch.capacity().max(1); let batch = std::mem::replace(pending_batch, Vec::with_capacity(next_capacity)); let send_start = stats.as_ref().map(|_| Instant::now()); - if let Err(e) = shard_sender.send(batch).await { + if let Err(e) = dispatch_sender + .send(ShardDispatchBatch { shard_index, batch }) + .await + { if let Some(stats) = stats { stats.increment_send_errors(); } error!( - "Failed to send packet batch to shard {}: {}", + "Failed to queue packet batch for shard {}: {}", shard_index, e ); } if let (Some(stats), Some(send_start)) = (stats, send_start) { - stats.add_send_wait_ns(elapsed_ns(send_start)); + stats.add_dispatch_enqueue_ns(elapsed_ns(send_start)); } } @@ -579,7 +644,7 @@ fn compute_realtime_offset_us() -> Result { fn labeled_ringbuf_sources( label_prefix: &'static str, ring_bufs: Vec>, -) -> Vec<(&'static str, RingBuf)> { +) -> Vec<(String, RingBuf)> { ring_bufs .into_iter() .enumerate() @@ -587,29 +652,8 @@ fn labeled_ringbuf_sources( .collect() } -fn queue_label(label_prefix: &'static str, index: usize) -> &'static str { - match (label_prefix, index) { - ("ingress-ipv4", 0) => "ingress-ipv4-q0", - ("ingress-ipv4", 1) => "ingress-ipv4-q1", - ("ingress-ipv4", 2) => "ingress-ipv4-q2", - ("ingress-ipv4", 3) => "ingress-ipv4-q3", - ("egress-ipv4", 0) => "egress-ipv4-q0", - ("egress-ipv4", 1) => "egress-ipv4-q1", - ("egress-ipv4", 2) => "egress-ipv4-q2", - ("egress-ipv4", 3) => "egress-ipv4-q3", - ("ingress-ipv6", 0) => "ingress-ipv6-q0", - ("ingress-ipv6", 1) => "ingress-ipv6-q1", - ("ingress-ipv6", 2) => "ingress-ipv6-q2", - ("ingress-ipv6", 3) => "ingress-ipv6-q3", - ("egress-ipv6", 0) => "egress-ipv6-q0", - ("egress-ipv6", 1) => "egress-ipv6-q1", - ("egress-ipv6", 2) => "egress-ipv6-q2", - 
("egress-ipv6", 3) => "egress-ipv6-q3", - _ => panic!( - "unexpected realtime queue label: {}-{}", - label_prefix, index - ), - } +fn queue_label(label_prefix: &'static str, index: usize) -> String { + format!("{label_prefix}-q{index}") } fn take_ring_buf_maps( diff --git a/rustiflow/src/tests/flows/flow_table_test.rs b/rustiflow/src/tests/flows/flow_table_test.rs index 6bfba700..53cb3c70 100644 --- a/rustiflow/src/tests/flows/flow_table_test.rs +++ b/rustiflow/src/tests/flows/flow_table_test.rs @@ -1,9 +1,9 @@ #[cfg(test)] mod tests { - use std::net::{IpAddr, Ipv4Addr}; + use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; #[cfg(target_os = "linux")] - use common::EbpfEventIpv4; + use common::{EbpfEventIpv4, EbpfEventIpv6}; use tokio::sync::mpsc; use crate::{ @@ -60,6 +60,39 @@ mod tests { PacketFeatures::from_ebpf_event_ipv4(&event, realtime_offset_us) } + #[cfg(target_os = "linux")] + fn build_realtime_packet_ipv6( + source_ip: Ipv6Addr, + source_port: u16, + destination_ip: Ipv6Addr, + destination_port: u16, + timestamp_us: i64, + flags: u8, + sequence_number: u32, + sequence_number_ack: u32, + data_length: u16, + ) -> PacketFeatures { + let realtime_offset_us = 1_000_000; + let event = EbpfEventIpv6::new( + (timestamp_us - realtime_offset_us) as u64 * 1_000, + u128::from(destination_ip).to_be(), + u128::from(source_ip).to_be(), + destination_port, + source_port, + data_length, + 40 + data_length, + 4096, + flags, + 6, + 20, + sequence_number, + sequence_number_ack, + 0, + 0, + ); + PacketFeatures::from_ebpf_event_ipv6(&event, realtime_offset_us) + } + #[tokio::test] async fn exports_idle_timed_out_flow_with_idle_timeout_cause() { let (tx, mut rx) = mpsc::channel::(4); @@ -346,4 +379,174 @@ mod tests { assert!(offline_rx.try_recv().is_err()); assert!(realtime_rx.try_recv().is_err()); } + + #[cfg(target_os = "linux")] + #[tokio::test] + async fn offline_and_realtime_ipv6_bidirectional_exports_match() { + let (offline_tx, mut offline_rx) = mpsc::channel::(4); + let 
(realtime_tx, mut realtime_rx) = mpsc::channel::(4); + let mut offline_table = FlowTable::new(3600, 120, None, offline_tx, 60); + let mut realtime_table = FlowTable::new(3600, 120, None, realtime_tx, 60); + + let client_ip = Ipv6Addr::new(0x2001, 0xdb8, 0, 1, 0, 0, 0, 1); + let server_ip = Ipv6Addr::new(0x2001, 0xdb8, 0, 1, 0, 0, 0, 2); + + let offline_syn = PacketFeatures { + source_ip: IpAddr::V6(client_ip), + destination_ip: IpAddr::V6(server_ip), + source_port: 12345, + destination_port: 443, + protocol: 6, + timestamp_us: 1_000_000, + syn_flag: 1, + flags: 0x02, + header_length: 20, + length: 40, + window_size: 4096, + sequence_number: 100, + ..Default::default() + }; + + let offline_syn_ack = PacketFeatures { + source_ip: IpAddr::V6(server_ip), + destination_ip: IpAddr::V6(client_ip), + source_port: 443, + destination_port: 12345, + protocol: 6, + timestamp_us: 1_000_100, + syn_flag: 1, + ack_flag: 1, + flags: 0x12, + header_length: 20, + length: 40, + window_size: 4096, + sequence_number: 200, + sequence_number_ack: 101, + ..Default::default() + }; + + let offline_ack = PacketFeatures { + source_ip: IpAddr::V6(client_ip), + destination_ip: IpAddr::V6(server_ip), + source_port: 12345, + destination_port: 443, + protocol: 6, + timestamp_us: 1_000_200, + ack_flag: 1, + flags: 0x10, + header_length: 20, + length: 40, + window_size: 4096, + sequence_number: 101, + sequence_number_ack: 201, + ..Default::default() + }; + + let offline_payload = PacketFeatures { + source_ip: IpAddr::V6(client_ip), + destination_ip: IpAddr::V6(server_ip), + source_port: 12345, + destination_port: 443, + protocol: 6, + timestamp_us: 1_000_300, + ack_flag: 1, + psh_flag: 1, + flags: 0x18, + header_length: 20, + data_length: 64, + length: 104, + window_size: 4096, + sequence_number: 101, + sequence_number_ack: 201, + ..Default::default() + }; + + let realtime_syn = build_realtime_packet_ipv6( + client_ip, 12345, server_ip, 443, 1_000_000, 0x02, 100, 0, 0, + ); + let realtime_syn_ack = 
build_realtime_packet_ipv6( + server_ip, 443, client_ip, 12345, 1_000_100, 0x12, 200, 101, 0, + ); + let realtime_ack = build_realtime_packet_ipv6( + client_ip, 12345, server_ip, 443, 1_000_200, 0x10, 101, 201, 0, + ); + let realtime_payload = build_realtime_packet_ipv6( + client_ip, 12345, server_ip, 443, 1_000_300, 0x18, 101, 201, 64, + ); + + for packet in [ + &offline_syn, + &offline_syn_ack, + &offline_ack, + &offline_payload, + ] { + offline_table.process_packet(packet).await; + } + for packet in [ + &realtime_syn, + &realtime_syn_ack, + &realtime_ack, + &realtime_payload, + ] { + realtime_table.process_packet(packet).await; + } + + offline_table.export_all_flows(2_000_000).await; + realtime_table.export_all_flows(2_000_000).await; + + let offline_export = offline_rx.recv().await.expect("expected offline export"); + let realtime_export = realtime_rx.recv().await.expect("expected realtime export"); + + assert_eq!(offline_export.dump(), realtime_export.dump()); + assert_eq!( + offline_export.dump_without_contamination(), + realtime_export.dump_without_contamination() + ); + assert!(offline_rx.try_recv().is_err()); + assert!(realtime_rx.try_recv().is_err()); + } + + #[cfg(target_os = "linux")] + #[tokio::test] + async fn offline_and_realtime_ipv6_idle_expiration_match() { + let (offline_tx, mut offline_rx) = mpsc::channel::(4); + let (realtime_tx, mut realtime_rx) = mpsc::channel::(4); + let mut offline_table = FlowTable::new(3600, 1, None, offline_tx, 60); + let mut realtime_table = FlowTable::new(3600, 1, None, realtime_tx, 60); + + let client_ip = Ipv6Addr::new(0x2001, 0xdb8, 0, 2, 0, 0, 0, 10); + let server_ip = Ipv6Addr::new(0x2001, 0xdb8, 0, 2, 0, 0, 0, 20); + let offline_packet = PacketFeatures { + source_ip: IpAddr::V6(client_ip), + destination_ip: IpAddr::V6(server_ip), + source_port: 12345, + destination_port: 443, + protocol: 6, + timestamp_us: 1_000_000, + ..Default::default() + }; + let realtime_packet = + build_realtime_packet_ipv6(client_ip, 
12345, server_ip, 443, 1_000_000, 0, 0, 0, 0); + + offline_table.process_packet(&offline_packet).await; + realtime_table.process_packet(&realtime_packet).await; + + offline_table.export_expired_flows(3_000_000).await; + realtime_table.export_expired_flows(3_000_000).await; + + let offline_export = offline_rx.recv().await.expect("expected offline export"); + let realtime_export = realtime_rx.recv().await.expect("expected realtime export"); + + assert_eq!(offline_export.dump(), realtime_export.dump()); + assert_eq!( + offline_export.flow_expire_cause, + FlowExpireCause::IdleTimeout + ); + assert_eq!( + realtime_export.flow_expire_cause, + FlowExpireCause::IdleTimeout + ); + assert!(offline_rx.try_recv().is_err()); + assert!(realtime_rx.try_recv().is_err()); + } } From 1bf9a610ed3d834a16f195dc3e37f30f974084f0 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:32:31 +0200 Subject: [PATCH 06/23] Add opt-in profiling support --- AGENTS.md | 21 ++- docs/performance-roadmap.md | 275 -------------------------------- rustiflow/Cargo.toml | 1 + rustiflow/src/main.rs | 32 ++++ rustiflow/src/profiling.rs | 166 +++++++++++++++++++ rustiflow/src/profiling_stub.rs | 13 ++ 6 files changed, 232 insertions(+), 276 deletions(-) delete mode 100644 docs/performance-roadmap.md create mode 100644 rustiflow/src/profiling.rs create mode 100644 rustiflow/src/profiling_stub.rs diff --git a/AGENTS.md b/AGENTS.md index 1103b4bf..9f98d000 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -121,7 +121,26 @@ in `docs/engineering-notes.md`. ### Current Focus -None currently. Add only the next active bounded engineering phase here. +- [ ] Add profiling on the current parallel realtime path before bigger + redesigns: + capture CPU usage and flamegraph-style evidence for drain/dispatch, + FlowTable work, and export cost on the `rustiflow-t0` harness. 
+- [ ] Quantify resource usage for the proven local throughput tiers: + for the current `10G` operating point, and later `25/40G` attempts, record + CPU use, memory use, drop behavior, and export rate instead of bitrate alone. +- [ ] Re-measure the remaining dispatcher bottleneck before transport rewrite: + after the current queue-count and fanout wins, determine whether the next + limiter is shard-channel backpressure, FlowTable processing, or export cost. +- [ ] Revisit cheaper running statistics only after profiling confirms they are + still hot on the newer ingestion path. +- [ ] Measure export-path cost explicitly before redesigning flow snapshots or + CSV serialization: + confirm whether cloning/export formatting is now a real limiter under high + export pressure. +- [ ] Keep updating `docs/engineering-notes.md` after each bounded experiment + with: + workload, achieved bitrate, dropped-packet total, and what the new + bottleneck appears to be. Primary files: diff --git a/docs/performance-roadmap.md b/docs/performance-roadmap.md deleted file mode 100644 index b407abad..00000000 --- a/docs/performance-roadmap.md +++ /dev/null @@ -1,275 +0,0 @@ -# Performance Roadmap - -This file tracks performance work for pushing RustiFlow beyond already-successful -`10Gbps` realtime capture. - -Use this as an execution checklist, not as a design essay. - -## Ground Rules - -- [ ] Measure before and after every meaningful optimization. -- [ ] Prefer hot-path wins over broad rewrites. -- [ ] Do not trade away feature correctness for speed without making that trade explicit. -- [ ] Keep performance work in clean, bounded commits. -- [ ] After recent ingestion-semantics fixes, stabilize and measure before expanding the eBPF event payload further. - -## Phase 0: Baseline And Profiling - -- [ ] Establish a repeatable Linux benchmark setup on a real target machine. 
-- [ ] Capture baseline numbers for: - - packets per second - - dropped packets - - CPU usage by userspace and kernel path - - active flow count - - export throughput -- [ ] Collect flamegraphs or equivalent profiling for: - - realtime ingest - - flow-table updates - - export path -- [ ] Separate ingress-only and ingress+egress benchmark modes. - -Why this matters: -The current implementation already performs well. Past this point, guessing is expensive. - -## Phase 1: Biggest Likely Wins - -### 1. Typed Flow Keys - -- [ ] Replace string-based packet keys with compact typed keys. -- [ ] Remove repeated `String` creation from: - - `flow_key()` - - `flow_key_bwd()` - - `biflow_key()` -- [ ] Use typed keys in shard selection and flow-table lookup. -- [ ] Keep string formatting only for export. - -Primary files: - -- `rustiflow/src/packet_features.rs` -- `rustiflow/src/flow_table.rs` -- `rustiflow/src/realtime.rs` - -Expected value: -Very high. This is a likely hot-path allocation and hashing tax. - -### 2. Cheaper Running Statistics - -- [ ] Replace per-update standard deviation work with a running variance form such as Welford. -- [ ] Store enough state to compute `std` at dump/close time. -- [ ] Benchmark impact across heavily used feature families. - -Primary files: - -- `rustiflow/src/flows/features/util.rs` -- `rustiflow/src/flows/features/*.rs` - -Expected value: -High. Many feature modules pay this cost on every packet. - -### 3. Realtime Timestamp Fix - -- [ ] Carry capture timestamps from kernel to userspace instead of calling `Utc::now()` per event. -- [ ] Keep timestamp semantics aligned with offline mode as much as possible. -- [ ] Re-benchmark after this change because it affects both correctness and overhead. - -Primary files: - -- `common/src/lib.rs` -- `ebpf-ipv4/src/main.rs` -- `ebpf-ipv6/src/main.rs` -- `rustiflow/src/packet_features.rs` - -Expected value: -High. Improves correctness and removes per-event userspace time acquisition. 
- -## Phase 2: Hot-Path Structural Cleanup - -### 4. FlowTable Access Patterns - -- [x] Reduce repeated hashing and key rebuilding in flow lookup. -- [x] Avoid `contains_key` plus `remove` plus `insert` churn where possible. -- [x] Revisit direction resolution after typed keys are introduced. - -Primary file: - -- `rustiflow/src/flow_table.rs` - -Expected value: -High once typed keys exist. - -### 5. Export Without Cloning Full Flow State - -- [ ] Reduce or remove full-flow cloning for early export and termination export. -- [ ] Consider separating mutable hot-path state from export snapshots. -- [ ] Measure clone cost for `RustiFlow` specifically before redesigning too far. - -Primary files: - -- `rustiflow/src/flow_table.rs` -- `rustiflow/src/flows/flow.rs` -- `rustiflow/src/flows/*.rs` - -Expected value: -Medium to high depending on export rate and flow size. - -### 6. Performance Mode Should Mean Performance - -- [x] Make sure high-throughput runs bypass packet-TUI work completely. -- [x] Audit mutexes, watch channels, and per-packet UI accounting in performance-sensitive modes. -- [x] Keep observability available, but not in the critical path by default. - -Primary files: - -- `rustiflow/src/realtime.rs` -- `rustiflow/src/packet_counts.rs` -- `rustiflow/src/flow_tui.rs` - -Expected value: -Medium to high for very fast realtime capture. - -## Phase 3: Throughput Scaling - -### 7. Batching Between Stages - -- [ ] Benchmark per-packet `mpsc` overhead. -- [ ] Try batched ring-buffer draining. -- [ ] Try batched shard submission. -- [ ] Keep changes narrow until measurement proves batching is worth the complexity. - -Primary file: - -- `rustiflow/src/realtime.rs` - -Expected value: -Medium, possibly high at very large packet rates. - -### 8. Faster Internal Hashing - -- [ ] Benchmark a faster internal hasher after typed keys are in place. -- [ ] Prefer a fast non-adversarial hasher only for internal packet-processing paths. 
-- [ ] Keep any public or security-sensitive hashing decisions separate. - -Primary files: - -- `rustiflow/src/realtime.rs` -- `rustiflow/src/flow_table.rs` - -Expected value: -Medium. Probably not worth doing first. - -### 9. Smarter Expiration Scheduling - -- [ ] Benchmark expiration scans at high concurrent flow counts. -- [ ] If scans become costly, evaluate timing buckets or a timer wheel. -- [ ] Do not build a more complex expiry structure before profiling says the scan is a real bottleneck. - -Primary file: - -- `rustiflow/src/flow_table.rs` - -Expected value: -Medium, but workload-dependent. - -## Phase 4: Export Path Optimization - -### 10. Cheaper Serialization - -- [ ] Measure cost of giant `format!`-based CSV assembly. -- [ ] Consider more streaming-oriented serialization for high export rates. -- [ ] Keep export-path changes isolated from flow semantics. - -Primary files: - -- `rustiflow/src/output.rs` -- `rustiflow/src/flows/*.rs` - -Expected value: -Medium. Important once export volume becomes the limiter. - -## Operational Metrics To Add - -- [ ] Per-source ring buffer drain rate -- [ ] Per-shard queue depth or backlog -- [ ] Active flow count over time -- [ ] Export throughput and export lag -- [ ] Dropped packet counters split by ingress/egress and IPv4/IPv6 -- [ ] A lightweight performance summary mode for realtime runs - -Why this matters: -If RustiFlow is going to chase higher link rates, it needs good self-observability. - -## Deferred Stress Testing Notes - -- [ ] Remember that `10Gbps+` software-path testing is possible without a physical external link. -- [ ] Prefer doing this on an actual Linux development machine instead of macOS. -- [ ] Treat software-only stress testing as useful for RustiFlow and eBPF/userspace throughput, but not as a full substitute for real NIC validation. 
- -Practical options for later: - -- `veth` pair + network namespaces + RustiFlow on one side -- Linux `pktgen` for high packet-rate stress -- TRex for more realistic replay and traffic profiles -- MoonGen for high-rate scripted generation - -What this is good for: - -- packet-rate pressure -- flow-table pressure -- eBPF event rate -- userspace queueing and dropped packets -- export throughput - -What this does not fully prove: - -- physical NIC behavior -- PCIe and DMA effects -- hardware offloads -- real RSS / hardware queue behavior - -## Not Early Priorities - -- [ ] Do not start with micro-optimizing individual feature modules before fixing keying and stats math. -- [ ] Do not move large parts of flow aggregation into eBPF without profiling evidence. -- [ ] Do not do broad architecture rewrites before collecting hard measurements. -- [ ] Do not let exporter churn distract from realtime hot-path costs. - -## Current Best Order - -- [ ] Phase 0: Baseline and profiling -- [ ] Phase 1.1: Typed flow keys -- [ ] Phase 1.2: Cheaper running statistics -- [ ] Phase 1.3: Kernel-carried timestamps -- [ ] Phase 2.4: FlowTable access cleanup -- [ ] Phase 2.5: Export without cloning -- [ ] Phase 2.6: Strict performance mode -- [ ] Phase 3.7: Batching -- [ ] Phase 3.8: Faster hashing -- [ ] Phase 3.9: Smarter expiration scheduling -- [ ] Phase 4.10: Serialization optimization - -## Progress Notes - -- Use short dated notes here when a measurement or optimization changes priorities. -- If a planned optimization turns out not to matter, mark it done and note that it was ruled out. -- 2026-03-25: Decision: stabilize and measure after the current timestamp and length/header-length alignment work before adding more packet metadata to eBPF events. -- 2026-03-25: Typed internal flow keys now replace string keys in sharding and - flow-table lookup while keeping string formatting only for exported flow ids. 
- On a locally amplified offline fixture (`nmap_udp_version.pcap` packet - records repeated 400x, ~2.5 MB), warm-cache `--release` runs dropped from - about `39.8 ms` to `19.7 ms` mean over 5 runs on this machine. Treat this as - a local directional signal, not a Linux realtime substitute. -- 2026-03-25: Realtime packet-graph state is now only constructed when the - graph is actually enabled. High-throughput CSV/performance-mode runs no - longer allocate the watch channel, mutex-protected packet counter, or - per-packet UI accounting state on the hot path. Linux-side throughput numbers - are still pending the broader benchmark work in Phase 0. -- 2026-03-25: `FeatureStats` now uses a Welford-style running variance - accumulator and computes `std` at readout time. On the same local amplified - offline fixture, warm-cache `--release` runs moved from about `21.3 ms` to - `20.5 ms` mean over 5 runs on this machine. Smaller win than typed keys, but - still in the expected direction. -- 2026-03-26: Existing-flow updates in `FlowTable` now stay in place on the - hot path instead of removing and reinserting map entries for every packet. - On the same local amplified offline fixture, warm-cache `--release` runs - moved from about `18.84 ms` to `18.30 ms` mean over 5 runs. This is still an - offline directional signal, not a Linux realtime benchmark. 
diff --git a/rustiflow/Cargo.toml b/rustiflow/Cargo.toml index 388f38b7..f20da783 100644 --- a/rustiflow/Cargo.toml +++ b/rustiflow/Cargo.toml @@ -40,6 +40,7 @@ strum_macros = "0.26.4" [target.'cfg(target_os = "linux")'.dependencies] aya = { version = "0.13.0", features = ["async_tokio"] } aya-log = "0.2.1" +pprof = { version = "0.14.1", features = ["flamegraph"] } [[bin]] name = "rustiflow" diff --git a/rustiflow/src/main.rs b/rustiflow/src/main.rs index 954cb29f..b23449a5 100644 --- a/rustiflow/src/main.rs +++ b/rustiflow/src/main.rs @@ -10,6 +10,11 @@ mod packet_counts; mod packet_features; mod pcap; #[cfg(target_os = "linux")] +mod profiling; +#[cfg(not(target_os = "linux"))] +#[path = "profiling_stub.rs"] +mod profiling; +#[cfg(target_os = "linux")] mod realtime; #[cfg(not(target_os = "linux"))] #[path = "realtime_stub.rs"] @@ -29,6 +34,7 @@ use flows::{ }; use log::{debug, error, info}; use output::OutputWriter; +use profiling::ProfilingSession; use realtime_mode::packet_graph_mode; use std::time::Instant; use tokio::sync::mpsc; @@ -139,6 +145,13 @@ async fn run_with_config(config: Config) { debug!("Starting realtime processing..."); let start = Instant::now(); + let profiling_session = match ProfilingSession::start_from_env("realtime") { + Ok(session) => session, + Err(err) => { + error!("Error starting profiler: {:?}", err); + None + } + }; let result = handle_realtime::<$flow_ty>( &interface, sender, @@ -157,6 +170,12 @@ async fn run_with_config(config: Config) { error!("Error waiting for output task: {:?}", e); } + if let Some(profiling_session) = profiling_session { + if let Err(err) = profiling_session.finish() { + error!("Error finishing profiler: {:?}", err); + } + } + let end = Instant::now(); info!( "Duration: {:.4} seconds", @@ -219,6 +238,13 @@ async fn run_with_config(config: Config) { }); let start = Instant::now(); + let profiling_session = match ProfilingSession::start_from_env("offline") { + Ok(session) => session, + Err(err) => { + 
error!("Error starting profiler: {:?}", err); + None + } + }; if let Err(err) = read_pcap_file::<$flow_ty>( &path, @@ -239,6 +265,12 @@ async fn run_with_config(config: Config) { error!("Error waiting for output task: {:?}", e); }); + if let Some(profiling_session) = profiling_session { + if let Err(err) = profiling_session.finish() { + error!("Error finishing profiler: {:?}", err); + } + } + let end = Instant::now(); debug!( "Duration: {:?} milliseconds", diff --git a/rustiflow/src/profiling.rs b/rustiflow/src/profiling.rs new file mode 100644 index 00000000..599ae548 --- /dev/null +++ b/rustiflow/src/profiling.rs @@ -0,0 +1,166 @@ +use anyhow::{Context, Result}; +use log::info; +use pprof::{ProfilerGuard, ProfilerGuardBuilder}; +use std::{env, fs::File, path::PathBuf}; + +const DEFAULT_PROFILE_FREQUENCY_HZ: i32 = 99; + +pub struct ProfilingSession { + mode: &'static str, + flamegraph_path: Option, + guard: Option>, + usage_start: ResourceUsage, +} + +impl ProfilingSession { + pub fn start_from_env(mode: &'static str) -> Result> { + let flamegraph_path = env::var_os("RUSTIFLOW_PROFILE_FLAMEGRAPH").map(PathBuf::from); + let sampling_frequency_hz = env::var("RUSTIFLOW_PROFILE_FREQUENCY_HZ") + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + .unwrap_or(DEFAULT_PROFILE_FREQUENCY_HZ); + + let enabled = flamegraph_path.is_some() || env_flag("RUSTIFLOW_PROFILE_RESOURCE_SUMMARY"); + if !enabled { + return Ok(None); + } + + let guard = if flamegraph_path.is_some() { + Some( + ProfilerGuardBuilder::default() + .frequency(sampling_frequency_hz) + .blocklist(&["libc", "libgcc", "pthread", "vdso"]) + .build() + .context("failed to start userspace profiler")?, + ) + } else { + None + }; + + info!( + "Profiling enabled for {}: flamegraph={}, resource_summary={}, frequency_hz={}", + mode, + flamegraph_path + .as_ref() + .map(|path| path.display().to_string()) + .unwrap_or_else(|| "disabled".to_string()), + 
env_flag("RUSTIFLOW_PROFILE_RESOURCE_SUMMARY"), + sampling_frequency_hz + ); + + Ok(Some(Self { + mode, + flamegraph_path, + guard, + usage_start: ResourceUsage::read()?, + })) + } + + pub fn finish(self) -> Result<()> { + let usage_end = ResourceUsage::read()?; + let usage_delta = usage_end.delta_from(&self.usage_start); + info!( + "Profile summary for {}: user_cpu_ms={:.3} sys_cpu_ms={:.3} max_rss_kb={} voluntary_ctx_switches={} involuntary_ctx_switches={}", + self.mode, + usage_delta.user_cpu_us as f64 / 1_000.0, + usage_delta.system_cpu_us as f64 / 1_000.0, + usage_end.max_rss_kb, + usage_delta.voluntary_context_switches, + usage_delta.involuntary_context_switches + ); + + if let (Some(guard), Some(flamegraph_path)) = (self.guard, self.flamegraph_path) { + let report = guard + .report() + .build() + .context("failed to build flamegraph report")?; + let file = File::create(&flamegraph_path) + .with_context(|| format!("failed to create {}", flamegraph_path.display()))?; + report + .flamegraph(file) + .with_context(|| format!("failed to write {}", flamegraph_path.display()))?; + info!( + "Wrote userspace flamegraph for {} to {}", + self.mode, + flamegraph_path.display() + ); + } + + Ok(()) + } +} + +#[derive(Clone, Copy)] +struct ResourceUsage { + user_cpu_us: i64, + system_cpu_us: i64, + max_rss_kb: i64, + voluntary_context_switches: i64, + involuntary_context_switches: i64, +} + +impl ResourceUsage { + fn read() -> Result { + let mut usage = libc::rusage { + ru_utime: libc::timeval { + tv_sec: 0, + tv_usec: 0, + }, + ru_stime: libc::timeval { + tv_sec: 0, + tv_usec: 0, + }, + ru_maxrss: 0, + ru_ixrss: 0, + ru_idrss: 0, + ru_isrss: 0, + ru_minflt: 0, + ru_majflt: 0, + ru_nswap: 0, + ru_inblock: 0, + ru_oublock: 0, + ru_msgsnd: 0, + ru_msgrcv: 0, + ru_nsignals: 0, + ru_nvcsw: 0, + ru_nivcsw: 0, + }; + + let result = unsafe { libc::getrusage(libc::RUSAGE_SELF, &mut usage) }; + if result != 0 { + return Err(std::io::Error::last_os_error()).context("getrusage 
failed"); + } + + Ok(Self { + user_cpu_us: timeval_to_us(usage.ru_utime), + system_cpu_us: timeval_to_us(usage.ru_stime), + max_rss_kb: usage.ru_maxrss, + voluntary_context_switches: usage.ru_nvcsw, + involuntary_context_switches: usage.ru_nivcsw, + }) + } + + fn delta_from(&self, start: &Self) -> Self { + Self { + user_cpu_us: self.user_cpu_us - start.user_cpu_us, + system_cpu_us: self.system_cpu_us - start.system_cpu_us, + max_rss_kb: self.max_rss_kb, + voluntary_context_switches: self.voluntary_context_switches + - start.voluntary_context_switches, + involuntary_context_switches: self.involuntary_context_switches + - start.involuntary_context_switches, + } + } +} + +fn timeval_to_us(value: libc::timeval) -> i64 { + value.tv_sec * 1_000_000 + value.tv_usec +} + +fn env_flag(name: &str) -> bool { + matches!( + env::var(name).ok().as_deref(), + Some("1" | "true" | "TRUE" | "yes" | "YES" | "on" | "ON") + ) +} diff --git a/rustiflow/src/profiling_stub.rs b/rustiflow/src/profiling_stub.rs new file mode 100644 index 00000000..a31cd226 --- /dev/null +++ b/rustiflow/src/profiling_stub.rs @@ -0,0 +1,13 @@ +use anyhow::Result; + +pub struct ProfilingSession; + +impl ProfilingSession { + pub fn start_from_env(_mode: &'static str) -> Result> { + Ok(None) + } + + pub fn finish(self) -> Result<()> { + Ok(()) + } +} From 16950dbb2930a4f4cc7533be962cb86e843a1232 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:07:04 +0200 Subject: [PATCH 07/23] Refactor realtime flow table test packet specs --- rustiflow/src/tests/flows/flow_table_test.rs | 200 +++++++++++++------ 1 file changed, 140 insertions(+), 60 deletions(-) diff --git a/rustiflow/src/tests/flows/flow_table_test.rs b/rustiflow/src/tests/flows/flow_table_test.rs index 53cb3c70..8a12c8b4 100644 --- a/rustiflow/src/tests/flows/flow_table_test.rs +++ b/rustiflow/src/tests/flows/flow_table_test.rs @@ -28,7 +28,7 @@ mod tests { } #[cfg(target_os = "linux")] - fn 
build_realtime_packet( + struct RealtimePacketIpv4Spec { source_ip: Ipv4Addr, source_port: u16, destination_ip: Ipv4Addr, @@ -38,22 +38,25 @@ mod tests { sequence_number: u32, sequence_number_ack: u32, data_length: u16, - ) -> PacketFeatures { + } + + #[cfg(target_os = "linux")] + fn build_realtime_packet(spec: RealtimePacketIpv4Spec) -> PacketFeatures { let realtime_offset_us = 1_000_000; let event = EbpfEventIpv4::new( - (timestamp_us - realtime_offset_us) as u64 * 1_000, - u32::from(destination_ip).to_be(), - u32::from(source_ip).to_be(), - destination_port, - source_port, - data_length, - 40 + data_length, + (spec.timestamp_us - realtime_offset_us) as u64 * 1_000, + u32::from(spec.destination_ip).to_be(), + u32::from(spec.source_ip).to_be(), + spec.destination_port, + spec.source_port, + spec.data_length, + 40 + spec.data_length, 4096, - flags, + spec.flags, 6, 20, - sequence_number, - sequence_number_ack, + spec.sequence_number, + spec.sequence_number_ack, 0, 0, ); @@ -61,7 +64,7 @@ mod tests { } #[cfg(target_os = "linux")] - fn build_realtime_packet_ipv6( + struct RealtimePacketIpv6Spec { source_ip: Ipv6Addr, source_port: u16, destination_ip: Ipv6Addr, @@ -71,22 +74,25 @@ mod tests { sequence_number: u32, sequence_number_ack: u32, data_length: u16, - ) -> PacketFeatures { + } + + #[cfg(target_os = "linux")] + fn build_realtime_packet_ipv6(spec: RealtimePacketIpv6Spec) -> PacketFeatures { let realtime_offset_us = 1_000_000; let event = EbpfEventIpv6::new( - (timestamp_us - realtime_offset_us) as u64 * 1_000, - u128::from(destination_ip).to_be(), - u128::from(source_ip).to_be(), - destination_port, - source_port, - data_length, - 40 + data_length, + (spec.timestamp_us - realtime_offset_us) as u64 * 1_000, + u128::from(spec.destination_ip).to_be(), + u128::from(spec.source_ip).to_be(), + spec.destination_port, + spec.source_port, + spec.data_length, + 40 + spec.data_length, 4096, - flags, + spec.flags, 6, 20, - sequence_number, - sequence_number_ack, + 
spec.sequence_number, + spec.sequence_number_ack, 0, 0, ); @@ -293,17 +299,50 @@ mod tests { offline_payload.sequence_number = 101; offline_payload.sequence_number_ack = 201; - let realtime_syn = - build_realtime_packet(client_ip, 12345, server_ip, 443, 1_000_000, 0x02, 100, 0, 0); - let realtime_syn_ack = build_realtime_packet( - server_ip, 443, client_ip, 12345, 1_000_100, 0x12, 200, 101, 0, - ); - let realtime_ack = build_realtime_packet( - client_ip, 12345, server_ip, 443, 1_000_200, 0x10, 101, 201, 0, - ); - let realtime_payload = build_realtime_packet( - client_ip, 12345, server_ip, 443, 1_000_300, 0x18, 101, 201, 64, - ); + let realtime_syn = build_realtime_packet(RealtimePacketIpv4Spec { + source_ip: client_ip, + source_port: 12345, + destination_ip: server_ip, + destination_port: 443, + timestamp_us: 1_000_000, + flags: 0x02, + sequence_number: 100, + sequence_number_ack: 0, + data_length: 0, + }); + let realtime_syn_ack = build_realtime_packet(RealtimePacketIpv4Spec { + source_ip: server_ip, + source_port: 443, + destination_ip: client_ip, + destination_port: 12345, + timestamp_us: 1_000_100, + flags: 0x12, + sequence_number: 200, + sequence_number_ack: 101, + data_length: 0, + }); + let realtime_ack = build_realtime_packet(RealtimePacketIpv4Spec { + source_ip: client_ip, + source_port: 12345, + destination_ip: server_ip, + destination_port: 443, + timestamp_us: 1_000_200, + flags: 0x10, + sequence_number: 101, + sequence_number_ack: 201, + data_length: 0, + }); + let realtime_payload = build_realtime_packet(RealtimePacketIpv4Spec { + source_ip: client_ip, + source_port: 12345, + destination_ip: server_ip, + destination_port: 443, + timestamp_us: 1_000_300, + flags: 0x18, + sequence_number: 101, + sequence_number_ack: 201, + data_length: 64, + }); for packet in [ &offline_syn, @@ -346,17 +385,17 @@ mod tests { let mut realtime_table = FlowTable::new(3600, 1, None, realtime_tx, 60); let offline_packet = build_packet(1_000_000); - let realtime_packet = 
build_realtime_packet( - Ipv4Addr::new(192, 168, 1, 1), - 12345, - Ipv4Addr::new(192, 168, 1, 2), - 443, - 1_000_000, - 0, - 0, - 0, - 0, - ); + let realtime_packet = build_realtime_packet(RealtimePacketIpv4Spec { + source_ip: Ipv4Addr::new(192, 168, 1, 1), + source_port: 12345, + destination_ip: Ipv4Addr::new(192, 168, 1, 2), + destination_port: 443, + timestamp_us: 1_000_000, + flags: 0, + sequence_number: 0, + sequence_number_ack: 0, + data_length: 0, + }); offline_table.process_packet(&offline_packet).await; realtime_table.process_packet(&realtime_packet).await; @@ -461,18 +500,50 @@ mod tests { ..Default::default() }; - let realtime_syn = build_realtime_packet_ipv6( - client_ip, 12345, server_ip, 443, 1_000_000, 0x02, 100, 0, 0, - ); - let realtime_syn_ack = build_realtime_packet_ipv6( - server_ip, 443, client_ip, 12345, 1_000_100, 0x12, 200, 101, 0, - ); - let realtime_ack = build_realtime_packet_ipv6( - client_ip, 12345, server_ip, 443, 1_000_200, 0x10, 101, 201, 0, - ); - let realtime_payload = build_realtime_packet_ipv6( - client_ip, 12345, server_ip, 443, 1_000_300, 0x18, 101, 201, 64, - ); + let realtime_syn = build_realtime_packet_ipv6(RealtimePacketIpv6Spec { + source_ip: client_ip, + source_port: 12345, + destination_ip: server_ip, + destination_port: 443, + timestamp_us: 1_000_000, + flags: 0x02, + sequence_number: 100, + sequence_number_ack: 0, + data_length: 0, + }); + let realtime_syn_ack = build_realtime_packet_ipv6(RealtimePacketIpv6Spec { + source_ip: server_ip, + source_port: 443, + destination_ip: client_ip, + destination_port: 12345, + timestamp_us: 1_000_100, + flags: 0x12, + sequence_number: 200, + sequence_number_ack: 101, + data_length: 0, + }); + let realtime_ack = build_realtime_packet_ipv6(RealtimePacketIpv6Spec { + source_ip: client_ip, + source_port: 12345, + destination_ip: server_ip, + destination_port: 443, + timestamp_us: 1_000_200, + flags: 0x10, + sequence_number: 101, + sequence_number_ack: 201, + data_length: 0, + }); + let 
realtime_payload = build_realtime_packet_ipv6(RealtimePacketIpv6Spec { + source_ip: client_ip, + source_port: 12345, + destination_ip: server_ip, + destination_port: 443, + timestamp_us: 1_000_300, + flags: 0x18, + sequence_number: 101, + sequence_number_ack: 201, + data_length: 64, + }); for packet in [ &offline_syn, @@ -525,8 +596,17 @@ mod tests { timestamp_us: 1_000_000, ..Default::default() }; - let realtime_packet = - build_realtime_packet_ipv6(client_ip, 12345, server_ip, 443, 1_000_000, 0, 0, 0, 0); + let realtime_packet = build_realtime_packet_ipv6(RealtimePacketIpv6Spec { + source_ip: client_ip, + source_port: 12345, + destination_ip: server_ip, + destination_port: 443, + timestamp_us: 1_000_000, + flags: 0, + sequence_number: 0, + sequence_number_ack: 0, + data_length: 0, + }); offline_table.process_packet(&offline_packet).await; realtime_table.process_packet(&realtime_packet).await; From d52f0eae19d73bec623409e51cc6169b356a007f Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:07:07 +0200 Subject: [PATCH 08/23] Profile export hot path and trim row assembly --- docs/engineering-notes.md | 173 +++++++++++++++++ rustiflow/src/flows/basic_flow.rs | 48 +++-- rustiflow/src/flows/rusti_flow.rs | 306 +++++++++++++++++++----------- rustiflow/src/output.rs | 6 +- 4 files changed, 396 insertions(+), 137 deletions(-) diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index 2e5e48f9..c17dbfa1 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -503,3 +503,176 @@ This file keeps short-lived design choices and execution notes that would make - `cargo test -p rustiflow packet_features_test -- --nocapture` - `cargo test -p rustiflow flow_table_test -- --nocapture` - `cargo check -p rustiflow` + +## 2026-03-31 + +- First bounded profiling pass on the current parallel realtime path used the + local `rustiflow-t0` harness with the existing userspace profiling hooks: + - 
`RUSTIFLOW_REALTIME_STATS=1` for per-source drain/dispatch timing in + `rustiflow/src/realtime.rs` + - `RUSTIFLOW_PROFILE_RESOURCE_SUMMARY=1` for process CPU/RSS/context-switch + summaries + - `RUSTIFLOW_PROFILE_FLAMEGRAPH=...svg` for userspace flamegraph output +- Workload shape for the main bounded comparisons: + - ingress IPv4 only on `rustiflow-t0` + - reverse UDP traffic from the local namespace peer + - `iperf3 -c 10.203.0.2 -B 10.203.0.1 -u -b 1.25G -l 1400 -P 8 -t 10 -R -p 5201` + - container image `rustiflow:test-slim` + - `--threads 12` +- `basic` with no `--early-export` flag: + - receiver bitrate about `9.85 Gbit/s` + - RustiFlow dropped packets `0` + - process summary: about `17.7 s` user CPU, `6.0 s` sys CPU, max RSS about + `2.19 GB` + - per-source stats show the clean `10G` operating point is no longer + dominated by shard-send wait: + - active queues stayed around `0.10` to `0.12 us` average enqueue wait + - active queues stayed around `0.11` to `0.18 us` average shard-send wait + - decode-and-shard stayed around `0.22` to `0.23 us` average event cost + - current interpretation: + - on the proven `10G` case, the parallel ingress path is comfortably past + the earlier multi-millisecond backpressure regime + - the remaining sampled hot userspace work is mostly inside the source task + / batching path rather than export +- `rustiflow` with `--early-export 5` on the same traffic shape: + - receiver bitrate about `9.99 Gbit/s` + - RustiFlow dropped packets `0` + - process summary: about `21.4 s` user CPU, `6.9 s` sys CPU, max RSS about + `2.28 GB` + - exported CSV volume rose sharply to about `636590` lines and about + `625 MB` in a `10 s` run + - per-source stats moved back into a materially slower regime: + - average enqueue wait about `1.67` to `5.20 us` + - average shard-send wait about `1.81` to `5.26 us` + - average total event cost about `1.73` to `5.03 us` + - the userspace flamegraph on this export-heavy run shows the export subtree + is now a 
real cost center: + - `rustiflow::output::OutputWriter::::write_flow` occupied about `35%` + inclusive sampled width + - `RustiFlow::dump` directly underneath it also occupied about `35%` + inclusive sampled width + - formatting-heavy feature dumps such as payload and window-size stats were + visible hot subtrees under `dump` + - current interpretation: + - export formatting and serialization are now confirmed costs under high + export pressure; they are no longer merely speculative redesign targets + - the dispatcher/backpressure path still matters, but on this shape export + work is large enough that snapshot / CSV redesign questions should now be + evidence-driven rather than deferred as unmeasured risk +- `rustiflow` with no `--early-export` flag on the same traffic shape: + - receiver bitrate about `9.98 Gbit/s` + - RustiFlow dropped packets `0` + - process summary: about `18.1 s` user CPU, `5.9 s` sys CPU, max RSS about + `2.19 GB` + - exported CSV volume collapsed back to only the final-flow output: about + `10` lines and about `15 KB` + - per-source stats returned to the same low-wait regime as the `basic` + no-early-export case: + - average enqueue wait about `0.09` to `0.12 us` + - average shard-send wait about `0.11` to `0.17 us` + - average total event cost about `0.21` to `0.22 us` + - the no-early-export flamegraph no longer shows `OutputWriter::write_flow` + or `RustiFlow::dump` as meaningful hot subtrees + - instead, the visible sampled work is concentrated in: + - realtime source-task batching (`enqueue_pending_batches` / + `enqueue_shard_batch`) + - dispatcher tasks spawned by `spawn_source_dispatcher` + - `FlowTable::process_packet` / `process_existing_flow` + - `RustiFlow::update_flow` + - current interpretation: + - at the proven `10G` operating point, richer feature extraction by itself + is not the main remaining limiter + - the large extra CPU cost seen with `--early-export 5` is primarily export + pressure rather than inherent `RustiFlow` 
feature-update cost + - this gives a cleaner priority order for follow-up work: + measure export-path redesign options first, and only revisit cheaper + running-statistics implementations if later profiling still shows the + feature modules hot after export pressure is removed +- Export-cost isolation follow-up with `basic --early-export 5` on the same + `10G` shape: + - receiver bitrate about `9.96 Gbit/s` + - RustiFlow dropped packets `0` + - process summary: about `20.6 s` user CPU, `6.7 s` sys CPU, max RSS about + `2.31 GB` + - exported output grew to about `2296493` CSV lines and about `326 MB` + - per-source waits clearly rose relative to the no-early-export `basic` + baseline: + - average enqueue wait about `1.34` to `4.27 us` + - average shard-send wait about `2.27` to `4.37 us` + - the flamegraph confirms that periodic export alone is enough to create a + visible export subtree even for the cheap schema: + - `OutputWriter::write_flow` occupied about `7.3%` inclusive sampled width + - `BasicFlow::dump` occupied about `7.0%` inclusive sampled width + - timestamp / formatting work under `BasicFlow::dump` was visible, but much + smaller than the richer `RustiFlow::dump` export tree +- Current interpretation after the export-cost isolation pass: + - export pressure by itself is a real limiter even for `basic` + - richer exporter formatting in `rustiflow` magnifies that cost sharply: + - `basic --early-export 5`: about `7%` sampled export subtree + - `rustiflow --early-export 5`: about `35%` sampled export subtree + - line count alone is not the right proxy for export cost: + - `basic --early-export 5` emitted more lines than `rustiflow --early-export 5` + - but `rustiflow --early-export 5` still produced the much hotter export + flamegraph because each serialized record is substantially heavier + - the next export-path experiments should focus on reducing per-record + formatting / serialization cost before redesigning ingestion again for the + clean `10G` 
operating point +- First bounded export-path mitigation on `2026-03-31`: + - changed `OutputWriter` to write the serialized flow string directly with + `write_all()` plus `\n` instead of going back through `writeln!` + - rewrote top-level `BasicFlow` and `RustiFlow` CSV assembly to build one + output buffer directly instead of creating `Vec` plus `join(",")` + - validation: + - `cargo fmt` + - `cargo test -p rustiflow flow_table_test -- --nocapture` + - `cargo check -p rustiflow` + - `cargo clippy -p rustiflow --all-targets` +- Reprofile of the same hot case after that bounded export change: + - workload unchanged: `rustiflow`, `--early-export 5`, `10G`, `1400`-byte + UDP, `-P 8`, `--threads 12`, ingress on `rustiflow-t0` + - receiver bitrate stayed at about `9.99 Gbit/s` + - RustiFlow dropped packets stayed at `0` + - process summary improved from about `21.4 s` user / `6.9 s` sys CPU to + about `19.4 s` user / `6.4 s` sys CPU + - flamegraph comparison: + - before: `OutputWriter::write_flow` and `RustiFlow::dump` each occupied + about `35%` inclusive sampled width + - after: the same subtree fell to about `28%` inclusive sampled width + - payload/window-size dump helpers are still visible hot leaves underneath + `RustiFlow::dump` + - current interpretation: + - the bounded CSV-assembly cleanup produced a real but not decisive win + - export formatting remains a major cost center under periodic export even + after removing obvious top-level string assembly overhead + - the next profitable layer is likely inside the heavier feature dump + helpers themselves, or a more structural change to how CSV rows are + emitted, rather than another ingress redesign +- Follow-up bounded experiment on the shared feature dump helpers: + - tried replacing several feature-level `format!` / nested `dump_values()` + paths with append-style `String` assembly in shared `FeatureStats`, + `PayloadLengthStats`, `WindowSizeStats`, `PacketLengthStats`, + `HeaderLengthStats`, `IATStats`, 
`ActiveIdleStats`, `BulkStats`, + `TimingStats`, and `TcpFlagStats` + - reprofiled the same `rustiflow --early-export 5`, `10G`, `-P 8`, + `--threads 12` case: + - receiver bitrate stayed about `9.99 Gbit/s` + - RustiFlow dropped packets stayed at `0` + - process summary came back at about `21.6 s` user / `7.2 s` sys CPU + - CSV output was about `712,637` lines / `734 MB` + - current interpretation: + - this second-pass feature-dump refactor did not show a reliable win on the + same hot case + - output volume also varied upward enough that the run is not cleaner than + the earlier `opt1` result + - keep the earlier top-level CSV assembly cleanup, but do not keep this + wider feature-dump rewrite as a trusted optimization + - the next export-path work should target a more structural bottleneck than + replacing additional leaf `format!` calls one by one +- One accidental command detail also matters operationally: + - passing `--early-export 0` does not disable early export; it produces + effectively continuous early export because the CLI passes `Some(0)` + - on the same `10G` shape with `basic`, that setting still produced `0` + RustiFlow drops but exploded output to about `5.85 million` CSV lines and + about `832 MB` in `10 s` + - for throughput experiments, disabling early export still means omitting the + flag entirely rather than passing `0` diff --git a/rustiflow/src/flows/basic_flow.rs b/rustiflow/src/flows/basic_flow.rs index 7c64f8ff..fa235848 100644 --- a/rustiflow/src/flows/basic_flow.rs +++ b/rustiflow/src/flows/basic_flow.rs @@ -1,4 +1,5 @@ use std::net::IpAddr; +use std::{fmt::Display, fmt::Write as _}; use chrono::{DateTime, Utc}; use pnet::packet::ip::IpNextHeaderProtocols; @@ -239,6 +240,13 @@ impl BasicFlow { } } +fn push_csv_display(output: &mut String, value: impl Display) { + if !output.is_empty() { + output.push(','); + } + let _ = write!(output, "{value}"); +} + impl Flow for BasicFlow { fn new( flow_id: String, @@ -306,19 +314,18 @@ impl Flow 
for BasicFlow { } fn dump(&self) -> String { - format!( - "{},{},{},{},{},{},{},{},{},{}", - self.flow_key, - self.ip_source, - self.port_source, - self.ip_destination, - self.port_destination, - self.protocol, - self.get_first_timestamp(), - self.get_last_timestamp(), - self.get_flow_duration_usec(), - self.flow_expire_cause.as_str() - ) + let mut output = String::with_capacity(192); + push_csv_display(&mut output, &self.flow_key); + push_csv_display(&mut output, self.ip_source); + push_csv_display(&mut output, self.port_source); + push_csv_display(&mut output, self.ip_destination); + push_csv_display(&mut output, self.port_destination); + push_csv_display(&mut output, self.protocol); + push_csv_display(&mut output, self.get_first_timestamp()); + push_csv_display(&mut output, self.get_last_timestamp()); + push_csv_display(&mut output, self.get_flow_duration_usec()); + push_csv_display(&mut output, self.flow_expire_cause.as_str()); + output } fn get_features() -> String { @@ -328,14 +335,13 @@ impl Flow for BasicFlow { } fn dump_without_contamination(&self) -> String { - format!( - "{},{},{},{},{}", - iana_port_mapping(self.port_source), - iana_port_mapping(self.port_destination), - self.protocol, - self.get_flow_duration_usec(), - self.flow_expire_cause.as_str(), - ) + let mut output = String::with_capacity(96); + push_csv_display(&mut output, iana_port_mapping(self.port_source)); + push_csv_display(&mut output, iana_port_mapping(self.port_destination)); + push_csv_display(&mut output, self.protocol); + push_csv_display(&mut output, self.get_flow_duration_usec()); + push_csv_display(&mut output, self.flow_expire_cause.as_str()); + output } fn get_features_without_contamination() -> String { diff --git a/rustiflow/src/flows/rusti_flow.rs b/rustiflow/src/flows/rusti_flow.rs index d1f55902..a464f2d5 100644 --- a/rustiflow/src/flows/rusti_flow.rs +++ b/rustiflow/src/flows/rusti_flow.rs @@ -1,4 +1,5 @@ use std::net::IpAddr; +use std::{fmt::Display, fmt::Write as _}; 
use crate::{ flows::{ @@ -41,6 +42,20 @@ pub struct RustiFlow { pub timing_stats: TimingStats, } +fn push_csv_display(output: &mut String, value: impl Display) { + if !output.is_empty() { + output.push(','); + } + let _ = write!(output, "{value}"); +} + +fn push_csv_str(output: &mut String, value: &str) { + if !output.is_empty() { + output.push(','); + } + output.push_str(value); +} + impl Flow for RustiFlow { fn new( flow_id: String, @@ -123,98 +138,128 @@ impl Flow for RustiFlow { fn dump(&self) -> String { let duration_us = self.basic_flow.get_flow_duration_usec(); - vec![ - self.basic_flow.flow_key.clone(), - self.basic_flow.ip_source.to_string(), - self.basic_flow.port_source.to_string(), - self.basic_flow.ip_destination.to_string(), - self.basic_flow.port_destination.to_string(), - self.basic_flow.protocol.to_string(), - self.basic_flow.get_ip_version().to_string(), - self.basic_flow.get_source_ip_scope().as_str().to_string(), - self.basic_flow - .get_destination_ip_scope() - .as_str() - .to_string(), - self.basic_flow.get_path_locality().as_str().to_string(), - self.basic_flow.get_first_timestamp().to_string(), - self.basic_flow.get_last_timestamp().to_string(), - duration_us.to_string(), - self.basic_flow.flow_expire_cause.as_str().to_string(), - u8::from(self.basic_flow.tcp_handshake_completed).to_string(), - u8::from(self.basic_flow.tcp_reset_before_handshake).to_string(), - u8::from(self.basic_flow.tcp_reset_after_handshake).to_string(), - self.basic_flow.tcp_close_style.as_str().to_string(), - self.timing_stats.dump(), - self.iat_stats.dump(), - self.packet_len_stats.dump(), - self.header_len_stats.dump(), - self.payload_len_stats.dump(), - self.bulk_stats.dump(), - self.subflow_stats.dump(), - self.active_idle_stats.dump(), - self.icmp_stats.dump(), - self.retransmission_stats.dump(), - self.tcp_quality_stats.dump(), - self.window_size_stats.dump(), - self.tcp_flags_stats.dump(), + let mut output = String::with_capacity(4096); + push_csv_display(&mut 
output, &self.basic_flow.flow_key); + push_csv_display(&mut output, self.basic_flow.ip_source); + push_csv_display(&mut output, self.basic_flow.port_source); + push_csv_display(&mut output, self.basic_flow.ip_destination); + push_csv_display(&mut output, self.basic_flow.port_destination); + push_csv_display(&mut output, self.basic_flow.protocol); + push_csv_display(&mut output, self.basic_flow.get_ip_version()); + push_csv_display(&mut output, self.basic_flow.get_source_ip_scope().as_str()); + push_csv_display( + &mut output, + self.basic_flow.get_destination_ip_scope().as_str(), + ); + push_csv_display(&mut output, self.basic_flow.get_path_locality().as_str()); + push_csv_display(&mut output, self.basic_flow.get_first_timestamp()); + push_csv_display(&mut output, self.basic_flow.get_last_timestamp()); + push_csv_display(&mut output, duration_us); + push_csv_display(&mut output, self.basic_flow.flow_expire_cause.as_str()); + push_csv_display( + &mut output, + u8::from(self.basic_flow.tcp_handshake_completed), + ); + push_csv_display( + &mut output, + u8::from(self.basic_flow.tcp_reset_before_handshake), + ); + push_csv_display( + &mut output, + u8::from(self.basic_flow.tcp_reset_after_handshake), + ); + push_csv_display(&mut output, self.basic_flow.tcp_close_style.as_str()); + push_csv_str(&mut output, &self.timing_stats.dump()); + push_csv_str(&mut output, &self.iat_stats.dump()); + push_csv_str(&mut output, &self.packet_len_stats.dump()); + push_csv_str(&mut output, &self.header_len_stats.dump()); + push_csv_str(&mut output, &self.payload_len_stats.dump()); + push_csv_str(&mut output, &self.bulk_stats.dump()); + push_csv_str(&mut output, &self.subflow_stats.dump()); + push_csv_str(&mut output, &self.active_idle_stats.dump()); + push_csv_str(&mut output, &self.icmp_stats.dump()); + push_csv_str(&mut output, &self.retransmission_stats.dump()); + push_csv_str(&mut output, &self.tcp_quality_stats.dump()); + push_csv_str(&mut output, &self.window_size_stats.dump()); + 
push_csv_str(&mut output, &self.tcp_flags_stats.dump()); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.payload_len.get_total(), duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.payload_len.get_count() as f64, duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.fwd_payload_len.get_total(), duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.fwd_payload_len.get_count() as f64, duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.bwd_payload_len.get_total(), duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.bwd_payload_len.get_count() as f64, duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div_int( self.payload_len_stats.fwd_payload_len.get_count(), self.subflow_stats.subflow_count, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div( self.payload_len_stats.fwd_payload_len.get_total(), self.subflow_stats.subflow_count as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div_int( self.payload_len_stats.bwd_payload_len.get_count(), self.subflow_stats.subflow_count, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div( self.payload_len_stats.bwd_payload_len.get_total(), self.subflow_stats.subflow_count as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div_int( self.payload_len_stats.bwd_payload_len.get_count(), self.payload_len_stats.fwd_payload_len.get_count(), - ) - .to_string(), - ] - .join(",") + ), + ); + output } fn get_features() -> String { @@ -267,94 +312,127 @@ impl Flow for RustiFlow { fn 
dump_without_contamination(&self) -> String { let duration_us = self.basic_flow.get_flow_duration_usec(); - vec![ - iana_port_mapping(self.basic_flow.port_source).to_string(), - iana_port_mapping(self.basic_flow.port_destination).to_string(), - self.basic_flow.protocol.to_string(), - self.basic_flow.get_ip_version().to_string(), - self.basic_flow.get_source_ip_scope().as_str().to_string(), - self.basic_flow - .get_destination_ip_scope() - .as_str() - .to_string(), - self.basic_flow.get_path_locality().as_str().to_string(), - duration_us.to_string(), - self.basic_flow.flow_expire_cause.as_str().to_string(), - u8::from(self.basic_flow.tcp_handshake_completed).to_string(), - u8::from(self.basic_flow.tcp_reset_before_handshake).to_string(), - u8::from(self.basic_flow.tcp_reset_after_handshake).to_string(), - self.basic_flow.tcp_close_style.as_str().to_string(), - self.timing_stats.get_fwd_duration().to_string(), - self.timing_stats.get_bwd_duration().to_string(), - self.iat_stats.dump(), - self.packet_len_stats.dump(), - self.header_len_stats.dump(), - self.payload_len_stats.dump(), - self.bulk_stats.dump(), - self.subflow_stats.dump(), - self.active_idle_stats.dump(), - self.icmp_stats.dump(), - self.retransmission_stats.dump(), - self.tcp_quality_stats.dump(), - self.window_size_stats.dump(), - self.tcp_flags_stats.dump(), + let mut output = String::with_capacity(3072); + push_csv_display(&mut output, iana_port_mapping(self.basic_flow.port_source)); + push_csv_display( + &mut output, + iana_port_mapping(self.basic_flow.port_destination), + ); + push_csv_display(&mut output, self.basic_flow.protocol); + push_csv_display(&mut output, self.basic_flow.get_ip_version()); + push_csv_display(&mut output, self.basic_flow.get_source_ip_scope().as_str()); + push_csv_display( + &mut output, + self.basic_flow.get_destination_ip_scope().as_str(), + ); + push_csv_display(&mut output, self.basic_flow.get_path_locality().as_str()); + push_csv_display(&mut output, duration_us); + 
push_csv_display(&mut output, self.basic_flow.flow_expire_cause.as_str()); + push_csv_display( + &mut output, + u8::from(self.basic_flow.tcp_handshake_completed), + ); + push_csv_display( + &mut output, + u8::from(self.basic_flow.tcp_reset_before_handshake), + ); + push_csv_display( + &mut output, + u8::from(self.basic_flow.tcp_reset_after_handshake), + ); + push_csv_display(&mut output, self.basic_flow.tcp_close_style.as_str()); + push_csv_display(&mut output, self.timing_stats.get_fwd_duration()); + push_csv_display(&mut output, self.timing_stats.get_bwd_duration()); + push_csv_str(&mut output, &self.iat_stats.dump()); + push_csv_str(&mut output, &self.packet_len_stats.dump()); + push_csv_str(&mut output, &self.header_len_stats.dump()); + push_csv_str(&mut output, &self.payload_len_stats.dump()); + push_csv_str(&mut output, &self.bulk_stats.dump()); + push_csv_str(&mut output, &self.subflow_stats.dump()); + push_csv_str(&mut output, &self.active_idle_stats.dump()); + push_csv_str(&mut output, &self.icmp_stats.dump()); + push_csv_str(&mut output, &self.retransmission_stats.dump()); + push_csv_str(&mut output, &self.tcp_quality_stats.dump()); + push_csv_str(&mut output, &self.window_size_stats.dump()); + push_csv_str(&mut output, &self.tcp_flags_stats.dump()); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.payload_len.get_total(), duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.payload_len.get_count() as f64, duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.fwd_payload_len.get_total(), duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.fwd_payload_len.get_count() as f64, duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( 
self.payload_len_stats.bwd_payload_len.get_total(), duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_per_second_rate( self.payload_len_stats.bwd_payload_len.get_count() as f64, duration_us as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div_int( self.payload_len_stats.fwd_payload_len.get_count(), self.subflow_stats.subflow_count, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div( self.payload_len_stats.fwd_payload_len.get_total(), self.subflow_stats.subflow_count as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div_int( self.payload_len_stats.bwd_payload_len.get_count(), self.subflow_stats.subflow_count, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div( self.payload_len_stats.bwd_payload_len.get_total(), self.subflow_stats.subflow_count as f64, - ) - .to_string(), + ), + ); + push_csv_display( + &mut output, safe_div_int( self.payload_len_stats.bwd_payload_len.get_count(), self.payload_len_stats.fwd_payload_len.get_count(), - ) - .to_string(), - ] - .join(",") + ), + ); + output } fn get_features_without_contamination() -> String { diff --git a/rustiflow/src/output.rs b/rustiflow/src/output.rs index 517c33d7..92fd3ad0 100644 --- a/rustiflow/src/output.rs +++ b/rustiflow/src/output.rs @@ -58,7 +58,8 @@ where flow.dump() }; - writeln!(self.writer, "{}", flow_str) + self.writer.write_all(flow_str.as_bytes())?; + self.writer.write_all(b"\n") } /// Flushes the writer and closes the output file @@ -75,6 +76,7 @@ where } else { T::get_features() }; - writeln!(self.writer, "{}", header) + self.writer.write_all(header.as_bytes())?; + self.writer.write_all(b"\n") } } From 902a84130c916a3d0af0b08278bcce63ecaf80cf Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:07:15 +0200 Subject: [PATCH 09/23] Refocus checklist on structural export work --- AGENTS.md | 48 
++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 9f98d000..af82fc19 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -121,36 +121,36 @@ in `docs/engineering-notes.md`. ### Current Focus -- [ ] Add profiling on the current parallel realtime path before bigger - redesigns: - capture CPU usage and flamegraph-style evidence for drain/dispatch, - FlowTable work, and export cost on the `rustiflow-t0` harness. -- [ ] Quantify resource usage for the proven local throughput tiers: - for the current `10G` operating point, and later `25/40G` attempts, record - CPU use, memory use, drop behavior, and export rate instead of bitrate alone. -- [ ] Re-measure the remaining dispatcher bottleneck before transport rewrite: - after the current queue-count and fanout wins, determine whether the next - limiter is shard-channel backpressure, FlowTable processing, or export cost. -- [ ] Revisit cheaper running statistics only after profiling confirms they are - still hot on the newer ingestion path. -- [ ] Measure export-path cost explicitly before redesigning flow snapshots or - CSV serialization: - confirm whether cloning/export formatting is now a real limiter under high - export pressure. +- [ ] Measure how much of the remaining hot export path is snapshot ownership + cost versus row serialization cost: + isolate clone/copy work from string/CSV formatting work under the proven + `10G` `--early-export 5` case. +- [ ] Prototype a structural export path that writes CSV fields directly to the + buffered writer instead of requiring one fully assembled row `String` per + exported flow, then reprofile the same workload. +- [ ] Evaluate whether a typed export snapshot or borrow-based export view can + reduce per-export cloning/allocation without violating flow ownership, + sharding, or semantic parity. 
+- [ ] Identify the heaviest remaining field families inside `RustiFlow::dump` + after the accepted top-level CSV cleanup, and only optimize subsystems that + still show up materially in flamegraphs. +- [ ] Re-run the export-heavy comparison after each bounded structural change + using at least: + `basic --early-export 5`, `rustiflow --early-export 5`, and one no-early- + export control, recording CPU, RSS, drop total, output size, and bitrate. - [ ] Keep updating `docs/engineering-notes.md` after each bounded experiment with: - workload, achieved bitrate, dropped-packet total, and what the new - bottleneck appears to be. + workload, achieved bitrate, dropped-packet total, resource summary, and what + the new bottleneck appears to be. Primary files: -- `rustiflow/src/realtime.rs` -- `ebpf-ipv4/src/main.rs` -- `ebpf-ipv6/src/main.rs` -- `common/src/lib.rs` -- `rustiflow/src/packet_features.rs` -- `rustiflow/src/flow_table.rs` +- `rustiflow/src/output.rs` - `rustiflow/src/flows/basic_flow.rs` +- `rustiflow/src/flows/rusti_flow.rs` +- `rustiflow/src/flows/features/` +- `rustiflow/src/flow_table.rs` +- `rustiflow/src/realtime.rs` - `docs/engineering-notes.md` ### Later Work From a01927a544eca808733302669a00a35041411c58 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:12:45 +0200 Subject: [PATCH 10/23] Measure export clone and serialization costs --- docs/engineering-notes.md | 29 ++++++++ rustiflow/src/export_profile.rs | 113 ++++++++++++++++++++++++++++++++ rustiflow/src/flow_table.rs | 15 +++-- rustiflow/src/main.rs | 5 ++ rustiflow/src/output.rs | 10 ++- 5 files changed, 166 insertions(+), 6 deletions(-) create mode 100644 rustiflow/src/export_profile.rs diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index c17dbfa1..034b2432 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -668,6 +668,35 @@ This file keeps short-lived design choices and execution notes that would 
make wider feature-dump rewrite as a trusted optimization - the next export-path work should target a more structural bottleneck than replacing additional leaf `format!` calls one by one +- Export-breakdown measurement for the first structural checklist item: + - added an opt-in userspace export breakdown behind + `RUSTIFLOW_PROFILE_EXPORT_BREAKDOWN=1` + - current instrumentation records: + - flow snapshot clone time inside `FlowTable::apply_packet_to_flow` + - row serialization time inside `OutputWriter::write_flow` around + `dump()` / `dump_without_contamination()` + - buffered write time for the serialized row bytes + - reran the same rebuilt `rustiflow:test-slim` hot case: + - `rustiflow`, `--early-export 5`, `10G`, `1400`-byte UDP, `-P 8`, + `--threads 12`, ingress on `rustiflow-t0` + - receiver bitrate about `9.98 Gbit/s` + - RustiFlow dropped packets `0` + - process summary about `20.0 s` user CPU / `6.3 s` sys CPU, max RSS about + `2.28 GB` + - exported output about `474,875` rows / `489 MB` + - measured export split on that run: + - `clone_count=474,866` + - flow snapshot clone time about `92 ms` total + - row serialization (`dump`) time about `5,030 ms` total + - buffered write time about `561 ms` total + - current interpretation: + - at the proven `10G` early-export operating point, snapshot ownership cost + is not the main export bottleneck + - per-row serialization dominates clone cost by roughly two orders of + magnitude on the measured hot case + - buffered row writes are visible but still much smaller than `dump()` + - the next structural export experiment should focus on avoiding or + reshaping full-row string serialization, not on clone elimination first - One accidental command detail also matters operationally: - passing `--early-export 0` does not disable early export; it produces effectively continuous early export because the CLI passes `Some(0)` diff --git a/rustiflow/src/export_profile.rs b/rustiflow/src/export_profile.rs new file mode 
100644 index 00000000..748e4af1 --- /dev/null +++ b/rustiflow/src/export_profile.rs @@ -0,0 +1,113 @@ +use std::{ + env, + sync::{ + atomic::{AtomicU64, Ordering}, + OnceLock, + }, + time::Duration, +}; + +use log::info; + +struct ExportProfile { + clone_count: AtomicU64, + clone_time_ns: AtomicU64, + serialized_flow_count: AtomicU64, + serialized_bytes: AtomicU64, + dump_time_ns: AtomicU64, + write_time_ns: AtomicU64, +} + +impl ExportProfile { + const fn new() -> Self { + Self { + clone_count: AtomicU64::new(0), + clone_time_ns: AtomicU64::new(0), + serialized_flow_count: AtomicU64::new(0), + serialized_bytes: AtomicU64::new(0), + dump_time_ns: AtomicU64::new(0), + write_time_ns: AtomicU64::new(0), + } + } + + fn record_clone(&self, duration: Duration) { + self.clone_count.fetch_add(1, Ordering::Relaxed); + self.clone_time_ns + .fetch_add(duration.as_nanos() as u64, Ordering::Relaxed); + } + + fn record_dump(&self, duration: Duration, bytes: usize) { + self.serialized_flow_count.fetch_add(1, Ordering::Relaxed); + self.serialized_bytes + .fetch_add(bytes as u64, Ordering::Relaxed); + self.dump_time_ns + .fetch_add(duration.as_nanos() as u64, Ordering::Relaxed); + } + + fn record_write(&self, duration: Duration) { + self.write_time_ns + .fetch_add(duration.as_nanos() as u64, Ordering::Relaxed); + } + + fn log_summary(&self, mode: &str) { + let clone_count = self.clone_count.load(Ordering::Relaxed); + let serialized_flow_count = self.serialized_flow_count.load(Ordering::Relaxed); + let serialized_bytes = self.serialized_bytes.load(Ordering::Relaxed); + let clone_time_ns = self.clone_time_ns.load(Ordering::Relaxed); + let dump_time_ns = self.dump_time_ns.load(Ordering::Relaxed); + let write_time_ns = self.write_time_ns.load(Ordering::Relaxed); + + info!( + "Export breakdown for {}: clone_count={} clone_ms={:.3} serialized_flows={} serialized_bytes={} dump_ms={:.3} write_ms={:.3}", + mode, + clone_count, + clone_time_ns as f64 / 1_000_000.0, + serialized_flow_count, + 
serialized_bytes, + dump_time_ns as f64 / 1_000_000.0, + write_time_ns as f64 / 1_000_000.0, + ); + } +} + +fn export_profile() -> Option<&'static ExportProfile> { + static ENABLED: OnceLock = OnceLock::new(); + static PROFILE: ExportProfile = ExportProfile::new(); + + if *ENABLED.get_or_init(|| env_flag("RUSTIFLOW_PROFILE_EXPORT_BREAKDOWN")) { + Some(&PROFILE) + } else { + None + } +} + +pub fn record_clone(duration: Duration) { + if let Some(profile) = export_profile() { + profile.record_clone(duration); + } +} + +pub fn record_dump(duration: Duration, bytes: usize) { + if let Some(profile) = export_profile() { + profile.record_dump(duration, bytes); + } +} + +pub fn record_write(duration: Duration) { + if let Some(profile) = export_profile() { + profile.record_write(duration); + } +} + +pub fn log_summary(mode: &str) { + if let Some(profile) = export_profile() { + profile.log_summary(mode); + } +} + +fn env_flag(name: &str) -> bool { + matches!( + env::var(name).ok().as_deref(), + Some("1" | "true" | "TRUE" | "yes" | "YES" | "on" | "ON") + ) +} diff --git a/rustiflow/src/flow_table.rs b/rustiflow/src/flow_table.rs index 6b72bd92..04afc930 100644 --- a/rustiflow/src/flow_table.rs +++ b/rustiflow/src/flow_table.rs @@ -1,7 +1,8 @@ -use std::collections::HashMap; +use std::{collections::HashMap, time::Instant}; use crate::{ - flow_key::FlowKey, flows::util::FlowExpireCause, packet_features::PacketFeatures, Flow, + export_profile, flow_key::FlowKey, flows::util::FlowExpireCause, + packet_features::PacketFeatures, Flow, }; use log::{debug, error}; use tokio::sync::mpsc; @@ -158,12 +159,18 @@ where early_export: Option, ) -> FlowUpdate { if flow.update_flow(packet, is_forward) { - FlowUpdate::Terminated(flow.clone()) + let clone_start = Instant::now(); + let snapshot = flow.clone(); + export_profile::record_clone(clone_start.elapsed()); + FlowUpdate::Terminated(snapshot) } else if early_export.is_some_and(|early_export| { ((packet.timestamp_us - 
flow.get_first_timestamp_us()) / 1_000_000) as u64 > early_export }) { - FlowUpdate::EarlyExport(flow.clone()) + let clone_start = Instant::now(); + let snapshot = flow.clone(); + export_profile::record_clone(clone_start.elapsed()); + FlowUpdate::EarlyExport(snapshot) } else { FlowUpdate::Active } diff --git a/rustiflow/src/main.rs b/rustiflow/src/main.rs index b23449a5..4b383d17 100644 --- a/rustiflow/src/main.rs +++ b/rustiflow/src/main.rs @@ -1,4 +1,5 @@ mod args; +mod export_profile; mod flow_key; mod flow_table; #[cfg(target_os = "linux")] @@ -176,6 +177,8 @@ async fn run_with_config(config: Config) { } } + export_profile::log_summary("realtime"); + let end = Instant::now(); info!( "Duration: {:.4} seconds", @@ -265,6 +268,8 @@ async fn run_with_config(config: Config) { error!("Error waiting for output task: {:?}", e); }); + export_profile::log_summary("offline"); + if let Some(profiling_session) = profiling_session { if let Err(err) = profiling_session.finish() { error!("Error finishing profiler: {:?}", err); diff --git a/rustiflow/src/output.rs b/rustiflow/src/output.rs index 92fd3ad0..1dab5e2e 100644 --- a/rustiflow/src/output.rs +++ b/rustiflow/src/output.rs @@ -1,8 +1,9 @@ -use crate::{args::ExportMethodType, flows::flow::Flow}; +use crate::{args::ExportMethodType, export_profile, flows::flow::Flow}; use log::{debug, error}; use std::{ fs::File, io::{BufWriter, Write}, + time::Instant, }; pub struct OutputWriter { @@ -52,14 +53,19 @@ where } pub fn write_flow(&mut self, flow: T) -> std::io::Result<()> { + let dump_start = Instant::now(); let flow_str = if self.skip_contaminant_features { flow.dump_without_contamination() } else { flow.dump() }; + export_profile::record_dump(dump_start.elapsed(), flow_str.len()); + let write_start = Instant::now(); self.writer.write_all(flow_str.as_bytes())?; - self.writer.write_all(b"\n") + self.writer.write_all(b"\n")?; + export_profile::record_write(write_start.elapsed()); + Ok(()) } /// Flushes the writer and closes 
the output file From 2b4494f6c6794baddd9c6547d8c3eeeafadc6d09 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:23:41 +0200 Subject: [PATCH 11/23] Record direct-write export experiment --- docs/engineering-notes.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index 034b2432..5ecd7fb1 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -697,6 +697,30 @@ This file keeps short-lived design choices and execution notes that would make - buffered row writes are visible but still much smaller than `dump()` - the next structural export experiment should focus on avoiding or reshaping full-row string serialization, not on clone elimination first +- First bounded direct-to-writer CSV prototype: + - added `write_csv_row` / `write_csv_row_without_contamination` on the hot + `BasicFlow` and `RustiFlow` exporters so `OutputWriter` could emit rows + directly to the buffered writer instead of first building one full row + `String` + - reran the same rebuilt `rustiflow:test-slim` hot case with export + breakdown enabled: + - receiver bitrate about `9.99 Gbit/s` + - RustiFlow dropped packets `0` + - process summary about `19.6 s` user CPU / `7.0 s` sys CPU, max RSS about + `2.27 GB` + - exported output about `526,580` rows / `542 MB` + - measured export breakdown: + - `clone_count=526,571` + - clone time about `97 ms` + - export-path timed section about `5,954 ms` + - trailing newline write about `12 ms` + - current interpretation: + - this first direct-to-writer version did not produce a clean trusted win + over the prior row-string path on the same hot case + - output volume varied upward, and total export-path time did not fall in a + way strong enough to justify keeping the added complexity + - revert this prototype and keep looking for a more structural reduction in + per-row serialization cost - One accidental 
command detail also matters operationally: - passing `--early-export 0` does not disable early export; it produces effectively continuous early export because the CLI passes `Some(0)` From 5301ef4a7ac3221606b4c88ce156b89e226fb7c4 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 13:01:57 +0200 Subject: [PATCH 12/23] Reuse export row buffers --- AGENTS.md | 4 +- docs/engineering-notes.md | 32 +++++ rustiflow/src/flows/basic_flow.rs | 38 +++--- rustiflow/src/flows/flow.rs | 22 +++- rustiflow/src/flows/rusti_flow.rs | 195 +++++++++++++----------------- rustiflow/src/output.rs | 15 ++- 6 files changed, 162 insertions(+), 144 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index af82fc19..9439b40f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -121,11 +121,11 @@ in `docs/engineering-notes.md`. ### Current Focus -- [ ] Measure how much of the remaining hot export path is snapshot ownership +- [x] Measure how much of the remaining hot export path is snapshot ownership cost versus row serialization cost: isolate clone/copy work from string/CSV formatting work under the proven `10G` `--early-export 5` case. -- [ ] Prototype a structural export path that writes CSV fields directly to the +- [x] Prototype a structural export path that writes CSV fields directly to the buffered writer instead of requiring one fully assembled row `String` per exported flow, then reprofile the same workload. 
- [ ] Evaluate whether a typed export snapshot or borrow-based export view can diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index 5ecd7fb1..e36f42ed 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -721,6 +721,38 @@ This file keeps short-lived design choices and execution notes that would make way strong enough to justify keeping the added complexity - revert this prototype and keep looking for a more structural reduction in per-row serialization cost +- Accepted follow-up export-path change: reuse one top-level row buffer per + `OutputWriter` instead of allocating a fresh row `String` on every export: + - kept the existing row-assembly semantics and feature `dump()` calls, but + added append-style flow methods so `OutputWriter` can clear and reuse an + owned `String` buffer for each row + - reran the same rebuilt `rustiflow:test-slim` hot case with export + breakdown enabled: + - `rustiflow`, `--early-export 5`, `10G`, `1400`-byte UDP, `-P 8`, + `--threads 12`, ingress on `rustiflow-t0` + - receiver bitrate about `9.99 Gbit/s` + - RustiFlow dropped packets `0` + - process summary about `19.1 s` user CPU / `6.3 s` sys CPU, max RSS about + `2.27 GB` + - exported output about `524,089` rows / `540 MB` + - measured export breakdown: + - `clone_count=524,080` + - clone time about `89 ms` + - row serialization (`dump`) time about `4,807 ms` + - buffered write time about `545 ms` + - normalized comparison against the earlier `breakdown2` row-string baseline: + - dump cost fell from about `10.59 us` per row to about `9.17 us` per row + - write cost fell from about `1.18 us` per row to about `1.04 us` per row + - total CPU time fell from about `55.3 us` per row to about `48.4 us` per + row + - current interpretation: + - reusing the top-level export row buffer is a clean trusted win even + though the sampled run exported more rows than the earlier baseline + - the gain is meaningfully smaller than a full serialization 
redesign, but + it removes one recurring allocation layer without changing export + semantics + - the remaining structural export bottleneck is still inside per-feature + serialization work rather than snapshot cloning or top-level row writes - One accidental command detail also matters operationally: - passing `--early-export 0` does not disable early export; it produces effectively continuous early export because the CLI passes `Some(0)` diff --git a/rustiflow/src/flows/basic_flow.rs b/rustiflow/src/flows/basic_flow.rs index fa235848..ea1c13a7 100644 --- a/rustiflow/src/flows/basic_flow.rs +++ b/rustiflow/src/flows/basic_flow.rs @@ -313,19 +313,17 @@ impl Flow for BasicFlow { self.update_tcp_close_style(cause); } - fn dump(&self) -> String { - let mut output = String::with_capacity(192); - push_csv_display(&mut output, &self.flow_key); - push_csv_display(&mut output, self.ip_source); - push_csv_display(&mut output, self.port_source); - push_csv_display(&mut output, self.ip_destination); - push_csv_display(&mut output, self.port_destination); - push_csv_display(&mut output, self.protocol); - push_csv_display(&mut output, self.get_first_timestamp()); - push_csv_display(&mut output, self.get_last_timestamp()); - push_csv_display(&mut output, self.get_flow_duration_usec()); - push_csv_display(&mut output, self.flow_expire_cause.as_str()); - output + fn append_to_csv_row(&self, output: &mut String) { + push_csv_display(output, &self.flow_key); + push_csv_display(output, self.ip_source); + push_csv_display(output, self.port_source); + push_csv_display(output, self.ip_destination); + push_csv_display(output, self.port_destination); + push_csv_display(output, self.protocol); + push_csv_display(output, self.get_first_timestamp()); + push_csv_display(output, self.get_last_timestamp()); + push_csv_display(output, self.get_flow_duration_usec()); + push_csv_display(output, self.flow_expire_cause.as_str()); } fn get_features() -> String { @@ -334,14 +332,12 @@ impl Flow for 
BasicFlow { .to_string() } - fn dump_without_contamination(&self) -> String { - let mut output = String::with_capacity(96); - push_csv_display(&mut output, iana_port_mapping(self.port_source)); - push_csv_display(&mut output, iana_port_mapping(self.port_destination)); - push_csv_display(&mut output, self.protocol); - push_csv_display(&mut output, self.get_flow_duration_usec()); - push_csv_display(&mut output, self.flow_expire_cause.as_str()); - output + fn append_to_csv_row_without_contamination(&self, output: &mut String) { + push_csv_display(output, iana_port_mapping(self.port_source)); + push_csv_display(output, iana_port_mapping(self.port_destination)); + push_csv_display(output, self.protocol); + push_csv_display(output, self.get_flow_duration_usec()); + push_csv_display(output, self.flow_expire_cause.as_str()); } fn get_features_without_contamination() -> String { diff --git a/rustiflow/src/flows/flow.rs b/rustiflow/src/flows/flow.rs index b6cfd7c5..e3911cc4 100644 --- a/rustiflow/src/flows/flow.rs +++ b/rustiflow/src/flows/flow.rs @@ -71,7 +71,16 @@ pub trait Flow: Send + Sync + 'static + Clone { /// ### Returns /// /// Returns a `String` that represents the current state of the flow. - fn dump(&self) -> String; + fn dump(&self) -> String { + let mut output = String::new(); + self.append_to_csv_row(&mut output); + output + } + + /// Appends the current state of the flow to an existing CSV row buffer. + fn append_to_csv_row(&self, output: &mut String) { + output.push_str(&self.dump()); + } /// Dumps the current state of the flow without contaminant features. /// @@ -80,7 +89,16 @@ pub trait Flow: Send + Sync + 'static + Clone { /// ### Returns /// /// Returns a `String` that represents the current state of the flow without contaminant features. 
- fn dump_without_contamination(&self) -> String; + fn dump_without_contamination(&self) -> String { + let mut output = String::new(); + self.append_to_csv_row_without_contamination(&mut output); + output + } + + /// Appends the contamination-free state of the flow to an existing CSV row buffer. + fn append_to_csv_row_without_contamination(&self, output: &mut String) { + output.push_str(&self.dump_without_contamination()); + } /// Returns the first timestamp of the flow. /// diff --git a/rustiflow/src/flows/rusti_flow.rs b/rustiflow/src/flows/rusti_flow.rs index a464f2d5..52677f20 100644 --- a/rustiflow/src/flows/rusti_flow.rs +++ b/rustiflow/src/flows/rusti_flow.rs @@ -136,130 +136,116 @@ impl Flow for RustiFlow { self.timing_stats.close(timestamp_us, cause); } - fn dump(&self) -> String { + fn append_to_csv_row(&self, output: &mut String) { let duration_us = self.basic_flow.get_flow_duration_usec(); - let mut output = String::with_capacity(4096); - push_csv_display(&mut output, &self.basic_flow.flow_key); - push_csv_display(&mut output, self.basic_flow.ip_source); - push_csv_display(&mut output, self.basic_flow.port_source); - push_csv_display(&mut output, self.basic_flow.ip_destination); - push_csv_display(&mut output, self.basic_flow.port_destination); - push_csv_display(&mut output, self.basic_flow.protocol); - push_csv_display(&mut output, self.basic_flow.get_ip_version()); - push_csv_display(&mut output, self.basic_flow.get_source_ip_scope().as_str()); + push_csv_display(output, &self.basic_flow.flow_key); + push_csv_display(output, self.basic_flow.ip_source); + push_csv_display(output, self.basic_flow.port_source); + push_csv_display(output, self.basic_flow.ip_destination); + push_csv_display(output, self.basic_flow.port_destination); + push_csv_display(output, self.basic_flow.protocol); + push_csv_display(output, self.basic_flow.get_ip_version()); + push_csv_display(output, self.basic_flow.get_source_ip_scope().as_str()); + push_csv_display(output, 
self.basic_flow.get_destination_ip_scope().as_str()); + push_csv_display(output, self.basic_flow.get_path_locality().as_str()); + push_csv_display(output, self.basic_flow.get_first_timestamp()); + push_csv_display(output, self.basic_flow.get_last_timestamp()); + push_csv_display(output, duration_us); + push_csv_display(output, self.basic_flow.flow_expire_cause.as_str()); + push_csv_display(output, u8::from(self.basic_flow.tcp_handshake_completed)); + push_csv_display(output, u8::from(self.basic_flow.tcp_reset_before_handshake)); + push_csv_display(output, u8::from(self.basic_flow.tcp_reset_after_handshake)); + push_csv_display(output, self.basic_flow.tcp_close_style.as_str()); + push_csv_str(output, &self.timing_stats.dump()); + push_csv_str(output, &self.iat_stats.dump()); + push_csv_str(output, &self.packet_len_stats.dump()); + push_csv_str(output, &self.header_len_stats.dump()); + push_csv_str(output, &self.payload_len_stats.dump()); + push_csv_str(output, &self.bulk_stats.dump()); + push_csv_str(output, &self.subflow_stats.dump()); + push_csv_str(output, &self.active_idle_stats.dump()); + push_csv_str(output, &self.icmp_stats.dump()); + push_csv_str(output, &self.retransmission_stats.dump()); + push_csv_str(output, &self.tcp_quality_stats.dump()); + push_csv_str(output, &self.window_size_stats.dump()); + push_csv_str(output, &self.tcp_flags_stats.dump()); push_csv_display( - &mut output, - self.basic_flow.get_destination_ip_scope().as_str(), - ); - push_csv_display(&mut output, self.basic_flow.get_path_locality().as_str()); - push_csv_display(&mut output, self.basic_flow.get_first_timestamp()); - push_csv_display(&mut output, self.basic_flow.get_last_timestamp()); - push_csv_display(&mut output, duration_us); - push_csv_display(&mut output, self.basic_flow.flow_expire_cause.as_str()); - push_csv_display( - &mut output, - u8::from(self.basic_flow.tcp_handshake_completed), - ); - push_csv_display( - &mut output, - 
u8::from(self.basic_flow.tcp_reset_before_handshake), - ); - push_csv_display( - &mut output, - u8::from(self.basic_flow.tcp_reset_after_handshake), - ); - push_csv_display(&mut output, self.basic_flow.tcp_close_style.as_str()); - push_csv_str(&mut output, &self.timing_stats.dump()); - push_csv_str(&mut output, &self.iat_stats.dump()); - push_csv_str(&mut output, &self.packet_len_stats.dump()); - push_csv_str(&mut output, &self.header_len_stats.dump()); - push_csv_str(&mut output, &self.payload_len_stats.dump()); - push_csv_str(&mut output, &self.bulk_stats.dump()); - push_csv_str(&mut output, &self.subflow_stats.dump()); - push_csv_str(&mut output, &self.active_idle_stats.dump()); - push_csv_str(&mut output, &self.icmp_stats.dump()); - push_csv_str(&mut output, &self.retransmission_stats.dump()); - push_csv_str(&mut output, &self.tcp_quality_stats.dump()); - push_csv_str(&mut output, &self.window_size_stats.dump()); - push_csv_str(&mut output, &self.tcp_flags_stats.dump()); - push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.payload_len.get_total(), duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.payload_len.get_count() as f64, duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.fwd_payload_len.get_total(), duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.fwd_payload_len.get_count() as f64, duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.bwd_payload_len.get_total(), duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.bwd_payload_len.get_count() as f64, duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_div_int( self.payload_len_stats.fwd_payload_len.get_count(), 
self.subflow_stats.subflow_count, ), ); push_csv_display( - &mut output, + output, safe_div( self.payload_len_stats.fwd_payload_len.get_total(), self.subflow_stats.subflow_count as f64, ), ); push_csv_display( - &mut output, + output, safe_div_int( self.payload_len_stats.bwd_payload_len.get_count(), self.subflow_stats.subflow_count, ), ); push_csv_display( - &mut output, + output, safe_div( self.payload_len_stats.bwd_payload_len.get_total(), self.subflow_stats.subflow_count as f64, ), ); push_csv_display( - &mut output, + output, safe_div_int( self.payload_len_stats.bwd_payload_len.get_count(), self.payload_len_stats.fwd_payload_len.get_count(), ), ); - output } fn get_features() -> String { @@ -310,129 +296,112 @@ impl Flow for RustiFlow { .join(",") } - fn dump_without_contamination(&self) -> String { + fn append_to_csv_row_without_contamination(&self, output: &mut String) { let duration_us = self.basic_flow.get_flow_duration_usec(); - let mut output = String::with_capacity(3072); - push_csv_display(&mut output, iana_port_mapping(self.basic_flow.port_source)); - push_csv_display( - &mut output, - iana_port_mapping(self.basic_flow.port_destination), - ); - push_csv_display(&mut output, self.basic_flow.protocol); - push_csv_display(&mut output, self.basic_flow.get_ip_version()); - push_csv_display(&mut output, self.basic_flow.get_source_ip_scope().as_str()); - push_csv_display( - &mut output, - self.basic_flow.get_destination_ip_scope().as_str(), - ); - push_csv_display(&mut output, self.basic_flow.get_path_locality().as_str()); - push_csv_display(&mut output, duration_us); - push_csv_display(&mut output, self.basic_flow.flow_expire_cause.as_str()); - push_csv_display( - &mut output, - u8::from(self.basic_flow.tcp_handshake_completed), - ); - push_csv_display( - &mut output, - u8::from(self.basic_flow.tcp_reset_before_handshake), - ); - push_csv_display( - &mut output, - u8::from(self.basic_flow.tcp_reset_after_handshake), - ); - push_csv_display(&mut output, 
self.basic_flow.tcp_close_style.as_str()); - push_csv_display(&mut output, self.timing_stats.get_fwd_duration()); - push_csv_display(&mut output, self.timing_stats.get_bwd_duration()); - push_csv_str(&mut output, &self.iat_stats.dump()); - push_csv_str(&mut output, &self.packet_len_stats.dump()); - push_csv_str(&mut output, &self.header_len_stats.dump()); - push_csv_str(&mut output, &self.payload_len_stats.dump()); - push_csv_str(&mut output, &self.bulk_stats.dump()); - push_csv_str(&mut output, &self.subflow_stats.dump()); - push_csv_str(&mut output, &self.active_idle_stats.dump()); - push_csv_str(&mut output, &self.icmp_stats.dump()); - push_csv_str(&mut output, &self.retransmission_stats.dump()); - push_csv_str(&mut output, &self.tcp_quality_stats.dump()); - push_csv_str(&mut output, &self.window_size_stats.dump()); - push_csv_str(&mut output, &self.tcp_flags_stats.dump()); + push_csv_display(output, iana_port_mapping(self.basic_flow.port_source)); + push_csv_display(output, iana_port_mapping(self.basic_flow.port_destination)); + push_csv_display(output, self.basic_flow.protocol); + push_csv_display(output, self.basic_flow.get_ip_version()); + push_csv_display(output, self.basic_flow.get_source_ip_scope().as_str()); + push_csv_display(output, self.basic_flow.get_destination_ip_scope().as_str()); + push_csv_display(output, self.basic_flow.get_path_locality().as_str()); + push_csv_display(output, duration_us); + push_csv_display(output, self.basic_flow.flow_expire_cause.as_str()); + push_csv_display(output, u8::from(self.basic_flow.tcp_handshake_completed)); + push_csv_display(output, u8::from(self.basic_flow.tcp_reset_before_handshake)); + push_csv_display(output, u8::from(self.basic_flow.tcp_reset_after_handshake)); + push_csv_display(output, self.basic_flow.tcp_close_style.as_str()); + push_csv_display(output, self.timing_stats.get_fwd_duration()); + push_csv_display(output, self.timing_stats.get_bwd_duration()); + push_csv_str(output, &self.iat_stats.dump()); 
+ push_csv_str(output, &self.packet_len_stats.dump()); + push_csv_str(output, &self.header_len_stats.dump()); + push_csv_str(output, &self.payload_len_stats.dump()); + push_csv_str(output, &self.bulk_stats.dump()); + push_csv_str(output, &self.subflow_stats.dump()); + push_csv_str(output, &self.active_idle_stats.dump()); + push_csv_str(output, &self.icmp_stats.dump()); + push_csv_str(output, &self.retransmission_stats.dump()); + push_csv_str(output, &self.tcp_quality_stats.dump()); + push_csv_str(output, &self.window_size_stats.dump()); + push_csv_str(output, &self.tcp_flags_stats.dump()); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.payload_len.get_total(), duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.payload_len.get_count() as f64, duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.fwd_payload_len.get_total(), duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.fwd_payload_len.get_count() as f64, duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.bwd_payload_len.get_total(), duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_per_second_rate( self.payload_len_stats.bwd_payload_len.get_count() as f64, duration_us as f64, ), ); push_csv_display( - &mut output, + output, safe_div_int( self.payload_len_stats.fwd_payload_len.get_count(), self.subflow_stats.subflow_count, ), ); push_csv_display( - &mut output, + output, safe_div( self.payload_len_stats.fwd_payload_len.get_total(), self.subflow_stats.subflow_count as f64, ), ); push_csv_display( - &mut output, + output, safe_div_int( self.payload_len_stats.bwd_payload_len.get_count(), self.subflow_stats.subflow_count, ), ); push_csv_display( - &mut output, + output, safe_div( 
self.payload_len_stats.bwd_payload_len.get_total(), self.subflow_stats.subflow_count as f64, ), ); push_csv_display( - &mut output, + output, safe_div_int( self.payload_len_stats.bwd_payload_len.get_count(), self.payload_len_stats.fwd_payload_len.get_count(), ), ); - output } fn get_features_without_contamination() -> String { diff --git a/rustiflow/src/output.rs b/rustiflow/src/output.rs index 1dab5e2e..0f9f208e 100644 --- a/rustiflow/src/output.rs +++ b/rustiflow/src/output.rs @@ -10,6 +10,7 @@ pub struct OutputWriter { write_header: bool, skip_contaminant_features: bool, writer: BufWriter>, + row_buffer: String, _phantom_data: std::marker::PhantomData, } @@ -38,6 +39,7 @@ where write_header, skip_contaminant_features, writer, + row_buffer: String::with_capacity(4096), _phantom_data: std::marker::PhantomData, } } @@ -54,15 +56,16 @@ where pub fn write_flow(&mut self, flow: T) -> std::io::Result<()> { let dump_start = Instant::now(); - let flow_str = if self.skip_contaminant_features { - flow.dump_without_contamination() + self.row_buffer.clear(); + if self.skip_contaminant_features { + flow.append_to_csv_row_without_contamination(&mut self.row_buffer); } else { - flow.dump() - }; - export_profile::record_dump(dump_start.elapsed(), flow_str.len()); + flow.append_to_csv_row(&mut self.row_buffer); + } + export_profile::record_dump(dump_start.elapsed(), self.row_buffer.len()); let write_start = Instant::now(); - self.writer.write_all(flow_str.as_bytes())?; + self.writer.write_all(self.row_buffer.as_bytes())?; self.writer.write_all(b"\n")?; export_profile::record_write(write_start.elapsed()); Ok(()) From ecfd7bf29f1d4159c528596f570a36bf17bb3946 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 13:38:27 +0200 Subject: [PATCH 13/23] Record export snapshot evaluation --- AGENTS.md | 2 +- docs/engineering-notes.md | 40 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git 
a/AGENTS.md b/AGENTS.md index 9439b40f..5995e2e3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -128,7 +128,7 @@ in `docs/engineering-notes.md`. - [x] Prototype a structural export path that writes CSV fields directly to the buffered writer instead of requiring one fully assembled row `String` per exported flow, then reprofile the same workload. -- [ ] Evaluate whether a typed export snapshot or borrow-based export view can +- [x] Evaluate whether a typed export snapshot or borrow-based export view can reduce per-export cloning/allocation without violating flow ownership, sharding, or semantic parity. - [ ] Identify the heaviest remaining field families inside `RustiFlow::dump` diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index e36f42ed..48bbd7ba 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -753,6 +753,46 @@ This file keeps short-lived design choices and execution notes that would make semantics - the remaining structural export bottleneck is still inside per-feature serialization work rather than snapshot cloning or top-level row writes +- Evaluation of a typed snapshot-vs-owned export message at the channel + boundary: + - tried a bounded `ExportedFlow::{Snapshot, Owned}` split so early export + could keep sending cloned snapshots while terminated / expired / shutdown + flows moved out of the shard without cloning + - this was the most plausible safe substitute for a borrow-based export view, + because the current writer task consumes exports asynchronously over + `mpsc`; that boundary needs owned data and does not permit borrowing flow + state out of the shard task + - validation was clean: + - `cargo fmt` + - `cargo check -p rustiflow` + - `cargo test -p rustiflow flow_table_test -- --nocapture` + - `cargo test -p rustiflow pcap_fixture_test -- --nocapture` + - `cargo clippy -p rustiflow --all-targets` + - reran the same rebuilt `rustiflow:test-slim` hot case with export + breakdown enabled: + - `rustiflow`, 
`--early-export 5`, `10G`, `1400`-byte UDP, `-P 8`, + `--threads 12`, ingress on `rustiflow-t0` + - receiver bitrate about `9.98 Gbit/s` + - RustiFlow dropped packets `0` + - process summary about `20.0 s` user CPU / `6.7 s` sys CPU, max RSS about + `2.29 GB` + - exported output about `603,424` rows / `621 MB` + - measured export breakdown: + - `clone_count=603,415` + - clone time about `122 ms` + - row serialization (`dump`) time about `5,348 ms` + - buffered write time about `628 ms` + - current interpretation: + - on the proven hot case, the typed ownership split does not materially + change what the exporter is paying for + - only `9` of `603,424` exported rows were owned final exports; the rest + were still early-export snapshots, so clone elimination on termination + paths is not where the current pressure lives + - a borrow-based export view is not compatible with the present + cross-task writer architecture without a larger ownership and scheduling + redesign + - revert the typed ownership split and keep the conclusion in notes rather + than carrying extra plumbing that does not improve the measured hot path - One accidental command detail also matters operationally: - passing `--early-export 0` does not disable early export; it produces effectively continuous early export because the CLI passes `Some(0)` From 67e602ad2f9b273de95a883d7938d177ddee76ac Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 13:48:33 +0200 Subject: [PATCH 14/23] Trim hot feature dump allocations --- AGENTS.md | 2 +- docs/engineering-notes.md | 57 +++++++++++++++++++ rustiflow/src/flows/features/bulk_stats.rs | 15 ++++- rustiflow/src/flows/features/iat_stats.rs | 6 ++ rustiflow/src/flows/features/packet_stats.rs | 15 ++++- rustiflow/src/flows/features/payload_stats.rs | 11 +++- .../src/flows/features/tcp_flag_stats.rs | 30 +++++++++- rustiflow/src/flows/features/util.rs | 22 +++++++ .../src/flows/features/window_size_stats.rs | 
10 +++- rustiflow/src/flows/rusti_flow.rs | 24 ++++---- 10 files changed, 174 insertions(+), 18 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5995e2e3..9fbf1c28 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -131,7 +131,7 @@ in `docs/engineering-notes.md`. - [x] Evaluate whether a typed export snapshot or borrow-based export view can reduce per-export cloning/allocation without violating flow ownership, sharding, or semantic parity. -- [ ] Identify the heaviest remaining field families inside `RustiFlow::dump` +- [x] Identify the heaviest remaining field families inside `RustiFlow::dump` after the accepted top-level CSV cleanup, and only optimize subsystems that still show up materially in flamegraphs. - [ ] Re-run the export-heavy comparison after each bounded structural change diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index 48bbd7ba..f079e7c9 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -793,6 +793,63 @@ This file keeps short-lived design choices and execution notes that would make redesign - revert the typed ownership split and keep the conclusion in notes rather than carrying extra plumbing that does not improve the measured hot path +- Remaining hot-family identification after the accepted top-level row-buffer + reuse: + - inspected the kept `rusti-10g-ee5-reuse1.svg` hot-case flamegraph and the + `RustiFlow::dump` call structure + - the feature families still showing up materially as direct dump leaves were: + - `BulkStats` about `3.1%` + - `PacketLengthStats` about `3.0%` + - `WindowSizeStats` about `2.5%` + - `PayloadLengthStats` about `2.4%` + - `IATStats` about `2.3%` + - `TcpFlagStats` about `2.2%` + - smaller but still visible families included `HeaderLengthStats` and + `ActiveIdleStats`, while the colder families did not justify another broad + rewrite +- Accepted targeted dump-path change for those first-tier families: + - added append-style CSV emission to `FeatureStats` and `FlowFeature`, but 
+ only overrode it for the six feature families still showing up materially + in the flamegraph: + `BulkStats`, `PacketLengthStats`, `WindowSizeStats`, + `PayloadLengthStats`, `IATStats`, and `TcpFlagStats` + - `RustiFlow::dump` and the contamination-free dump path now append those + fields directly into the shared row buffer instead of allocating one + intermediate feature `String` for each of them + - validation: + - `cargo fmt` + - `cargo check -p rustiflow` + - `cargo clippy -p rustiflow --all-targets` + - `cargo test -p rustiflow pcap_fixture_test -- --nocapture` + - `cargo test -p rustiflow nf_flow_test -- --nocapture` + - reran the same rebuilt `rustiflow:test-slim` hot case with export + breakdown enabled: + - `rustiflow`, `--early-export 5`, `10G`, `1400`-byte UDP, `-P 8`, + `--threads 12`, ingress on `rustiflow-t0` + - receiver bitrate about `9.91 Gbit/s` + - RustiFlow dropped packets `0` + - process summary about `20.2 s` user CPU / `7.1 s` sys CPU, max RSS about + `2.29 GB` + - exported output about `700,668` rows / `721 MB` + - measured export breakdown: + - `clone_count=700,659` + - clone time about `121 ms` + - row serialization (`dump`) time about `5,110 ms` + - buffered write time about `790 ms` + - normalized comparison against `rusti-10g-ee5-reuse1`: + - dump cost fell from about `9.17 us` per row to about `7.29 us` per row + - total CPU time fell from about `48.4 us` per row to about `38.8 us` per + row + - the targeted feature families no longer appeared as standalone hot dump + leaves in the new flamegraph + - current interpretation: + - the targeted append-path change is worth keeping + - the flamegraph-guided approach worked better than the earlier broad + feature-dump rewrite because it cut one allocation layer only where the + profile still showed meaningful cost + - the next export work should re-run the comparison matrix + (`basic --early-export 5`, `rustiflow --early-export 5`, and a + no-early-export control) before picking the next 
colder feature family - One accidental command detail also matters operationally: - passing `--early-export 0` does not disable early export; it produces effectively continuous early export because the CLI passes `Some(0)` diff --git a/rustiflow/src/flows/features/bulk_stats.rs b/rustiflow/src/flows/features/bulk_stats.rs index f23e7b65..a9ab07db 100644 --- a/rustiflow/src/flows/features/bulk_stats.rs +++ b/rustiflow/src/flows/features/bulk_stats.rs @@ -1,6 +1,6 @@ use crate::{flows::util::FlowExpireCause, packet_features::PacketFeatures}; -use super::util::{FeatureStats, FlowFeature}; +use super::util::{push_csv_display, FeatureStats, FlowFeature}; const MIN_BULK_PACKETS: u32 = 4; const BULK_IDLE_MS: i64 = 1000; @@ -182,6 +182,19 @@ impl FlowFeature for BulkStats { ) } + fn append_to_csv(&self, output: &mut String) { + push_csv_display(output, self.fwd_bulk_rate()); + push_csv_display(output, self.bwd_bulk_rate()); + push_csv_display(output, self.fwd_bulk_packets.get_count()); + push_csv_display(output, self.bwd_bulk_packets.get_count()); + self.fwd_bulk_packets.append_csv_values(output); + self.bwd_bulk_packets.append_csv_values(output); + self.fwd_bulk_payload_size.append_csv_values(output); + self.bwd_bulk_payload_size.append_csv_values(output); + self.fwd_bulk_duration.append_csv_values(output); + self.bwd_bulk_duration.append_csv_values(output); + } + fn headers() -> String { format!( "{},{},{},{},{},{},{},{},{},{}", diff --git a/rustiflow/src/flows/features/iat_stats.rs b/rustiflow/src/flows/features/iat_stats.rs index b5f90850..da91b2f6 100644 --- a/rustiflow/src/flows/features/iat_stats.rs +++ b/rustiflow/src/flows/features/iat_stats.rs @@ -64,6 +64,12 @@ impl FlowFeature for IATStats { ) } + fn append_to_csv(&self, output: &mut String) { + self.iat.append_csv_values(output); + self.fwd_iat.append_csv_values(output); + self.bwd_iat.append_csv_values(output); + } + fn headers() -> String { format!( "{},{},{}", diff --git 
a/rustiflow/src/flows/features/packet_stats.rs b/rustiflow/src/flows/features/packet_stats.rs index 9107791f..e03a1ac7 100644 --- a/rustiflow/src/flows/features/packet_stats.rs +++ b/rustiflow/src/flows/features/packet_stats.rs @@ -1,6 +1,6 @@ use crate::{flows::util::FlowExpireCause, packet_features::PacketFeatures}; -use super::util::{FeatureStats, FlowFeature}; +use super::util::{push_csv_display, FeatureStats, FlowFeature}; #[derive(Clone)] pub struct PacketLengthStats { @@ -124,6 +124,19 @@ impl FlowFeature for PacketLengthStats { ) } + fn append_to_csv(&self, output: &mut String) { + push_csv_display(output, self.flow_count()); + push_csv_display(output, self.flow_total()); + push_csv_display(output, self.flow_mean()); + push_csv_display(output, self.flow_max()); + push_csv_display(output, self.flow_min()); + push_csv_display(output, self.flow_std()); + push_csv_display(output, self.fwd_packet_len.get_count()); + push_csv_display(output, self.bwd_packet_len.get_count()); + self.fwd_packet_len.append_csv_values(output); + self.bwd_packet_len.append_csv_values(output); + } + fn headers() -> String { format!( "{},{},{},{},{},{},{},{},{},{}", diff --git a/rustiflow/src/flows/features/payload_stats.rs b/rustiflow/src/flows/features/payload_stats.rs index ceb75082..fde5068e 100644 --- a/rustiflow/src/flows/features/payload_stats.rs +++ b/rustiflow/src/flows/features/payload_stats.rs @@ -1,6 +1,6 @@ use crate::{flows::util::FlowExpireCause, packet_features::PacketFeatures}; -use super::util::{FeatureStats, FlowFeature}; +use super::util::{push_csv_display, FeatureStats, FlowFeature}; #[derive(Clone)] pub struct PayloadLengthStats { @@ -55,6 +55,15 @@ impl FlowFeature for PayloadLengthStats { ) } + fn append_to_csv(&self, output: &mut String) { + self.payload_len.append_csv_values(output); + push_csv_display(output, self.payload_len.get_std().powi(2)); + self.fwd_payload_len.append_csv_values(output); + self.bwd_payload_len.append_csv_values(output); + 
push_csv_display(output, self.fwd_non_zero_payload_packets); + push_csv_display(output, self.bwd_non_zero_payload_packets); + } + fn headers() -> String { format!( "{},{},{},{},{},{}", diff --git a/rustiflow/src/flows/features/tcp_flag_stats.rs b/rustiflow/src/flows/features/tcp_flag_stats.rs index 7de09d1e..bce648ad 100644 --- a/rustiflow/src/flows/features/tcp_flag_stats.rs +++ b/rustiflow/src/flows/features/tcp_flag_stats.rs @@ -1,6 +1,6 @@ use crate::{flows::util::FlowExpireCause, packet_features::PacketFeatures}; -use super::util::FlowFeature; +use super::util::{push_csv_display, FlowFeature}; #[derive(Clone)] pub struct TcpFlagStats { @@ -143,6 +143,34 @@ impl FlowFeature for TcpFlagStats { ) } + fn append_to_csv(&self, output: &mut String) { + push_csv_display(output, self.fwd_fin_flag_count); + push_csv_display(output, self.fwd_syn_flag_count); + push_csv_display(output, self.fwd_rst_flag_count); + push_csv_display(output, self.fwd_psh_flag_count); + push_csv_display(output, self.fwd_ack_flag_count); + push_csv_display(output, self.fwd_urg_flag_count); + push_csv_display(output, self.fwd_cwr_flag_count); + push_csv_display(output, self.fwd_ece_flag_count); + push_csv_display(output, self.bwd_fin_flag_count); + push_csv_display(output, self.bwd_syn_flag_count); + push_csv_display(output, self.bwd_rst_flag_count); + push_csv_display(output, self.bwd_psh_flag_count); + push_csv_display(output, self.bwd_ack_flag_count); + push_csv_display(output, self.bwd_urg_flag_count); + push_csv_display(output, self.bwd_cwr_flag_count); + push_csv_display(output, self.bwd_ece_flag_count); + push_csv_display(output, self.fwd_fin_flag_count + self.bwd_fin_flag_count); + push_csv_display(output, self.fwd_syn_flag_count + self.bwd_syn_flag_count); + push_csv_display(output, self.fwd_rst_flag_count + self.bwd_rst_flag_count); + push_csv_display(output, self.fwd_psh_flag_count + self.bwd_psh_flag_count); + push_csv_display(output, self.fwd_ack_flag_count + 
self.bwd_ack_flag_count); + push_csv_display(output, self.fwd_urg_flag_count + self.bwd_urg_flag_count); + push_csv_display(output, self.fwd_cwr_flag_count + self.bwd_cwr_flag_count); + push_csv_display(output, self.fwd_ece_flag_count + self.bwd_ece_flag_count); + push_csv_display(output, self.get_flags()); + } + fn headers() -> String { [ "fwd_fin_flag_count", diff --git a/rustiflow/src/flows/features/util.rs b/rustiflow/src/flows/features/util.rs index 6e92b2c5..06d01d27 100644 --- a/rustiflow/src/flows/features/util.rs +++ b/rustiflow/src/flows/features/util.rs @@ -1,3 +1,5 @@ +use std::{fmt::Display, fmt::Write as _}; + use crate::{flows::util::FlowExpireCause, packet_features::PacketFeatures}; /// Trait for network flow features that can be updated, closed, and dumped to CSV format @@ -11,6 +13,11 @@ pub trait FlowFeature: Send + Sync + Clone { /// Dumps the current state as a CSV string fn dump(&self) -> String; + /// Appends the current state as CSV fields to an existing row buffer. 
+ fn append_to_csv(&self, output: &mut String) { + output.push_str(&self.dump()); + } + /// Returns the CSV headers for this feature fn headers() -> String where @@ -113,6 +120,21 @@ impl FeatureStats { self.get_min(), ) } + + pub fn append_csv_values(&self, output: &mut String) { + push_csv_display(output, self.get_total()); + push_csv_display(output, self.get_mean()); + push_csv_display(output, self.get_std()); + push_csv_display(output, self.get_max()); + push_csv_display(output, self.get_min()); + } +} + +pub fn push_csv_display(output: &mut String, value: impl Display) { + if !output.is_empty() { + output.push(','); + } + let _ = write!(output, "{value}"); } /// Safely performs floating point division, returning 0.0 if denominator is 0 diff --git a/rustiflow/src/flows/features/window_size_stats.rs b/rustiflow/src/flows/features/window_size_stats.rs index 5ab62eba..6cb1d669 100644 --- a/rustiflow/src/flows/features/window_size_stats.rs +++ b/rustiflow/src/flows/features/window_size_stats.rs @@ -1,6 +1,6 @@ use crate::{flows::util::FlowExpireCause, packet_features::PacketFeatures}; -use super::util::{FeatureStats, FlowFeature}; +use super::util::{push_csv_display, FeatureStats, FlowFeature}; #[derive(Clone)] pub struct WindowSizeStats { @@ -54,6 +54,14 @@ impl FlowFeature for WindowSizeStats { ) } + fn append_to_csv(&self, output: &mut String) { + push_csv_display(output, self.fwd_init_window_size); + push_csv_display(output, self.bwd_init_window_size); + self.window_size.append_csv_values(output); + self.fwd_window_size.append_csv_values(output); + self.bwd_window_size.append_csv_values(output); + } + fn headers() -> String { format!( "{},{},{},{},{}", diff --git a/rustiflow/src/flows/rusti_flow.rs b/rustiflow/src/flows/rusti_flow.rs index 52677f20..aa239668 100644 --- a/rustiflow/src/flows/rusti_flow.rs +++ b/rustiflow/src/flows/rusti_flow.rs @@ -157,18 +157,18 @@ impl Flow for RustiFlow { push_csv_display(output, 
u8::from(self.basic_flow.tcp_reset_after_handshake)); push_csv_display(output, self.basic_flow.tcp_close_style.as_str()); push_csv_str(output, &self.timing_stats.dump()); - push_csv_str(output, &self.iat_stats.dump()); - push_csv_str(output, &self.packet_len_stats.dump()); + self.iat_stats.append_to_csv(output); + self.packet_len_stats.append_to_csv(output); push_csv_str(output, &self.header_len_stats.dump()); - push_csv_str(output, &self.payload_len_stats.dump()); - push_csv_str(output, &self.bulk_stats.dump()); + self.payload_len_stats.append_to_csv(output); + self.bulk_stats.append_to_csv(output); push_csv_str(output, &self.subflow_stats.dump()); push_csv_str(output, &self.active_idle_stats.dump()); push_csv_str(output, &self.icmp_stats.dump()); push_csv_str(output, &self.retransmission_stats.dump()); push_csv_str(output, &self.tcp_quality_stats.dump()); - push_csv_str(output, &self.window_size_stats.dump()); - push_csv_str(output, &self.tcp_flags_stats.dump()); + self.window_size_stats.append_to_csv(output); + self.tcp_flags_stats.append_to_csv(output); push_csv_display( output, safe_per_second_rate( @@ -313,18 +313,18 @@ impl Flow for RustiFlow { push_csv_display(output, self.basic_flow.tcp_close_style.as_str()); push_csv_display(output, self.timing_stats.get_fwd_duration()); push_csv_display(output, self.timing_stats.get_bwd_duration()); - push_csv_str(output, &self.iat_stats.dump()); - push_csv_str(output, &self.packet_len_stats.dump()); + self.iat_stats.append_to_csv(output); + self.packet_len_stats.append_to_csv(output); push_csv_str(output, &self.header_len_stats.dump()); - push_csv_str(output, &self.payload_len_stats.dump()); - push_csv_str(output, &self.bulk_stats.dump()); + self.payload_len_stats.append_to_csv(output); + self.bulk_stats.append_to_csv(output); push_csv_str(output, &self.subflow_stats.dump()); push_csv_str(output, &self.active_idle_stats.dump()); push_csv_str(output, &self.icmp_stats.dump()); push_csv_str(output, 
&self.retransmission_stats.dump()); push_csv_str(output, &self.tcp_quality_stats.dump()); - push_csv_str(output, &self.window_size_stats.dump()); - push_csv_str(output, &self.tcp_flags_stats.dump()); + self.window_size_stats.append_to_csv(output); + self.tcp_flags_stats.append_to_csv(output); push_csv_display( output, safe_per_second_rate( From 952631b2e374501151e38b86d932220be11203a4 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:05:57 +0200 Subject: [PATCH 15/23] Record export comparison matrix --- AGENTS.md | 2 +- docs/engineering-notes.md | 52 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index 9fbf1c28..e40b0e84 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -134,7 +134,7 @@ in `docs/engineering-notes.md`. - [x] Identify the heaviest remaining field families inside `RustiFlow::dump` after the accepted top-level CSV cleanup, and only optimize subsystems that still show up materially in flamegraphs. -- [ ] Re-run the export-heavy comparison after each bounded structural change +- [x] Re-run the export-heavy comparison after each bounded structural change using at least: `basic --early-export 5`, `rustiflow --early-export 5`, and one no-early- export control, recording CPU, RSS, drop total, output size, and bitrate. 
diff --git a/docs/engineering-notes.md b/docs/engineering-notes.md index f079e7c9..692965cb 100644 --- a/docs/engineering-notes.md +++ b/docs/engineering-notes.md @@ -850,6 +850,58 @@ This file keeps short-lived design choices and execution notes that would make - the next export work should re-run the comparison matrix (`basic --early-export 5`, `rustiflow --early-export 5`, and a no-early-export control) before picking the next colder feature family +- Post-change export-heavy comparison matrix on the current kept codepath: + - workload shape kept constant for all three runs: + `10G`, `1400`-byte UDP, `-P 8`, `10s`, `--threads 12`, ingress on + `rustiflow-t0`, containerized `rustiflow:test-slim` + - `basic --early-export 5`: + - receiver bitrate about `9.98 Gbit/s` + - RustiFlow dropped packets `0` + - process summary about `19.3 s` user CPU / `6.1 s` sys CPU, max RSS about + `2.26 GB` + - exported output about `3,085,329` rows / `459 MB` + - export breakdown: + - `clone_count=3,085,320` + - clone time about `350 ms` + - row serialization (`dump`) time about `1,849 ms` + - buffered write time about `634 ms` + - `rustiflow --early-export 5` after the targeted hot-family change: + - receiver bitrate about `9.91 Gbit/s` + - RustiFlow dropped packets `0` + - process summary about `20.2 s` user CPU / `7.1 s` sys CPU, max RSS about + `2.29 GB` + - exported output about `700,668` rows / `721 MB` + - export breakdown: + - `clone_count=700,659` + - clone time about `121 ms` + - row serialization (`dump`) time about `5,110 ms` + - buffered write time about `790 ms` + - this run saw noticeably higher `iperf3` receiver loss than the other two + matrix runs, so the export-timer normalization is the stronger signal + than raw loss percentage here + - `rustiflow` with no `--early-export`: + - receiver bitrate about `9.98 Gbit/s` + - RustiFlow dropped packets `0` + - process summary about `17.0 s` user CPU / `6.1 s` sys CPU, max RSS about + `2.19 GB` + - exported output about 
`9` rows / `11 KB` + - export breakdown: + - `clone_count=0` + - clone time `0 ms` + - row serialization (`dump`) time about `0.097 ms` + - buffered write time about `0.030 ms` + - current interpretation after the matrix rerun: + - the current structural export work continues to preserve the practical + `10G` operating point with `0` RustiFlow drops across the matrix + - export pressure is still the dominant differentiator: + the no-early-export control nearly removes export cost entirely, while + both `--early-export 5` cases spend real time in serialization + - `basic --early-export 5` remains much cheaper per exported row than + `rustiflow --early-export 5`, but the gap is narrower after the targeted + hot-family append-path change + - the next export-path target should come from a colder second tier under + `RustiFlow::dump`, not from revisiting snapshot cloning or whole-pipeline + ownership changes - One accidental command detail also matters operationally: - passing `--early-export 0` does not disable early export; it produces effectively continuous early export because the CLI passes `Some(0)` From 5f36b6b8d1e423fd7dea2b64dc5151089772185a Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:18:11 +0200 Subject: [PATCH 16/23] Add local container publish script --- AGENTS.md | 23 ++--------------- scripts/build-push-local.sh | 49 +++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 21 deletions(-) create mode 100755 scripts/build-push-local.sh diff --git a/AGENTS.md b/AGENTS.md index e40b0e84..e732b294 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -121,27 +121,8 @@ in `docs/engineering-notes.md`. ### Current Focus -- [x] Measure how much of the remaining hot export path is snapshot ownership - cost versus row serialization cost: - isolate clone/copy work from string/CSV formatting work under the proven - `10G` `--early-export 5` case. 
-- [x] Prototype a structural export path that writes CSV fields directly to the - buffered writer instead of requiring one fully assembled row `String` per - exported flow, then reprofile the same workload. -- [x] Evaluate whether a typed export snapshot or borrow-based export view can - reduce per-export cloning/allocation without violating flow ownership, - sharding, or semantic parity. -- [x] Identify the heaviest remaining field families inside `RustiFlow::dump` - after the accepted top-level CSV cleanup, and only optimize subsystems that - still show up materially in flamegraphs. -- [x] Re-run the export-heavy comparison after each bounded structural change - using at least: - `basic --early-export 5`, `rustiflow --early-export 5`, and one no-early- - export control, recording CPU, RSS, drop total, output size, and bitrate. -- [ ] Keep updating `docs/engineering-notes.md` after each bounded experiment - with: - workload, achieved bitrate, dropped-packet total, resource summary, and what - the new bottleneck appears to be. +None currently. See `docs/engineering-notes.md` for completed experiments and +decision history. Primary files: diff --git a/scripts/build-push-local.sh b/scripts/build-push-local.sh new file mode 100755 index 00000000..bd3f3daf --- /dev/null +++ b/scripts/build-push-local.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +set -euo pipefail + +GHCR_USER="${GHCR_USER:-Str-Gen}" +GHCR_REGISTRY="${GHCR_REGISTRY:-ghcr.io}" +GHCR_OWNER="${GHCR_OWNER:-idlab-discover}" +IMAGE_NAME="${IMAGE_NAME:-rustiflow}" +PLATFORM="${PLATFORM:-linux/amd64}" +BUILDER_NAME="${BUILDER_NAME:-rustiflow-builder}" +TOKEN_FILE="${GHCR_TOKEN_FILE:-/home/strgenix/postdoc/projects/AIDE-FL/rgbcore-classic-arch.key}" + +if [[ ! 
-f "$TOKEN_FILE" ]]; then + echo "missing GHCR token file: $TOKEN_FILE" >&2 + exit 1 +fi + +GIT_SHA="$(git rev-parse --short HEAD)" +BRANCH_TAG="$(git rev-parse --abbrev-ref HEAD | tr '[:upper:]' '[:lower:]' | tr '/' '-')" +IMAGE_REF="$GHCR_REGISTRY/$GHCR_OWNER/$IMAGE_NAME" + +echo "Logging in to $GHCR_REGISTRY as $GHCR_USER" +cat "$TOKEN_FILE" | docker login "$GHCR_REGISTRY" -u "$GHCR_USER" --password-stdin + +docker buildx create --name "$BUILDER_NAME" --use >/dev/null 2>&1 || docker buildx use "$BUILDER_NAME" +docker buildx inspect --bootstrap + +echo "Building and pushing $IMAGE_REF for $PLATFORM" +docker buildx build \ + --platform "$PLATFORM" \ + -f Dockerfile \ + -t "$IMAGE_REF:sha-$GIT_SHA" \ + -t "$IMAGE_REF:$BRANCH_TAG" \ + --push \ + . + +echo "Building and pushing slim variant for $PLATFORM" +docker buildx build \ + --platform "$PLATFORM" \ + -f Dockerfile-slim \ + -t "$IMAGE_REF:sha-$GIT_SHA-slim" \ + -t "$IMAGE_REF:$BRANCH_TAG-slim" \ + --push \ + . + +echo "Pushed:" +echo " $IMAGE_REF:sha-$GIT_SHA" +echo " $IMAGE_REF:$BRANCH_TAG" +echo " $IMAGE_REF:sha-$GIT_SHA-slim" +echo " $IMAGE_REF:$BRANCH_TAG-slim" From dd980581c4b7f91847c7620f90bfdfaad4ca52d2 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:24:45 +0200 Subject: [PATCH 17/23] Disable local buildx attestations --- scripts/build-push-local.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/build-push-local.sh b/scripts/build-push-local.sh index bd3f3daf..6891a24d 100755 --- a/scripts/build-push-local.sh +++ b/scripts/build-push-local.sh @@ -26,7 +26,10 @@ docker buildx inspect --bootstrap echo "Building and pushing $IMAGE_REF for $PLATFORM" docker buildx build \ + --progress plain \ --platform "$PLATFORM" \ + --provenance false \ + --sbom false \ -f Dockerfile \ -t "$IMAGE_REF:sha-$GIT_SHA" \ -t "$IMAGE_REF:$BRANCH_TAG" \ @@ -35,7 +38,10 @@ docker buildx build \ echo "Building and pushing slim variant for $PLATFORM" docker 
buildx build \ + --progress plain \ --platform "$PLATFORM" \ + --provenance false \ + --sbom false \ -f Dockerfile-slim \ -t "$IMAGE_REF:sha-$GIT_SHA-slim" \ -t "$IMAGE_REF:$BRANCH_TAG-slim" \ From 4bf59fc7212c09fe989a8ce7dc1cc1e160a0c563 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:26:18 +0200 Subject: [PATCH 18/23] Use local image push for containers --- scripts/build-push-local.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/build-push-local.sh b/scripts/build-push-local.sh index 6891a24d..88952393 100755 --- a/scripts/build-push-local.sh +++ b/scripts/build-push-local.sh @@ -32,9 +32,11 @@ docker buildx build \ --sbom false \ -f Dockerfile \ -t "$IMAGE_REF:sha-$GIT_SHA" \ - -t "$IMAGE_REF:$BRANCH_TAG" \ - --push \ + --load \ . +docker tag "$IMAGE_REF:sha-$GIT_SHA" "$IMAGE_REF:$BRANCH_TAG" +docker push "$IMAGE_REF:sha-$GIT_SHA" +docker push "$IMAGE_REF:$BRANCH_TAG" echo "Building and pushing slim variant for $PLATFORM" docker buildx build \ @@ -44,9 +46,11 @@ docker buildx build \ --sbom false \ -f Dockerfile-slim \ -t "$IMAGE_REF:sha-$GIT_SHA-slim" \ - -t "$IMAGE_REF:$BRANCH_TAG-slim" \ - --push \ + --load \ . 
+docker tag "$IMAGE_REF:sha-$GIT_SHA-slim" "$IMAGE_REF:$BRANCH_TAG-slim" +docker push "$IMAGE_REF:sha-$GIT_SHA-slim" +docker push "$IMAGE_REF:$BRANCH_TAG-slim" echo "Pushed:" echo " $IMAGE_REF:sha-$GIT_SHA" From 3843c691fd966371b0d3a1d5a981e0501327dd1e Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:32:35 +0200 Subject: [PATCH 19/23] Default local container pushes to str-gen --- scripts/build-push-local.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build-push-local.sh b/scripts/build-push-local.sh index 88952393..24fb7a97 100755 --- a/scripts/build-push-local.sh +++ b/scripts/build-push-local.sh @@ -3,7 +3,7 @@ set -euo pipefail GHCR_USER="${GHCR_USER:-Str-Gen}" GHCR_REGISTRY="${GHCR_REGISTRY:-ghcr.io}" -GHCR_OWNER="${GHCR_OWNER:-idlab-discover}" +GHCR_OWNER="${GHCR_OWNER:-str-gen}" IMAGE_NAME="${IMAGE_NAME:-rustiflow}" PLATFORM="${PLATFORM:-linux/amd64}" BUILDER_NAME="${BUILDER_NAME:-rustiflow-builder}" From 61d75a128a761324aac4b64436637e6bd1ba01d6 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 15:54:29 +0200 Subject: [PATCH 20/23] Use existing Docker login for local pushes --- scripts/build-push-local.sh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/scripts/build-push-local.sh b/scripts/build-push-local.sh index 24fb7a97..ee51a12b 100755 --- a/scripts/build-push-local.sh +++ b/scripts/build-push-local.sh @@ -7,19 +7,12 @@ GHCR_OWNER="${GHCR_OWNER:-str-gen}" IMAGE_NAME="${IMAGE_NAME:-rustiflow}" PLATFORM="${PLATFORM:-linux/amd64}" BUILDER_NAME="${BUILDER_NAME:-rustiflow-builder}" -TOKEN_FILE="${GHCR_TOKEN_FILE:-/home/strgenix/postdoc/projects/AIDE-FL/rgbcore-classic-arch.key}" - -if [[ ! 
-f "$TOKEN_FILE" ]]; then - echo "missing GHCR token file: $TOKEN_FILE" >&2 - exit 1 -fi GIT_SHA="$(git rev-parse --short HEAD)" BRANCH_TAG="$(git rev-parse --abbrev-ref HEAD | tr '[:upper:]' '[:lower:]' | tr '/' '-')" IMAGE_REF="$GHCR_REGISTRY/$GHCR_OWNER/$IMAGE_NAME" -echo "Logging in to $GHCR_REGISTRY as $GHCR_USER" -cat "$TOKEN_FILE" | docker login "$GHCR_REGISTRY" -u "$GHCR_USER" --password-stdin +echo "Using existing Docker login for $GHCR_REGISTRY as $GHCR_USER" docker buildx create --name "$BUILDER_NAME" --use >/dev/null 2>&1 || docker buildx use "$BUILDER_NAME" docker buildx inspect --bootstrap From 65a48dc695762e5281390c337bc33dafdcd14316 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 16:02:30 +0200 Subject: [PATCH 21/23] Resolve Dockerfiles from repo root --- scripts/build-push-local.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/build-push-local.sh b/scripts/build-push-local.sh index ee51a12b..7ec5cb5d 100755 --- a/scripts/build-push-local.sh +++ b/scripts/build-push-local.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash set -euo pipefail +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "$SCRIPT_DIR/.." 
&& pwd)" + GHCR_USER="${GHCR_USER:-Str-Gen}" GHCR_REGISTRY="${GHCR_REGISTRY:-ghcr.io}" GHCR_OWNER="${GHCR_OWNER:-str-gen}" @@ -8,8 +11,8 @@ IMAGE_NAME="${IMAGE_NAME:-rustiflow}" PLATFORM="${PLATFORM:-linux/amd64}" BUILDER_NAME="${BUILDER_NAME:-rustiflow-builder}" -GIT_SHA="$(git rev-parse --short HEAD)" -BRANCH_TAG="$(git rev-parse --abbrev-ref HEAD | tr '[:upper:]' '[:lower:]' | tr '/' '-')" +GIT_SHA="$(git -C "$REPO_ROOT" rev-parse --short HEAD)" +BRANCH_TAG="$(git -C "$REPO_ROOT" rev-parse --abbrev-ref HEAD | tr '[:upper:]' '[:lower:]' | tr '/' '-')" IMAGE_REF="$GHCR_REGISTRY/$GHCR_OWNER/$IMAGE_NAME" echo "Using existing Docker login for $GHCR_REGISTRY as $GHCR_USER" @@ -23,10 +26,10 @@ docker buildx build \ --platform "$PLATFORM" \ --provenance false \ --sbom false \ - -f Dockerfile \ + -f "$REPO_ROOT/Dockerfile" \ -t "$IMAGE_REF:sha-$GIT_SHA" \ --load \ - . + "$REPO_ROOT" docker tag "$IMAGE_REF:sha-$GIT_SHA" "$IMAGE_REF:$BRANCH_TAG" docker push "$IMAGE_REF:sha-$GIT_SHA" docker push "$IMAGE_REF:$BRANCH_TAG" @@ -37,10 +40,10 @@ docker buildx build \ --platform "$PLATFORM" \ --provenance false \ --sbom false \ - -f Dockerfile-slim \ + -f "$REPO_ROOT/Dockerfile-slim" \ -t "$IMAGE_REF:sha-$GIT_SHA-slim" \ --load \ - . 
+ "$REPO_ROOT" docker tag "$IMAGE_REF:sha-$GIT_SHA-slim" "$IMAGE_REF:$BRANCH_TAG-slim" docker push "$IMAGE_REF:sha-$GIT_SHA-slim" docker push "$IMAGE_REF:$BRANCH_TAG-slim" From e6585a7872be521e3b1f4e1d0b4080afff79db81 Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 16:42:41 +0200 Subject: [PATCH 22/23] Modernize container publish workflow --- .github/workflows/docker.yml | 120 ++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 43 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index af8be7cd..8eaaf75d 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -1,49 +1,83 @@ -name: Docker Image CI +name: Publish Containers on: push: - branches: ["main"] + branches: + - main + tags: + - "v*" + workflow_dispatch: + +concurrency: + group: docker-${{ github.ref }} + cancel-in-progress: true + +env: + REGISTRY: ghcr.io + IMAGE_NAME: idlab-discover/rustiflow jobs: + docker: + name: Publish ${{ matrix.variant.name }} + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + variant: + - name: default + dockerfile: Dockerfile + latest_tag: latest + flavor_suffix: "" + - name: slim + dockerfile: Dockerfile-slim + latest_tag: slim + flavor_suffix: -slim + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract image metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + flavor: | + latest=false + tags: | + type=raw,value=${{ matrix.variant.latest_tag }},enable={{is_default_branch}} + 
type=ref,event=branch,suffix=${{ matrix.variant.flavor_suffix }} + type=ref,event=tag,suffix=${{ matrix.variant.flavor_suffix }} + type=sha,prefix=sha-,suffix=${{ matrix.variant.flavor_suffix }} + labels: | + org.opencontainers.image.title=RustiFlow + org.opencontainers.image.description=Network flow extractor with offline and realtime capture modes + org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }} + org.opencontainers.image.revision=${{ github.sha }} - docker: - - name: Publish Docker image - runs-on: ubuntu-latest - - permissions: - contents: read - packages: write - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to the GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push the Docker image - uses: docker/build-push-action@v6 - with: - context: . - file: Dockerfile - push: true - tags: ghcr.io/idlab-discover/rustiflow:ubuntu-20 - - - name: Build and push the slim Docker image - uses: docker/build-push-action@v6 - with: - context: . - file: Dockerfile-slim - push: true - tags: ghcr.io/idlab-discover/rustiflow:slim + - name: Build and push image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: ${{ matrix.variant.dockerfile }} + push: true + provenance: false + sbom: false + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} From 19f8a934a152dde5b7c34203b79ce49371331d6a Mon Sep 17 00:00:00 2001 From: Str-Gen <20303842+Str-Gen@users.noreply.github.com> Date: Tue, 31 Mar 2026 16:54:18 +0200 Subject: [PATCH 23/23] Update workflow action versions --- .github/workflows/docker.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 8eaaf75d..53758869 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -38,16 +38,16 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v4 - name: Log in to GHCR - uses: docker/login-action@v3 + uses: docker/login-action@v4 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -55,7 +55,7 @@ jobs: - name: Extract image metadata id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@v6 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} flavor: | @@ -72,7 +72,7 @@ jobs: org.opencontainers.image.revision=${{ github.sha }} - name: Build and push image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v7 with: context: . file: ${{ matrix.variant.dockerfile }}