From 0c1fab22ba4edee9183cf4761e17b5c7087d1083 Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Wed, 10 Jun 2026 10:12:59 -0300 Subject: [PATCH 1/3] Add syscall duration display (-T, --syscall-times) The sys_enter program now records a bpf_ktime_get_ns() timestamp in the enter map; submit_compact_payload() computes the elapsed time at exit and carries it in a new duration_ns wire header field (wire version bumped to 5). The enter map entry is now removed at submit time rather than in get_args() so the timestamp survives until the event is built; entries for aborted paths are overwritten by the tid's next syscall enter. The client's new -T/--syscall-times flag appends the time spent in each syscall to the trace line, strace-style: ` <0.000123>`. Events with no matching enter (e.g. execve, which uses its own enter tracepoint) simply omit the suffix. Co-Authored-By: Claude Fable 5 --- README.md | 5 +++++ pinchy-common/src/lib.rs | 4 +++- pinchy-ebpf/src/main.rs | 2 ++ pinchy-ebpf/src/util.rs | 22 +++++++++++++++++----- pinchy/src/client.rs | 20 +++++++++++++++----- pinchy/src/formatting.rs | 22 +++++++++++++++++++++- pinchy/src/tests/mod.rs | 23 +++++++++++++++++++++++ pinchy/tests/integration.rs | 23 +++++++++++++++++++++++ 8 files changed, 109 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 91c64e2..740f443 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,11 @@ pinchy -f -- sh -c 'ls | wc -l' The daemon learns about new children from the parent's fork/clone exit, so a child's very first syscalls may be missed. +Show the time spent in each syscall (like `strace -T`): +```shell +pinchy -T -p +``` + Other knobs: - `--format one-line|multi-line`: trace line formatting (default `one-line`) diff --git a/pinchy-common/src/lib.rs b/pinchy-common/src/lib.rs index 19a11d8..b066e27 100644 --- a/pinchy-common/src/lib.rs +++ b/pinchy-common/src/lib.rs @@ -101,7 +101,7 @@ pub const Q_XQUOTASYNC: i32 = 0x5807; pub const Q_XGETQSTATV: i32 = 0x5808; pub const Q_XGETNEXTQUOTA: i32 = 0x5809; -pub const WIRE_VERSION: u16 = 4; +pub const WIRE_VERSION: u16 = 5; pub const EFF_STAT_EVENTS_SUBMITTED: u32 = 0; pub const EFF_STAT_BYTES_SUBMITTED: u32 = 1; @@ -591,6 +591,8 @@ pub struct WireEventHeader { pub pid: u32, pub tid: u32, pub return_value: i64, + // Time spent in the syscall; 0 when no matching enter was seen. + pub duration_ns: u64, } #[repr(C)] diff --git a/pinchy-ebpf/src/main.rs b/pinchy-ebpf/src/main.rs index 560e632..49edf42 100644 --- a/pinchy-ebpf/src/main.rs +++ b/pinchy-ebpf/src/main.rs @@ -66,6 +66,7 @@ pub struct SyscallEnterData { pub tgid: u32, pub syscall_nr: i64, pub args: [usize; SYSCALL_ARGS_COUNT], + pub enter_ns: u64, } #[inline(always)] @@ -130,6 +131,7 @@ pub fn pinchy(ctx: TracePointContext) -> u32 { tgid, syscall_nr, args, + enter_ns: unsafe { aya_ebpf::helpers::bpf_ktime_get_ns() }, }; if let Err(err) = unsafe { ENTER_MAP.insert(&tid, &data, 0) } { diff --git a/pinchy-ebpf/src/util.rs b/pinchy-ebpf/src/util.rs index 22af40c..3e0d406 100644 --- a/pinchy-ebpf/src/util.rs +++ b/pinchy-ebpf/src/util.rs @@ -144,12 +144,11 @@ pub fn get_args(ctx: &TracePointContext, expected_syscall_nr: i64) -> Result<[us return Err(1); } - // Copy the part of the enter data we care about... + // Copy the part of the enter data we care about. The entry stays in the + // map so submit_compact_payload() can compute the syscall duration; it + // is removed there (or overwritten by the tid's next syscall enter). let args = enter_data.args; - // Then remove the item from the map. - let _ = unsafe { ENTER_MAP.remove(&tid) }; - Ok(args) } @@ -194,14 +193,27 @@ where let event = unsafe { event.assume_init_mut() as *mut WireCompactPayload as *mut u8 }; let header_ptr = event as *mut WireEventHeader; + let tid = ctx.pid(); + let mut duration_ns = 0u64; + + if let Some(enter_data) = unsafe { ENTER_MAP.get(&tid) } { + if enter_data.syscall_nr == syscall_nr { + duration_ns = unsafe { aya_ebpf::helpers::bpf_ktime_get_ns() } + .saturating_sub(enter_data.enter_ns); + + let _ = unsafe { ENTER_MAP.remove(&tid) }; + } + } + unsafe { header_ptr.write(WireEventHeader { version: WIRE_VERSION, payload_len: core::mem::size_of::() as u32, syscall_nr, pid: ctx.tgid(), - tid: ctx.pid(), + tid, return_value, + duration_ns, }); } diff --git a/pinchy/src/client.rs b/pinchy/src/client.rs index 8b9e3cd..a1d93b3 100644 --- a/pinchy/src/client.rs +++ b/pinchy/src/client.rs @@ -150,6 +150,10 @@ struct Args { #[arg(short = 'f', long = "follow-forks")] follow_forks: bool, + /// Show the time spent in each syscall, strace-style + #[arg(short = 'T', long = "syscall-times")] + syscall_times: bool, + /// Write trace output to FILE instead of stderr #[arg(short = 'o', long = "output")] output: Option, @@ -193,13 +197,13 @@ async fn main() -> Result<()> { // Read everything there is to read, the server will close the write end // of the pipe - relay_to_sink(fd, style, args.output).await?; + relay_to_sink(fd, style, args.output, args.syscall_times).await?; pinchy_client::cleanup_and_quit(pid); } else if let Some(pid) = args.pid { let fd = pinchy_client::attach(pid, syscalls, args.follow_forks).await; - relay_to_sink(fd, style, args.output).await?; + relay_to_sink(fd, style, args.output, args.syscall_times).await?; } else { // Print clap's usage message to stderr and exit eprintln!("{}", Args::command().render_help()); @@ -215,15 +219,16 @@ async fn relay_to_sink( fd: OwnedFd, style: FormattingStyle, output: Option, + syscall_times: bool, ) -> Result<()> { match output { Some(path) => { let file = tokio::fs::File::create(&path).await?; - relay_trace(fd, style, file, false).await + relay_trace(fd, style, file, false, syscall_times).await } None => { let is_terminal = std::io::stderr().is_terminal(); - relay_trace(fd, style, tokio::io::stderr(), is_terminal).await + relay_trace(fd, style, tokio::io::stderr(), is_terminal, syscall_times).await } } } @@ -233,6 +238,7 @@ async fn relay_trace( formatting_style: FormattingStyle, sink: W, sink_is_terminal: bool, + syscall_times: bool, ) -> Result<()> { let mut reader = tokio::io::BufReader::with_capacity( 64 * 1024, @@ -298,7 +304,11 @@ async fn relay_trace( // Safety: we own the output vector and won't progress before handle_event() returns. let pin_output = unsafe { Pin::new_unchecked(&mut output) }; - let formatter = Formatter::new(pin_output, formatting_style); + let mut formatter = Formatter::new(pin_output, formatting_style); + + if syscall_times && header.duration_ns > 0 { + formatter = formatter.with_duration(header.duration_ns); + } events::handle_event(&header, &payload, formatter).await?; diff --git a/pinchy/src/formatting.rs b/pinchy/src/formatting.rs index 3108f46..32a6b22 100644 --- a/pinchy/src/formatting.rs +++ b/pinchy/src/formatting.rs @@ -70,11 +70,23 @@ pub enum FormattingStyle { pub struct Formatter<'f> { style: FormattingStyle, output: Pin<&'f mut dyn AsyncWrite>, + duration_ns: Option, } impl<'f> Formatter<'f> { pub fn new(output: Pin<&'f mut dyn AsyncWrite>, style: FormattingStyle) -> Self { - Formatter { style, output } + Formatter { + style, + output, + duration_ns: None, + } + } + + // When set, finish() appends the time spent in the syscall, strace-style: + // ` <0.000123>`. + pub fn with_duration(mut self, duration_ns: u64) -> Self { + self.duration_ns = Some(duration_ns); + self } pub async fn push_syscall(mut self, pid: u32, syscall_nr: i64) -> Result> { @@ -224,6 +236,14 @@ impl<'f> SyscallFormatter<'f> { output.write_all(suffix).await?; } + if let Some(duration_ns) = self.formatter.duration_ns { + let seconds = duration_ns as f64 / 1_000_000_000.0; + + output + .write_all(format!(" <{seconds:.6}>").as_bytes()) + .await?; + } + output.write_all(b"\n").await?; Ok(self.formatter) diff --git a/pinchy/src/tests/mod.rs b/pinchy/src/tests/mod.rs index 72a9994..8ebbead 100644 --- a/pinchy/src/tests/mod.rs +++ b/pinchy/src/tests/mod.rs @@ -29,6 +29,7 @@ pub(crate) fn make_compact_test_data( pid, tid: pid, return_value, + duration_ns: 0, }; let payload = unsafe { @@ -64,6 +65,28 @@ macro_rules! syscall_test { }; } +#[::tokio::test] +async fn syscall_duration_suffix() { + let data = pinchy_common::CloseData { fd: 3 }; + let (header, payload) = + make_compact_test_data(pinchy_common::syscalls::SYS_close, 100, 0, &data); + + let mut output: Vec = vec![]; + let pin_output = unsafe { std::pin::Pin::new_unchecked(&mut output) }; + let formatter = + crate::formatting::Formatter::new(pin_output, crate::formatting::FormattingStyle::OneLine) + .with_duration(123_456); + + crate::events::handle_event(&header, &payload, formatter) + .await + .unwrap(); + + assert_eq!( + String::from_utf8_lossy(&output), + "100 close(fd: 3) = 0 (success) <0.000123>\n" + ); +} + syscall_test!( parse_generic_syscall, { diff --git a/pinchy/tests/integration.rs b/pinchy/tests/integration.rs index 710b454..27f6867 100644 --- a/pinchy/tests/integration.rs +++ b/pinchy/tests/integration.rs @@ -381,6 +381,29 @@ fn follow_forks() { )); } +#[test] +fn syscall_times() { + let pinchy = PinchyTest::new(); + + let handle = run_workload_with_args(&pinchy, &["fcntl"], "fcntl_test", &["-T"]); + + // Every trace line should carry a duration suffix. + let expected_output = escaped_regex(indoc! {r#" + @PID@ fcntl(fd: 3, cmd: F_GETFD, arg: 0x0) = 0 (success) <@NUMBER@.@NUMBER@> + "#}); + + let output = handle.join().unwrap(); + Assert::new(output) + .success() + .stderr(predicate::str::is_match(&expected_output).unwrap()); + + // Server output - has to be at the end, since we kill the server for waiting. + let output = pinchy.wait(); + Assert::new(output) + .success() + .stdout(predicate::str::ends_with("Exiting...\n")); +} + #[test] fn pipe_operations_syscalls() { let pinchy = PinchyTest::new(); From 04d29e5ab2e9a5fba11d10e37e0d5fe05805d20d Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Wed, 10 Jun 2026 10:38:17 -0300 Subject: [PATCH 2/3] Annotate trace lines with the process name when following forks The wire header gains a fixed-size comm field (16 bytes, the kernel's TASK_COMM_LEN). With -f, the client annotates every line with the process name next to the PID, so interleaved output from several processes stays readable. Capturing comm (and the syscall duration) in every monomorphization of submit_compact_payload() pushed the larger eBPF programs past the BPF branch offset range at link time ("Branch target out of insn range"), x86_64 being the first to overflow. Both values are now captured once per exit, in the dispatching exit program before the tail call, into a per-CPU scratch map; the submit path only does a cheap per-CPU read. Tracepoint programs are not preempted, so the slot is stable across the tail call. Co-Authored-By: Claude Fable 5 --- README.md | 5 ++- pinchy-common/src/lib.rs | 5 +++ pinchy-ebpf/src/main.rs | 74 ++++++++++++++++++++++++++++++++++++- pinchy-ebpf/src/util.rs | 31 ++++++++-------- pinchy/src/client.rs | 36 ++++++++++++++++-- pinchy/src/formatting.rs | 28 ++++++++++++++ pinchy/src/server.rs | 55 +++++++++++++++++---------- pinchy/src/tests/mod.rs | 26 +++++++++++++ pinchy/tests/integration.rs | 3 +- 9 files changed, 219 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 740f443..4a24245 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,9 @@ Follow child processes created by the tracee (like `strace -f`): ```shell pinchy -f -- sh -c 'ls | wc -l' ``` -The daemon learns about new children from the parent's fork/clone exit, so a -child's very first syscalls may be missed. +Trace lines are annotated with the process name (`1234`) so interleaved +output stays readable. The daemon learns about new children from the parent's +fork/clone exit, so a child's very first syscalls may be missed. Show the time spent in each syscall (like `strace -T`): ```shell diff --git a/pinchy-common/src/lib.rs b/pinchy-common/src/lib.rs index b066e27..c1597e1 100644 --- a/pinchy-common/src/lib.rs +++ b/pinchy-common/src/lib.rs @@ -582,6 +582,9 @@ pub fn max_compact_payload_size() -> usize { max_payload_size } +// Same as the kernel's TASK_COMM_LEN. +pub const COMM_LEN: usize = 16; + #[repr(C)] #[derive(Clone, Copy, Debug, Default)] pub struct WireEventHeader { @@ -593,6 +596,8 @@ pub struct WireEventHeader { pub return_value: i64, // Time spent in the syscall; 0 when no matching enter was seen. pub duration_ns: u64, + // NUL-padded name of the process, as the kernel sees it. + pub comm: [u8; COMM_LEN], } #[repr(C)] diff --git a/pinchy-ebpf/src/main.rs b/pinchy-ebpf/src/main.rs index 49edf42..d39087b 100644 --- a/pinchy-ebpf/src/main.rs +++ b/pinchy-ebpf/src/main.rs @@ -8,7 +8,7 @@ use aya_ebpf::{ macros::{map, tracepoint}, - maps::{Array, HashMap, ProgramArray, RingBuf}, + maps::{Array, HashMap, PerCpuArray, ProgramArray, RingBuf}, programs::TracePointContext, EbpfContext as _, }; @@ -58,6 +58,50 @@ static mut EVENTS: RingBuf = RingBuf::with_byte_size(83886080, 0); #[map] static mut ENTER_MAP: HashMap = HashMap::with_max_entries(10240, 0); +// Per-exit context shared between the dispatching exit program and the +// tail-called handler that builds the event. Filled once per exit so the +// many submit_compact_payload() monomorphizations stay small -- inlining +// the map lookups and helper calls at every submit site pushed the larger +// programs past the BPF branch offset range. Tracepoint programs are not +// preempted, so the per-CPU slot is stable across the tail call. +#[repr(C)] +#[derive(Clone, Copy)] +pub struct ExitContext { + pub duration_ns: u64, + pub comm: [u8; pinchy_common::COMM_LEN], +} + +#[map] +static mut EXIT_CONTEXT: PerCpuArray = PerCpuArray::with_max_entries(1, 0); + +// Record the time spent in the syscall (when we saw its enter) and the +// process name for the handler this exit tail-calls into. +#[inline(always)] +pub fn fill_exit_context(ctx: &TracePointContext, syscall_nr: i64) { + let tid = ctx.pid(); + + let mut duration_ns = 0u64; + + if let Some(enter_data) = unsafe { ENTER_MAP.get(&tid) } { + if enter_data.syscall_nr == syscall_nr { + duration_ns = unsafe { aya_ebpf::helpers::bpf_ktime_get_ns() } + .saturating_sub(enter_data.enter_ns); + } + } + + if let Some(exit_ctx) = unsafe { EXIT_CONTEXT.get_ptr_mut(0) } { + unsafe { + (*exit_ctx).duration_ns = duration_ns; + + let comm_ptr = core::ptr::addr_of_mut!((*exit_ctx).comm); + aya_ebpf::helpers::gen::bpf_get_current_comm( + comm_ptr as *mut core::ffi::c_void, + pinchy_common::COMM_LEN as u32, + ); + } + } +} + #[map(name = "SYSCALL_TAILCALLS")] static mut SYSCALL_TAILCALLS: ProgramArray = ProgramArray::pinned(512, 0); @@ -184,6 +228,8 @@ pub fn pinchy_exit(ctx: TracePointContext) -> u32 { return Ok(0); } + fill_exit_context(&ctx, syscall_nr); + unsafe { let Err(_) = SYSCALL_TAILCALLS.tail_call(&ctx, syscall_nr as u32); error!(&ctx, "failed tailcall for syscall {}", syscall_nr); @@ -800,6 +846,32 @@ pub fn syscall_exit_trivial(ctx: TracePointContext) -> u32 { }, )?; } + _ => { + trace!(&ctx, "unknown syscall {}", syscall_nr); + } + } + + Ok(()) + } + match inner(ctx) { + Ok(_) => 0, + Err(ret) => ret, + } +} + +// Split from syscall_exit_trivial: with every submit site inlined, a single +// program holding all trivial syscalls exceeds the BPF branch offset range +// on x86_64 ("Branch target out of insn range" at link time). Each syscall +// is registered against exactly one of the two programs in the tailcall +// array. +#[tracepoint] +pub fn syscall_exit_trivial2(ctx: TracePointContext) -> u32 { + fn inner(ctx: TracePointContext) -> Result<(), u32> { + let syscall_nr = get_syscall_nr(&ctx)?; + let args = get_args(&ctx, syscall_nr)?; + let return_value = util::get_return_value(&ctx)?; + + match syscall_nr { syscalls::SYS_pidfd_getfd => { crate::util::submit_compact_payload::( &ctx, diff --git a/pinchy-ebpf/src/util.rs b/pinchy-ebpf/src/util.rs index 3e0d406..a031f73 100644 --- a/pinchy-ebpf/src/util.rs +++ b/pinchy-ebpf/src/util.rs @@ -11,7 +11,7 @@ use pinchy_common::{ WireEventHeader, WIRE_VERSION, }; -use crate::{ENTER_MAP, EVENTS, SYSCALL_OFFSET, SYSCALL_RETURN_OFFSET}; +use crate::{ENTER_MAP, EVENTS, EXIT_CONTEXT, SYSCALL_OFFSET, SYSCALL_RETURN_OFFSET}; mod efficiency { #[cfg(feature = "efficiency-metrics")] @@ -144,11 +144,13 @@ pub fn get_args(ctx: &TracePointContext, expected_syscall_nr: i64) -> Result<[us return Err(1); } - // Copy the part of the enter data we care about. The entry stays in the - // map so submit_compact_payload() can compute the syscall duration; it - // is removed there (or overwritten by the tid's next syscall enter). + // Copy the part of the enter data we care about... let args = enter_data.args; + // Then remove the item from the map. The syscall duration was already + // computed from the entry in fill_exit_context() before the tail call. + let _ = unsafe { ENTER_MAP.remove(&tid) }; + Ok(args) } @@ -193,17 +195,13 @@ where let event = unsafe { event.assume_init_mut() as *mut WireCompactPayload as *mut u8 }; let header_ptr = event as *mut WireEventHeader; - let tid = ctx.pid(); - let mut duration_ns = 0u64; - - if let Some(enter_data) = unsafe { ENTER_MAP.get(&tid) } { - if enter_data.syscall_nr == syscall_nr { - duration_ns = unsafe { aya_ebpf::helpers::bpf_ktime_get_ns() } - .saturating_sub(enter_data.enter_ns); - - let _ = unsafe { ENTER_MAP.remove(&tid) }; - } - } + // The duration and process name were captured once per exit in + // fill_exit_context(); a per-CPU read here keeps this (heavily + // monomorphized) function small. + let (duration_ns, comm) = match unsafe { EXIT_CONTEXT.get(0) } { + Some(exit_ctx) => (exit_ctx.duration_ns, exit_ctx.comm), + None => (0, [0u8; pinchy_common::COMM_LEN]), + }; unsafe { header_ptr.write(WireEventHeader { @@ -211,9 +209,10 @@ where payload_len: core::mem::size_of::() as u32, syscall_nr, pid: ctx.tgid(), - tid, + tid: ctx.pid(), return_value, duration_ns, + comm, }); } diff --git a/pinchy/src/client.rs b/pinchy/src/client.rs index a1d93b3..81c83b1 100644 --- a/pinchy/src/client.rs +++ b/pinchy/src/client.rs @@ -197,13 +197,27 @@ async fn main() -> Result<()> { // Read everything there is to read, the server will close the write end // of the pipe - relay_to_sink(fd, style, args.output, args.syscall_times).await?; + relay_to_sink( + fd, + style, + args.output, + args.syscall_times, + args.follow_forks, + ) + .await?; pinchy_client::cleanup_and_quit(pid); } else if let Some(pid) = args.pid { let fd = pinchy_client::attach(pid, syscalls, args.follow_forks).await; - relay_to_sink(fd, style, args.output, args.syscall_times).await?; + relay_to_sink( + fd, + style, + args.output, + args.syscall_times, + args.follow_forks, + ) + .await?; } else { // Print clap's usage message to stderr and exit eprintln!("{}", Args::command().render_help()); @@ -220,15 +234,24 @@ async fn relay_to_sink( style: FormattingStyle, output: Option, syscall_times: bool, + annotate_comm: bool, ) -> Result<()> { match output { Some(path) => { let file = tokio::fs::File::create(&path).await?; - relay_trace(fd, style, file, false, syscall_times).await + relay_trace(fd, style, file, false, syscall_times, annotate_comm).await } None => { let is_terminal = std::io::stderr().is_terminal(); - relay_trace(fd, style, tokio::io::stderr(), is_terminal, syscall_times).await + relay_trace( + fd, + style, + tokio::io::stderr(), + is_terminal, + syscall_times, + annotate_comm, + ) + .await } } } @@ -239,6 +262,7 @@ async fn relay_trace( sink: W, sink_is_terminal: bool, syscall_times: bool, + annotate_comm: bool, ) -> Result<()> { let mut reader = tokio::io::BufReader::with_capacity( 64 * 1024, @@ -310,6 +334,10 @@ async fn relay_trace( formatter = formatter.with_duration(header.duration_ns); } + if annotate_comm { + formatter = formatter.with_comm(header.comm); + } + events::handle_event(&header, &payload, formatter).await?; sink.write_all(&output).await?; diff --git a/pinchy/src/formatting.rs b/pinchy/src/formatting.rs index 32a6b22..533b4a5 100644 --- a/pinchy/src/formatting.rs +++ b/pinchy/src/formatting.rs @@ -71,6 +71,7 @@ pub struct Formatter<'f> { style: FormattingStyle, output: Pin<&'f mut dyn AsyncWrite>, duration_ns: Option, + comm: Option<[u8; pinchy_common::COMM_LEN]>, } impl<'f> Formatter<'f> { @@ -79,6 +80,7 @@ impl<'f> Formatter<'f> { style, output, duration_ns: None, + comm: None, } } @@ -89,14 +91,40 @@ impl<'f> Formatter<'f> { self } + // When set, push_syscall() annotates the PID with the process name: + // `1234`. Used when following forks, where lines from + // several processes interleave. + pub fn with_comm(mut self, comm: [u8; pinchy_common::COMM_LEN]) -> Self { + self.comm = Some(comm); + self + } + pub async fn push_syscall(mut self, pid: u32, syscall_nr: i64) -> Result> { let syscall_name = syscall_name_from_nr(syscall_nr) .ok_or_else(|| anyhow!(format!("Unknown syscall: {syscall_nr}")))?; + let comm = self.comm; let output = &mut self.output; output.write_all(pid.to_string().as_bytes()).await?; + if let Some(mut comm) = comm { + let len = comm.iter().position(|&b| b == 0).unwrap_or(comm.len()); + + // comm is set by the traced process (prctl PR_SET_NAME) and can + // hold arbitrary bytes; neutralize anything that could carry a + // terminal escape sequence. + for byte in comm[..len].iter_mut() { + if !byte.is_ascii_graphic() && *byte != b' ' { + *byte = b'?'; + } + } + + output.write_all(b"<").await?; + output.write_all(&comm[..len]).await?; + output.write_all(b">").await?; + } + match self.style { FormattingStyle::OneLine => output.write_all(b" ").await?, FormattingStyle::MultiLine => output.write_all(b"\n\t").await?, diff --git a/pinchy/src/server.rs b/pinchy/src/server.rs index 24455c8..0536c52 100644 --- a/pinchy/src/server.rs +++ b/pinchy/src/server.rs @@ -396,14 +396,6 @@ fn load_tailcalls(ebpf: &mut Ebpf) -> anyhow::Result<()> { const TRIVIAL_SYSCALLS: &[i64] = &[ syscalls::SYS_dup3, syscalls::SYS_fcntl, - syscalls::SYS_sched_yield, - syscalls::SYS_getpid, - syscalls::SYS_gettid, - syscalls::SYS_getuid, - syscalls::SYS_geteuid, - syscalls::SYS_getgid, - syscalls::SYS_getegid, - syscalls::SYS_getppid, syscalls::SYS_brk, syscalls::SYS_mprotect, syscalls::SYS_getrandom, @@ -414,12 +406,9 @@ fn load_tailcalls(ebpf: &mut Ebpf) -> anyhow::Result<()> { syscalls::SYS_rt_tgsigqueueinfo, syscalls::SYS_fchdir, syscalls::SYS_exit_group, - syscalls::SYS_rt_sigreturn, syscalls::SYS_dup, #[cfg(target_arch = "x86_64")] syscalls::SYS_dup2, - syscalls::SYS_sync, - syscalls::SYS_setsid, syscalls::SYS_setuid, syscalls::SYS_setgid, syscalls::SYS_close_range, @@ -427,7 +416,6 @@ fn load_tailcalls(ebpf: &mut Ebpf) -> anyhow::Result<()> { syscalls::SYS_getsid, syscalls::SYS_setpgid, syscalls::SYS_umask, - syscalls::SYS_vhangup, syscalls::SYS_ioprio_get, syscalls::SYS_ioprio_set, syscalls::SYS_setregid, @@ -436,10 +424,6 @@ fn load_tailcalls(ebpf: &mut Ebpf) -> anyhow::Result<()> { syscalls::SYS_setreuid, #[cfg(target_arch = "x86_64")] syscalls::SYS_alarm, - #[cfg(target_arch = "x86_64")] - syscalls::SYS_pause, - #[cfg(target_arch = "x86_64")] - syscalls::SYS_getpgrp, syscalls::SYS_personality, syscalls::SYS_getpriority, syscalls::SYS_setpriority, @@ -462,6 +446,28 @@ fn load_tailcalls(ebpf: &mut Ebpf) -> anyhow::Result<()> { syscalls::SYS_fchmod, syscalls::SYS_fchown, syscalls::SYS_flock, + syscalls::SYS_pidfd_open, + syscalls::SYS_process_mrelease, + syscalls::SYS_inotify_init1, + syscalls::SYS_inotify_rm_watch, + ]; + const TRIVIAL2_SYSCALLS: &[i64] = &[ + syscalls::SYS_sched_yield, + syscalls::SYS_getpid, + syscalls::SYS_gettid, + syscalls::SYS_getuid, + syscalls::SYS_geteuid, + syscalls::SYS_getgid, + syscalls::SYS_getegid, + syscalls::SYS_getppid, + syscalls::SYS_rt_sigreturn, + syscalls::SYS_sync, + syscalls::SYS_setsid, + syscalls::SYS_vhangup, + #[cfg(target_arch = "x86_64")] + syscalls::SYS_pause, + #[cfg(target_arch = "x86_64")] + syscalls::SYS_getpgrp, syscalls::SYS_futex_wake, syscalls::SYS_map_shadow_stack, syscalls::SYS_lsm_set_self_attr, @@ -469,9 +475,7 @@ fn load_tailcalls(ebpf: &mut Ebpf) -> anyhow::Result<()> { #[cfg(target_arch = "x86_64")] syscalls::SYS_epoll_create, syscalls::SYS_epoll_create1, - syscalls::SYS_pidfd_open, syscalls::SYS_pidfd_getfd, - syscalls::SYS_process_mrelease, syscalls::SYS_mlock, syscalls::SYS_mlock2, syscalls::SYS_mlockall, @@ -492,9 +496,7 @@ fn load_tailcalls(ebpf: &mut Ebpf) -> anyhow::Result<()> { syscalls::SYS_eventfd2, #[cfg(target_arch = "x86_64")] syscalls::SYS_inotify_init, - syscalls::SYS_inotify_init1, syscalls::SYS_set_mempolicy_home_node, - syscalls::SYS_inotify_rm_watch, syscalls::SYS_timer_delete, syscalls::SYS_timer_getoverrun, #[cfg(target_arch = "x86_64")] @@ -553,6 +555,19 @@ fn load_tailcalls(ebpf: &mut Ebpf) -> anyhow::Result<()> { explicitly_supported.insert(syscall_nr); } + // Second half of the trivial syscalls; see the eBPF side for why the + // handler is split in two. + let prog: &mut aya::programs::TracePoint = ebpf + .program_mut("syscall_exit_trivial2") + .context("missing syscall_exit_trivial2 tailcall")? + .try_into()?; + prog.load() + .context("trying to load syscall_exit_trivial2 into eBPF")?; + for &syscall_nr in TRIVIAL2_SYSCALLS { + prog_array.set(syscall_nr as u32, prog.fd()?, 0)?; + explicitly_supported.insert(syscall_nr); + } + // execve and execveat share the same handler, see eBPF side comments for details. let prog: &mut aya::programs::TracePoint = ebpf .program_mut("syscall_exit_execve") diff --git a/pinchy/src/tests/mod.rs b/pinchy/src/tests/mod.rs index 8ebbead..fec294b 100644 --- a/pinchy/src/tests/mod.rs +++ b/pinchy/src/tests/mod.rs @@ -30,6 +30,7 @@ pub(crate) fn make_compact_test_data( tid: pid, return_value, duration_ns: 0, + comm: [0u8; pinchy_common::COMM_LEN], }; let payload = unsafe { @@ -87,6 +88,31 @@ async fn syscall_duration_suffix() { ); } +#[::tokio::test] +async fn syscall_comm_annotation() { + let data = pinchy_common::CloseData { fd: 3 }; + let (mut header, payload) = + make_compact_test_data(pinchy_common::syscalls::SYS_close, 100, 0, &data); + + let comm = b"test-helper"; + header.comm[..comm.len()].copy_from_slice(comm); + + let mut output: Vec = vec![]; + let pin_output = unsafe { std::pin::Pin::new_unchecked(&mut output) }; + let formatter = + crate::formatting::Formatter::new(pin_output, crate::formatting::FormattingStyle::OneLine) + .with_comm(header.comm); + + crate::events::handle_event(&header, &payload, formatter) + .await + .unwrap(); + + assert_eq!( + String::from_utf8_lossy(&output), + "100 close(fd: 3) = 0 (success)\n" + ); +} + syscall_test!( parse_generic_syscall, { diff --git a/pinchy/tests/integration.rs b/pinchy/tests/integration.rs index 27f6867..c620910 100644 --- a/pinchy/tests/integration.rs +++ b/pinchy/tests/integration.rs @@ -362,8 +362,9 @@ fn follow_forks() { // follow-forks trace sees the child's openat. let handle = run_workload_with_args(&pinchy, &["openat"], "fork_test", &["-f"]); + // Lines are annotated with the process name when following forks. let expected_output = escaped_regex(indoc! {r#" - @PID@ openat(dfd: AT_FDCWD, pathname: "/dev/null", flags: 0x0 (O_RDONLY), mode: 0) = @NUMBER@ (fd) + @PID@ openat(dfd: AT_FDCWD, pathname: "/dev/null", flags: 0x0 (O_RDONLY), mode: 0) = @NUMBER@ (fd) "#}); let output = handle.join().unwrap(); From a5e492f084094c974ab1e217ba1c7031eac4eb1d Mon Sep 17 00:00:00 2001 From: Gustavo Noronha Silva Date: Wed, 10 Jun 2026 10:50:32 -0300 Subject: [PATCH 3/3] Format the follow-forks process name as a fixed-width column One-line traces now show the process name as a fixed-width bracketed field after the PID (`1317891 [ls ] brk(...)`). The field is 8 characters wide to keep the columns compact; longer names are truncated. The multi-line style instead puts the name unpadded on the header line (`1322191 sh`), bounded at 65 bytes for when richer process information (full command line, terminal-width awareness) uses the room later. Co-Authored-By: Claude Fable 5 --- README.md | 4 ++-- pinchy/src/formatting.rs | 35 +++++++++++++++++++++++++++++------ pinchy/src/tests/mod.rs | 29 ++++++++++++++++++++++++++++- pinchy/tests/integration.rs | 2 +- 4 files changed, 60 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4a24245..9caff5f 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,8 @@ Follow child processes created by the tracee (like `strace -f`): ```shell pinchy -f -- sh -c 'ls | wc -l' ``` -Trace lines are annotated with the process name (`1234`) so interleaved -output stays readable. The daemon learns about new children from the parent's +Trace lines are annotated with a fixed-width process name field +(`1234 [sh ]`) so interleaved output stays readable. The daemon learns about new children from the parent's fork/clone exit, so a child's very first syscalls may be missed. Show the time spent in each syscall (like `strace -T`): diff --git a/pinchy/src/formatting.rs b/pinchy/src/formatting.rs index 533b4a5..3bb43fe 100644 --- a/pinchy/src/formatting.rs +++ b/pinchy/src/formatting.rs @@ -91,9 +91,9 @@ impl<'f> Formatter<'f> { self } - // When set, push_syscall() annotates the PID with the process name: - // `1234`. Used when following forks, where lines from - // several processes interleave. + // When set, push_syscall() annotates the PID with the process name. + // Used when following forks, where lines from several processes + // interleave. pub fn with_comm(mut self, comm: [u8; pinchy_common::COMM_LEN]) -> Self { self.comm = Some(comm); self @@ -103,6 +103,15 @@ impl<'f> Formatter<'f> { let syscall_name = syscall_name_from_nr(syscall_nr) .ok_or_else(|| anyhow!(format!("Unknown syscall: {syscall_nr}")))?; + // Width of the one-line `[comm]` field; longer names are truncated + // to keep the columns compact. + const COMM_DISPLAY_WIDTH: usize = pinchy_common::COMM_LEN / 2; + + // Bound for the process name on multi-line headers; comm is much + // shorter today, but richer process information (full command line, + // terminal-width awareness) may use the room later. + const MULTILINE_COMM_MAX: usize = 65; + let comm = self.comm; let output = &mut self.output; @@ -120,9 +129,23 @@ impl<'f> Formatter<'f> { } } - output.write_all(b"<").await?; - output.write_all(&comm[..len]).await?; - output.write_all(b">").await?; + match self.style { + FormattingStyle::OneLine => { + let len = len.min(COMM_DISPLAY_WIDTH); + let mut field = [b' '; COMM_DISPLAY_WIDTH]; + field[..len].copy_from_slice(&comm[..len]); + + output.write_all(b" [").await?; + output.write_all(&field).await?; + output.write_all(b"]").await?; + } + FormattingStyle::MultiLine => { + output.write_all(b" ").await?; + output + .write_all(&comm[..len.min(MULTILINE_COMM_MAX)]) + .await?; + } + } } match self.style { diff --git a/pinchy/src/tests/mod.rs b/pinchy/src/tests/mod.rs index fec294b..52c6a27 100644 --- a/pinchy/src/tests/mod.rs +++ b/pinchy/src/tests/mod.rs @@ -109,7 +109,34 @@ async fn syscall_comm_annotation() { assert_eq!( String::from_utf8_lossy(&output), - "100 close(fd: 3) = 0 (success)\n" + "100 [test-hel] close(fd: 3) = 0 (success)\n" + ); +} + +#[::tokio::test] +async fn syscall_comm_annotation_multiline() { + let data = pinchy_common::CloseData { fd: 3 }; + let (mut header, payload) = + make_compact_test_data(pinchy_common::syscalls::SYS_close, 100, 0, &data); + + let comm = b"test-helper"; + header.comm[..comm.len()].copy_from_slice(comm); + + let mut output: Vec = vec![]; + let pin_output = unsafe { std::pin::Pin::new_unchecked(&mut output) }; + let formatter = crate::formatting::Formatter::new( + pin_output, + crate::formatting::FormattingStyle::MultiLine, + ) + .with_comm(header.comm); + + crate::events::handle_event(&header, &payload, formatter) + .await + .unwrap(); + + assert_eq!( + String::from_utf8_lossy(&output), + "100 test-helper\n\tclose(\n\t fd: 3\n\t) = 0 (success)\n" ); } diff --git a/pinchy/tests/integration.rs b/pinchy/tests/integration.rs index c620910..1117706 100644 --- a/pinchy/tests/integration.rs +++ b/pinchy/tests/integration.rs @@ -364,7 +364,7 @@ fn follow_forks() { // Lines are annotated with the process name when following forks. let expected_output = escaped_regex(indoc! {r#" - @PID@ openat(dfd: AT_FDCWD, pathname: "/dev/null", flags: 0x0 (O_RDONLY), mode: 0) = @NUMBER@ (fd) + @PID@ [test-hel] openat(dfd: AT_FDCWD, pathname: "/dev/null", flags: 0x0 (O_RDONLY), mode: 0) = @NUMBER@ (fd) "#}); let output = handle.join().unwrap();