Skip to content

Commit 189bfb9

Browse files
committed
feat: Improve execve trace parsing and event handling
- Add robust execve trace parsing with memory pointer resolution
- Implement fallback mechanisms for command resolution
- Add UnparsedEvent handling in event processing
- Fix event aggregation to handle raw trace lines
- Update summary generation to filter invalid events
- Add memory reading utilities for process inspection

This commit improves the reliability of execve trace parsing by:
1. Adding support for pointer-based execve traces
2. Implementing memory reading to resolve command names
3. Adding proper fallback to /proc/<pid>/cmdline
4. Gracefully handling unparsed events in the pipeline
5. Ensuring event aggregation and summary generation work with mixed event types

The changes maintain backward compatibility while adding support for newer trace formats that only provide memory pointers.
1 parent 36a9657 commit 189bfb9

3 files changed

Lines changed: 105 additions & 9 deletions

File tree

linux_edr/app.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from .config import Config
1212
from .report_manager import ReportManager
1313
from .models import Cell
14-
from .domain.models.events import BaseSyscallEvent, ExecveEvent
14+
from .domain.models.events import BaseSyscallEvent, ExecveEvent, UnparsedEvent
1515

1616

1717
def setup_logging(debug: bool = False) -> None:
@@ -327,12 +327,20 @@ def _process_event(self, evt: BaseSyscallEvent) -> None:
327327
if self.verbose_debug:
328328
self._log_debug_event(evt)
329329

330-
# If the trace reader already produced a validated ExecveEvent model, buffer it directly.
331330
if isinstance(evt, BaseSyscallEvent):
331+
# Parsed syscall event – store its dict representation.
332332
self.agg.add(evt.model_dump() if hasattr(evt, "model_dump") else evt.dict())
333333
return
334-
else:
335-
logging.warning(f"Invalid event type: {type(evt)}")
334+
335+
# Handle unparsed trace lines gracefully.
336+
if isinstance(evt, UnparsedEvent):
337+
# Optionally buffer raw lines for troubleshooting; they will be ignored by most
338+
# downstream processing since they lack a "command" key.
339+
self.agg.add({"raw_line": evt.raw_line})
340+
return
341+
342+
# For truly unexpected types, log a warning (emitted for every such event; there is no de-duplication).
343+
logging.warning(f"Invalid event type: {type(evt)}")
336344

337345
def _log_debug_event(self, evt: BaseSyscallEvent) -> None:
338346
"""

linux_edr/summary.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,12 @@ def build_summary(
2121
now = datetime.now(timezone.utc)
2222
start = now - timedelta(minutes=window_minutes)
2323

24-
# Count occurrences of each command
25-
counts = Counter(evt["command"] for evt in events)
24+
# Count occurrences of each command, ignoring events without a command field (e.g., UnparsedEvent)
25+
counts = Counter(
26+
evt["command"]
27+
for evt in events
28+
if isinstance(evt, dict) and evt.get("command")
29+
)
2630
proc_summary: Dict[str, int] = dict(counts)
2731

2832
report_data = {

linux_edr/trace.py

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
import logging
66
import time
77
import re
8-
from typing import Generator, Optional, Union
8+
import mmap
9+
from typing import Generator, Optional, Union, Tuple, List
910

1011
# Default path to the kernel's trace_pipe
1112
TRACE_PATH = "/sys/kernel/tracing/trace_pipe"
@@ -24,6 +25,79 @@
2425
ExecveEvent, ForkEvent, CloneEvent, ConnectEvent, BaseSyscallEvent, UnparsedEvent
2526
)
2627

28+
# ----------------------- Helpers to resolve execve pointers -----------------------
29+
30+
def _read_string_from_mem(pid: int, address: int, max_len: int = 4096) -> Optional[str]:
    """Best-effort read of a NUL-terminated string from another process's memory.

    Opens ``/proc/<pid>/mem`` unbuffered, seeks to ``address`` and reads at
    most ``max_len`` bytes. Everything from the first NUL byte onward is
    dropped, and the remainder is decoded as UTF-8 with undecodable bytes
    replaced rather than raising.

    Requires sufficient privileges (typically root / CAP_SYS_PTRACE).

    Args:
        pid: Process ID whose memory to read.
        address: Address of the string inside that process.
        max_len: Upper bound on the number of bytes read.

    Returns:
        The decoded string, or None when no bytes could be read or any
        exception was raised while opening, seeking or reading.
    """
    try:
        with open(f"/proc/{pid}/mem", "rb", buffering=0) as mem_file:
            mem_file.seek(address)
            raw = mem_file.read(max_len)

        if not raw:
            return None

        # Keep only the bytes before the first NUL; when there is no NUL in
        # the window the whole read is kept as-is.
        text_bytes, _sep, _tail = raw.partition(b"\x00")
        return text_bytes.decode("utf-8", errors="replace")
    except Exception:
        # Deliberately broad: this helper is best-effort and callers rely on
        # None to trigger their own fallback.
        return None
58+
59+
def _resolve_execve_cmd(pid: int, filename_ptr: str) -> Tuple[str, List[str]]:
    """Resolve the command and arguments for an execve trace that only carries pointers.

    Strategy:
        1. Parse ``filename_ptr`` as a hexadecimal address and try to read the
           filename string from ``/proc/<pid>/mem`` at that address.
        2. If that yields no usable name, fall back to ``/proc/<pid>/cmdline``:
           its first NUL-separated field becomes the command and the
           remaining non-empty fields become the arguments.

    Args:
        pid: The PID from the trace event.
        filename_ptr: Hex string pointer to the filename; surrounding
            whitespace and a trailing comma are tolerated.

    Returns:
        Tuple of ``(command, args)``. ``command`` is the basename of the
        resolved path, or ``"<unknown>"`` when nothing usable was found
        (including when the resolved path has an empty basename). ``args``
        is only populated by the cmdline fallback and is otherwise empty.
    """
    # NOTE(review): at execve *entry* /proc/<pid>/cmdline presumably still
    # describes the calling image, not the program being exec'd - confirm
    # whether the fallback result is acceptable for reporting.
    unknown = "<unknown>"
    cmd: str = unknown
    args: List[str] = []

    # Trace tokens look like "7ffcb...," so drop whitespace and the comma.
    pointer_text = filename_ptr.strip().rstrip(",")
    try:
        address = int(pointer_text, 16)
    except ValueError:
        # Not a hex number at all; treat it like a NULL pointer.
        address = 0

    if address:
        resolved = _read_string_from_mem(pid, address)
        if resolved:
            # basename() is "" for paths ending in "/"; keep the sentinel in
            # that case so the cmdline fallback below still runs.
            cmd = os.path.basename(resolved) or unknown

    if cmd == unknown:
        try:
            with open(f"/proc/{pid}/cmdline", "rb") as f:
                data = f.read()
        except OSError:
            # Process already gone or not readable: stay with the defaults.
            data = b""

        if data:
            argv0, *rest = data.split(b"\x00")
            # An empty argv[0] must not leak out as an empty command name.
            cmd = os.path.basename(argv0.decode("utf-8", errors="replace")) or unknown
            args = [p.decode("utf-8", errors="replace") for p in rest if p]

    return cmd, args
100+
27101
class TraceReader:
28102
"""
29103
Non-blocking reader for trace_pipe via selectors.
@@ -138,8 +212,18 @@ def _parse_line(self, line: str) -> Optional[BaseSyscallEvent]:
138212
ts, pid_str, cmd_args = m.groups()
139213
pid = int(pid_str)
140214
parts = cmd_args.split() if cmd_args else []
141-
if parts:
142-
return ExecveEvent(timestamp=ts, pid=pid, command=parts[0].strip('"'), args=parts[1:])
215+
if not parts:
216+
return None
217+
218+
# Typical trace contents: "filename: 7ffcb..., argv: 7ff..., envp: ..."
219+
if parts[0].startswith("filename:"):
220+
# Extract pointer after "filename:" which is at index 1
221+
filename_ptr = parts[1] if len(parts) > 1 else "0"
222+
command, argv = _resolve_execve_cmd(pid, filename_ptr)
223+
return ExecveEvent(timestamp=ts, pid=pid, command=command, args=argv)
224+
225+
# Fallback: treat the first token as command as before
226+
return ExecveEvent(timestamp=ts, pid=pid, command=parts[0].strip('"'), args=parts[1:])
143227

144228
# fork
145229
if m := FORK_PATTERN.search(line):

0 commit comments

Comments
 (0)