diff --git a/README.md b/README.md index 34e39f0..722ce91 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,9 @@ A demo of how to record an application in a Kubernetes pod using live-record. [**Load Debug Symbols**](load_debug_symbols/README.md) Loads debug symbols by parsing the relevant section addresses. +[**Process Tree**](process_tree/README.md) +Visualizes process trees from .undo recording files showing parent-child relationships. + [**Reconstruct file**](reconstruct_file/README.md) Reconstructs the content of a file by analysing reads on the execution history of a debugged program or LiveRecorder recording. diff --git a/follow_fork/README.md b/follow_fork/README.md index a6bf6d1..a5520be 100644 --- a/follow_fork/README.md +++ b/follow_fork/README.md @@ -1,6 +1,9 @@ Follow `fork()` =============== +**This addon has been superseded by the `--on-fork` options to live-record in Undo 9.1. Please +use `--on-fork` if you have Undo 9.1 or later.** + Often, when recording, engineers might want to record both the parent and the children generated during the run. This pre-load library intercepts the calls to 'fork()' and calls the diff --git a/private/manifest.json b/private/manifest.json index 93f69cb..787ec43 100644 --- a/private/manifest.json +++ b/private/manifest.json @@ -63,6 +63,13 @@ "version_min": "7.0.0", "help": "whatmap EXPRESSION -- Locate memory map containing EXPRESSION.\n\n\nExamples:\n\nwhatmap my_variable: looks up the map where my_variable is stored.\n\nwhatmap *0x1234: looks up the map containing the address 0x1234." }, + "process-tree": { + "description": "Visualizes process trees from Undo recordings showing parent-child relationships.", + "repo": "addons", + "script": "process_tree/process_tree.py", + "version_min": "9.1.0", + "help": "process-tree RECORDINGS_DIR [OPTIONS] -- Visualize process tree from Undo recordings.\n\n\nBy default, only ASCII tree output is shown. 
Use --output-svg to also generate an SVG.\n\n\nOptions:\n\n--output-svg FILE: Output SVG file path (generates SVG in addition to ASCII)\n\n\nExamples:\n\nprocess-tree /path/to/recordings\n\nprocess-tree /path/to/recordings --output-svg tree.svg" + }, "altui": { "description": "Altui provides a modern and user-friendly alternative to plain UDB and to TUI mode.", "repo": "altui", diff --git a/process_tree/README.md b/process_tree/README.md new file mode 100644 index 0000000..2340b12 --- /dev/null +++ b/process_tree/README.md @@ -0,0 +1,72 @@ +# Process Tree + +Visualizes process trees from Undo recordings. Shows both ASCII tree output and SVG timeline diagrams of parent-child process relationships. + +Note: this addon was created with Claude Code and has had only minimal human review and testing. + +## Usage + +This addon can be used in two ways: + +### As a UDB Command + +Before using the command it must be loaded into the debugger: +``` +extend process-tree +``` + +Then use the command: +``` +process-tree RECORDINGS_DIR [--output-svg FILE] +``` + +**Arguments:** +- `RECORDINGS_DIR`: Directory containing .undo recordings. + +**Options:** +- `--output-svg FILE`: Output SVG file path + +**Note:** By default, only ASCII tree output is shown. Use `--output-svg` to also generate an SVG visualization. 
+ +### As a Standalone Script + +```bash +./process_tree.py RECORDINGS_DIR [--output-svg FILE] +``` + +## Examples + +**Basic usage** (shows ASCII output only): +``` +process-tree /path/to/recordings +``` + +**Generate SVG visualization**: +``` +process-tree /path/to/recordings --output-svg my_tree.svg +``` + +## Output + +The addon can generate two types of visualizations: + +### ASCII Tree +A hierarchical text representation of the process tree showing parent-child relationships: +``` +Process Tree Visualization: +================================================== +└── PID 1234 (recording_0001.undo) + ├── PID 1235 (recording_0002.undo) + │ └── PID 1237 (recording_0004.undo) + └── PID 1236 (recording_0003.undo) +``` + +### SVG Timeline +A visual timeline diagram showing: +- Horizontal timeline for each process +- Fork points showing where new processes are created +- Process IDs and recording file names +- Parent-child relationships with connecting lines + +The SVG file can be viewed in any web browser or image viewer. + diff --git a/process_tree/process_tree.py b/process_tree/process_tree.py new file mode 100755 index 0000000..f919b4d --- /dev/null +++ b/process_tree/process_tree.py @@ -0,0 +1,551 @@ +#!/usr/bin/env python3 +""" +Process Tree Visualizer + +Reads Undo recordings and generates a process tree visualization. +Shows both ASCII tree output and SVG timeline diagrams. + +Can be used as a standalone script or as a GDB command. +""" + +from __future__ import annotations + +import argparse +import json +import shutil +import subprocess +import sys +import xml.etree.ElementTree as ET +from collections.abc import Iterable +from dataclasses import dataclass, field +from pathlib import Path + + +try: + import gdb + + HAS_GDB = True +except ImportError: + HAS_GDB = False + + +def check_undo_available() -> None: + """Check if 'undo' executable is available on PATH.""" + if not shutil.which("undo"): + raise FileNotFoundError( + "Error: 'undo' executable not found. 
" + "Please ensure 'undo' is installed and available on your PATH." + ) + + +@dataclass(frozen=True) +class ForkPosition: + """Represents a fork point in the process tree layout.""" + + fork_x: int + child_pid: int + child_start_x: int + + +@dataclass +class LayoutInfo: + """Layout information for a process in the SVG visualization.""" + + y: int + line_start_x: int = 0 + fork_positions: list[ForkPosition] = field(default_factory=list) + + +@dataclass +class Process: + """Represents a single process in the tree.""" + + pid: int + ppid: int | None + recording_file: str + start_time: float = 0.0 + children: list[Process] = field(default_factory=list) + + @classmethod + def from_recording(cls, recording_file: Path) -> Process | None: + """Create a Process from an Undo recording file. + + Returns None if the recording cannot be parsed. + """ + try: + # Get all recording data at once + cmd = ["undo", "recording-json", str(recording_file)] + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + data = json.loads(result.stdout) + + # Extract process IDs + pid = data["debuggee"]["state_load_rchild_pid"] + ppid = data["debuggee"]["rchild_ppid"] + + # Extract start time + utc_start = data["header"]["utc_start"] + utc_start_ns = data["header"]["utc_start_ns"] + start_time = float(utc_start) + float(utc_start_ns) / 1_000_000_000 + + return cls( + pid=pid, + ppid=ppid, + recording_file=str(recording_file), + start_time=start_time, + ) + + except subprocess.CalledProcessError as e: + print(f"Error processing {recording_file}: {e}", file=sys.stderr) + if e.stderr: + print(f" stderr: {e.stderr.strip()}", file=sys.stderr) + return None + except (json.JSONDecodeError, KeyError, ValueError, TypeError) as e: + print(f"Error processing {recording_file}: {e}", file=sys.stderr) + return None + + +class ProcessTree: + """Represents and manages a tree of processes.""" + + def __init__(self): + self.processes: dict[int, Process] = {} + self.root: Process | None = None + 
+ def add_process(self, process: Process) -> None: + """Add a process to the tree. + + If a process with the same PID already exists, a warning is printed and + the new process replaces the old one. This can happen if multiple recordings + share the same PID (e.g., from different recording sessions saved to the + same directory). + """ + if process.pid in self.processes: + existing = self.processes[process.pid] + print( + f"Warning: Duplicate PID {process.pid} found: " + f"{Path(existing.recording_file).name} and {Path(process.recording_file).name}. " + f"This usually means multiple recordings have been saved from the same session. " + f"Using {Path(process.recording_file).name}.", + file=sys.stderr, + ) + self.processes[process.pid] = process + + def build_relationships(self) -> None: + """Build parent-child relationships and find root process.""" + # Link children to parents + for process in self.processes.values(): + if process.ppid is not None and process.ppid in self.processes: + parent = self.processes[process.ppid] + parent.children.append(process) + + # Find root process (one with no parent in our dataset) + roots = [ + p for p in self.processes.values() if p.ppid is None or p.ppid not in self.processes + ] + + if len(roots) != 1: + print( + f"Warning: Found {len(roots)} root processes, expected 1", + file=sys.stderr, + ) + if not roots: + raise ValueError("No root process found - cannot build process tree") + + self.root = roots[0] + self._sort_children_by_start_time() + + def _sort_children_by_start_time(self) -> None: + """Sort all children by their start time (chronological order).""" + for process in self.processes.values(): + process.children.sort(key=lambda p: p.start_time) + + +class ASCIIRenderer: + """Renders process tree as ASCII art.""" + + def render(self, tree: ProcessTree) -> None: + """Generate ASCII art visualization of the process tree.""" + if not tree.root: + print("No root process found") + return + + print("\nProcess Tree 
Visualization:") + print("=" * 50) + self._render_process(tree.root, "", True) + + def _render_process(self, process: Process, prefix: str, is_last: bool) -> None: + """Recursively render a process and its children.""" + # Print current process + connector = "└── " if is_last else "├── " + filename = Path(process.recording_file).name + print(f"{prefix}{connector}PID {process.pid} ({filename})") + + # Update prefix for children + child_prefix = prefix + (" " if is_last else "│ ") + + # Print children + for i, child in enumerate(process.children): + is_child_last = i == len(process.children) - 1 + self._render_process(child, child_prefix, is_child_last) + + +class SVGRenderer: + """Renders process tree as SVG timeline diagram.""" + + def __init__(self): + # Layout parameters + self.line_height = 80 + self.line_length = 600 + self.line_start_x = 120 + self.margin_top = 50 + self.margin_bottom = 30 + self.fork_spacing = 100 + self.fork_offset = 150 + + def render(self, tree: ProcessTree, output_file: str) -> None: + """Generate SVG visualization of the process tree.""" + if not tree.root: + print("No root process found") + return + + layout = self._calculate_layout(tree) + svg_width, svg_height = self._calculate_dimensions(layout) + + # Create SVG + svg = self._create_svg_element(svg_width, svg_height) + self._add_styles(svg) + + # Draw elements + self._draw_process_lines(svg, tree.processes.values(), layout) + self._draw_fork_connections(svg, tree.processes.values(), layout) + + # Save file + self._save_svg(svg, output_file) + + def _calculate_layout(self, tree: ProcessTree) -> dict[int, LayoutInfo]: + """Calculate positions for all processes and their forks.""" + assert tree.root is not None, "tree.root must not be None" + layout = {} + + # Calculate Y positions for each process + y_positions = {} + current_y = 0 + + def assign_y_positions(process: Process): + nonlocal current_y + y_positions[process.pid] = current_y + current_y += 1 + + for child in 
process.children: + assign_y_positions(child) + + assign_y_positions(tree.root) + + # Convert to layout structure + for pid, y_index in y_positions.items(): + layout[pid] = LayoutInfo(y=self.margin_top + y_index * self.line_height) + + # Now calculate X positions recursively + def calculate_x_positions(process: Process, current_x: int) -> None: + layout[process.pid].line_start_x = current_x + + # Calculate fork positions for children + if process.children: + fork_base_x = current_x + self.fork_offset + for i, child in enumerate(process.children): + fork_x = fork_base_x + i * self.fork_spacing + child_start_x = fork_x + 50 + + layout[process.pid].fork_positions.append( + ForkPosition( + fork_x=fork_x, + child_pid=child.pid, + child_start_x=child_start_x, + ) + ) + + # Recursively calculate for child + calculate_x_positions(child, child_start_x) + + calculate_x_positions(tree.root, self.line_start_x) + return layout + + def _calculate_dimensions(self, layout: dict[int, LayoutInfo]) -> tuple[int, int]: + """Calculate required SVG dimensions.""" + max_x = max(info.line_start_x + self.line_length for info in layout.values()) + max_y = max(info.y for info in layout.values()) + + width = max_x + 100 + height = max_y + self.margin_bottom + self.line_height + return width, height + + def _create_svg_element(self, width: int, height: int) -> ET.Element: + """Create the root SVG element with white background.""" + svg = ET.Element( + "svg", + width=str(width), + height=str(height), + xmlns="http://www.w3.org/2000/svg", + ) + + # Add white background + ET.SubElement(svg, "rect", width=str(width), height=str(height), fill="white") + return svg + + def _add_styles(self, svg: ET.Element) -> None: + """Add CSS styles to the SVG.""" + style = ET.SubElement(svg, "style") + style.text = """ + .process-line { stroke: black; stroke-width: 3; } + .fork-line { stroke: black; stroke-width: 2; } + .process-label { font-family: Arial, sans-serif; font-size: 14px; font-weight: bold; } + 
.fork-label { font-family: Arial, sans-serif; font-size: 12px; } + .filename-label { font-family: Arial, sans-serif; font-size: 10px; fill: #666; } + """ + + def _draw_process_lines( + self, + svg: ET.Element, + processes: Iterable[Process], + layout: dict[int, LayoutInfo], + ) -> None: + """Draw horizontal timeline lines for each process.""" + for process in processes: + info = layout[process.pid] + y = info.y + start_x = info.line_start_x + end_x = start_x + self.line_length + + # Main timeline + line = ET.SubElement( + svg, + "line", + x1=str(start_x), + y1=str(y), + x2=str(end_x), + y2=str(y), + ) + line.set("class", "process-line") + + # PID label (above line) + pid_label = ET.SubElement( + svg, + "text", + x=str(start_x - 90), + y=str(y - 10), + ) + pid_label.set("class", "process-label") + pid_label.text = f"PID {process.pid}" + + # Filename label (below line) + filename = Path(process.recording_file).name + filename_label = ET.SubElement( + svg, + "text", + x=str(start_x + 20), + y=str(y + 20), + ) + filename_label.set("class", "filename-label") + filename_label.text = filename + + def _draw_fork_connections( + self, + svg: ET.Element, + processes: Iterable[Process], + layout: dict[int, LayoutInfo], + ) -> None: + """Draw fork connections between parent and child processes.""" + for process in processes: + if not process.children: + continue + + parent_info = layout[process.pid] + parent_y = parent_info.y + + for fork_info in parent_info.fork_positions: + fork_x = fork_info.fork_x + child_pid = fork_info.child_pid + child_start_x = fork_info.child_start_x + child_y = layout[child_pid].y + + # Fork label + fork_label = ET.SubElement( + svg, + "text", + x=str(fork_x - 15), + y=str(parent_y - 10), + ) + fork_label.set("class", "fork-label") + fork_label.text = "fork()" + + # Vertical line down + vertical_line = ET.SubElement( + svg, + "line", + x1=str(fork_x), + y1=str(parent_y), + x2=str(fork_x), + y2=str(child_y), + ) + vertical_line.set("class", 
"fork-line") + + # Horizontal line to child + horizontal_line = ET.SubElement( + svg, + "line", + x1=str(fork_x), + y1=str(child_y), + x2=str(child_start_x), + y2=str(child_y), + ) + horizontal_line.set("class", "fork-line") + + def _save_svg(self, svg: ET.Element, output_file: str) -> None: + """Save SVG to file.""" + tree = ET.ElementTree(svg) + ET.indent(tree, space=" ", level=0) + tree.write(output_file, encoding="utf-8", xml_declaration=True) + print(f"SVG visualization saved to: {output_file}") + + +def load_recordings(recordings_dir: Path) -> ProcessTree: + """Load all .undo files and build process tree.""" + recording_files = list(recordings_dir.glob("*.undo")) + if not recording_files: + raise ValueError(f"No .undo files found in {recordings_dir}") + + print(f"Found {len(recording_files)} recording files") + + tree = ProcessTree() + for recording_file in recording_files: + process = Process.from_recording(recording_file) + if process is not None: + tree.add_process(process) + print( + f"Loaded: PID {process.pid}, PPID {process.ppid}, " + f"Start: {process.start_time:.9f}, File: {recording_file.name}" + ) + + tree.build_relationships() + return tree + + +def visualize_process_tree(recordings_dir: Path, output_svg: str | None = None) -> None: + """Load recordings and generate visualizations.""" + tree = load_recordings(recordings_dir) + + # Generate SVG only if explicitly requested + if output_svg: + svg_renderer = SVGRenderer() + svg_renderer.render(tree, output_svg) + + # Always show ASCII output + ascii_renderer = ASCIIRenderer() + ascii_renderer.render(tree) + + +if HAS_GDB: + + class ProcessTreeCommand(gdb.Command): + """ + Visualize process trees from Undo recordings. + + Usage: process-tree RECORDINGS_DIR [--output-svg FILE] + + Arguments: + RECORDINGS_DIR: Directory containing Undo recordings + + Options: + --output-svg FILE: Output SVG file path (generates SVG in addition to ASCII) + + By default, only ASCII tree output is shown. 
Use --output-svg to also generate an SVG. + + Examples: + process-tree /path/to/recordings + process-tree /path/to/recordings --output-svg tree.svg + """ + + def __init__(self) -> None: + super().__init__("process-tree", gdb.COMMAND_USER, gdb.COMPLETE_FILENAME) + + def invoke(self, argument: str, _from_tty: bool) -> None: + """Execute the process-tree command.""" + if not argument: + raise gdb.GdbError("Usage: process-tree RECORDINGS_DIR [--output-svg FILE]") + + # Parse arguments + args = gdb.string_to_argv(argument) + recordings_dir = Path(args[0]).expanduser() + output_svg = None + + # Parse optional arguments + i = 1 + while i < len(args): + arg = args[i] + if arg in ("-output-svg", "--output-svg"): + if i + 1 >= len(args): + raise gdb.GdbError(f"{arg} requires a filename") + output_svg = str(Path(args[i + 1]).expanduser()) + i += 2 + else: + raise gdb.GdbError(f"Unknown argument: {arg}") + + # Validate recordings directory + if not recordings_dir.is_dir(): + raise gdb.GdbError(f"Error: {recordings_dir} is not a valid directory") + + # Check if undo is available + try: + check_undo_available() + except FileNotFoundError as e: + raise gdb.GdbError(str(e)) + + # Create visualizer and generate output + try: + visualize_process_tree(recordings_dir, output_svg) + except Exception as e: + raise gdb.GdbError(f"Error generating process tree: {e}") + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Visualize process trees from Undo recordings. " + "By default, only ASCII output is shown." 
+ ) + parser.add_argument("recordings_dir", help="Directory containing Undo recordings") + parser.add_argument( + "--output-svg", help="Output SVG file path (generates SVG in addition to ASCII)" + ) + + args = parser.parse_args() + + # Check if undo is available + try: + check_undo_available() + except FileNotFoundError as e: + print(str(e), file=sys.stderr) + return 1 + + recordings_dir = Path(args.recordings_dir).expanduser() + if not recordings_dir.is_dir(): + print(f"Error: {recordings_dir} is not a valid directory", file=sys.stderr) + return 1 + + output_svg = str(Path(args.output_svg).expanduser()) if args.output_svg else None + + try: + visualize_process_tree(recordings_dir, output_svg) + except ValueError as e: + print(str(e), file=sys.stderr) + return 1 + + return 0 + + +# When sourced by GDB, register the command +if HAS_GDB: + ProcessTreeCommand() +# When run as a standalone script +elif __name__ == "__main__": + sys.exit(main())