From 752d01a6f3b3bd6a0220967414411890adc10006 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Thu, 14 Aug 2025 02:17:37 +0200 Subject: [PATCH] Add an ability to print CFG to pdf and display it in the output Just use opt --passes=dot-cfg and the magic happens. --- .gitignore | 5 + Dockerfile | 4 +- Dockerfile.backend | 4 +- README.md | 5 +- backend/server.py | 263 ++++++++++++++++++++++++++++--------- session_temps/.gitkeep | 1 + setup_backend.sh | 3 +- src/app/ExplorerContent.js | 101 ++++++++++++-- tests/test_opt_dot_pdf.py | 40 ++++++ 9 files changed, 347 insertions(+), 79 deletions(-) create mode 100644 session_temps/.gitkeep create mode 100644 tests/test_opt_dot_pdf.py diff --git a/.gitignore b/.gitignore index 04ccd0d..5ee2f45 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,8 @@ next-env.d.ts # ignore stored sessions StoredSessions/* +!StoredSessions/.gitkeep + +# session temps +session_temps/* +!session_temps/.gitkeep diff --git a/Dockerfile b/Dockerfile index 4c3789b..f2b5bbf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ WORKDIR /app # Install minimal tooling RUN apt-get update && \ apt-get install -y --no-install-recommends \ - ca-certificates wget curl gnupg lsb-release software-properties-common && \ + ca-certificates wget curl gnupg lsb-release software-properties-common graphviz && \ rm -rf /var/lib/apt/lists/* # Add LLVM 22 repository @@ -46,7 +46,7 @@ RUN python3 -m venv /opt/venv && \ /opt/venv/bin/pip install --pre torch-mlir torchvision \ --extra-index-url=https://download.pytorch.org/whl/nightly/cpu \ -f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels && \ - /opt/venv/bin/pip install triton fastapi uvicorn pytest httpx + /opt/venv/bin/pip install triton fastapi uvicorn pytest httpx PyPDF2 # Create non-root user and fix permissions RUN useradd -u 10001 -m --shell /usr/sbin/nologin appuser && \ diff --git a/Dockerfile.backend b/Dockerfile.backend index 0308dba..2283aa7 100644 --- a/Dockerfile.backend +++ b/Dockerfile.backend @@ -9,7 +9,7 @@ WORKDIR /app RUN apt-get update && \ apt-get install -y --no-install-recommends \ - ca-certificates wget curl gnupg lsb-release software-properties-common && \ + ca-certificates wget curl gnupg lsb-release software-properties-common graphviz && \ rm -rf /var/lib/apt/lists/* RUN useradd -u 10001 -m --shell /usr/sbin/nologin appuser && \ @@ -28,7 +28,7 @@ RUN python3 -m venv /opt/venv && \ /opt/venv/bin/pip install --pre torch-mlir torchvision \ --extra-index-url=https://download.pytorch.org/whl/nightly/cpu \ -f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels && \ - /opt/venv/bin/pip install triton fastapi uvicorn pydantic + /opt/venv/bin/pip install triton fastapi uvicorn pydantic PyPDF2 RUN chown -R appuser:appuser /home/appuser/.cache /app diff --git a/README.md b/README.md index 835e25b..38dcf6f 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ tracing models through various IR stages and transformations. - Torch-MLIR - LLVM with mlir-opt - Triton +- graphviz - needed in case if you want PytorchExplorer to get CFG from LLVM IR in a form of pdf. To setup PyTorch and Torch-MLIR it's a good idea to visit https://github.com/llvm/torch-mlir repository and follow instructions from there. @@ -85,7 +86,7 @@ source setup_backend.sh If you already have a working venv for Torch-MLIR, you can just install FastAPI and testing dependencies: ```bash -pip install fastapi uvicorn pytest httpx +pip install fastapi uvicorn pytest httpx PyPDF2 ``` ### Run the application @@ -200,6 +201,8 @@ on the right. individually. 5. Hit **Store Session** to save your work. The backend returns a short ID which can be appended to the URL (e.g. `/abc123`) to reload the same session later. +6. It's possible to build CFG into pdf file for LLVM IR, just call standart for LLVM + opt --passes=dot-cfg and CFG will be rendered in the output window. ## Implementation details diff --git a/backend/server.py b/backend/server.py index 73dda68..ef67041 100644 --- a/backend/server.py +++ b/backend/server.py @@ -9,8 +9,10 @@ import atexit import logging import traceback +import base64 from typing import List, Optional, Tuple import re +from pathlib import Path from contextlib import redirect_stdout, redirect_stderr @@ -31,6 +33,11 @@ CompilerPipelineError, ) +try: + from PyPDF2 import PdfMerger +except Exception: + PdfMerger = None + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -46,6 +53,14 @@ cached_triton_runs = {} +# Where to store per-request temporary artifacts (DOT/PDF/IR) +# Default: /session_temps, override with PE_SESSION_TEMPS +PROJECT_ROOT = Path(__file__).resolve().parents[1] +SESSION_TEMPS_ROOT = os.environ.get( + "PE_SESSION_TEMPS", str(PROJECT_ROOT / "session_temps") +) +os.makedirs(SESSION_TEMPS_ROOT, exist_ok=True) + TORCH_MLIR_OPT_PATH = os.environ.get("TORCH_MLIR_OPT_PATH", "") LLVM_BIN_PATH = os.environ.get("LLVM_BIN_PATH", "") TRITON_OPT_PATH = os.environ.get("TRITON_OPT_PATH", "") @@ -132,11 +147,13 @@ def split_cmd_arguments(cmd: str) -> List[str]: # Run torch-mlir-opt and/or mlir-opt and/or opt etc. def run_external_opt_tool_file( - input_path: str, cmd: str, tool: str, output_path: str + input_path: str, cmd: str, tool: str, output_path: str, cwd: Optional[str] = None ) -> Tuple[bool, str]: args = [tool] + split_cmd_arguments(cmd) + [input_path, "-o", output_path] try: - result = subprocess.run(args, capture_output=True, text=True, check=True) + result = subprocess.run( + args, capture_output=True, text=True, check=True, cwd=cwd + ) return (True, result.stderr or "") except subprocess.CalledProcessError as e: logger.error( @@ -151,80 +168,202 @@ def run_external_opt_tool_file( raise CompilerPipelineError(f"Unexpected error while running '{tool}': {e}") +def _read_file_safe(path: str) -> Tuple[str, bool]: + # Read a file returning its text or base64 if binary. + # Returns a tuple of (content, is_binary). If the file cannot be decoded as + # UTF-8, it is assumed to be binary and returned base64-encoded. + + with open(path, "rb") as f: + data = f.read() + try: + return data.decode("utf-8"), False + except UnicodeDecodeError: + encoded = base64.b64encode(data).decode("utf-8") + return encoded, True + + # Utility for custom pipeline. def apply_optional_passes( ir: str, pipeline: List[Tuple[str, str]], dump_each: bool = False ) -> str: uid = uuid.uuid4().hex output = "" - temp_files = [] + pdf_blocks: List[Tuple[str, str]] = [] - # Step 1: Write initial IR to a file. - with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f: - f.write(ir) - f.flush() - prev_path = f.name - temp_files.append(prev_path) - - if dump_each: - output += f"\n\n===== Initial IR =====\n{ir}" - - # Step 2: Apply pipeline stages. - for index, (tool, flags) in enumerate(pipeline): - tool_path = None - - if tool == "torch-mlir-opt": - tool_path = os.path.join(TORCH_MLIR_OPT_PATH, "torch-mlir-opt") - elif tool == "mlir-opt": - tool_path = os.path.join(LLVM_BIN_PATH, "mlir-opt") - elif tool == "mlir-translate": - tool_path = os.path.join(LLVM_BIN_PATH, "mlir-translate") - elif tool == "opt": - flags += " -S" - tool_path = os.path.join(LLVM_BIN_PATH, "opt") - elif tool == "llc": - tool_path = os.path.join(LLVM_BIN_PATH, "llc") - elif tool == "triton-opt": - tool_path = os.path.join(TRITON_OPT_PATH, "triton-opt") - elif tool == "triton-llvm-opt": - tool_path = os.path.join(TRITON_OPT_PATH, "triton-llvm-opt") - elif tool == "user-tool": - tokens = split_cmd_arguments(flags) - if not tokens: - raise CompilerPipelineError("Empty user-tool invocation") - tool_path = tokens[0] - flags = " ".join(tokens[1:]) - else: - raise CompilerPipelineError(f"Unknown pipeline tool: '{tool}'") + # Make one session dir under PytorchExplorer/session_temps. + session_dir = tempfile.mkdtemp(dir=SESSION_TEMPS_ROOT, prefix=f"sess_{uid}_") + try: + # Step 1: Write initial IR into the session dir. + prev_path = os.path.join(session_dir, f"ir_init_{uid}.ll") + with open(prev_path, "w") as f: + f.write(ir) - out_path = os.path.join(tempfile.gettempdir(), f"ir_step_{index}_{uid}") - temp_files.append(out_path) + if dump_each: + output += f"\n\n===== Initial IR =====\n{ir}" + + # Step 2: Apply pipeline stages (all I/O + cwd inside session_dir). + for index, (tool, flags) in enumerate(pipeline): + if tool == "torch-mlir-opt": + tool_path = os.path.join(TORCH_MLIR_OPT_PATH, "torch-mlir-opt") + elif tool == "mlir-opt": + tool_path = os.path.join(LLVM_BIN_PATH, "mlir-opt") + elif tool == "mlir-translate": + tool_path = os.path.join(LLVM_BIN_PATH, "mlir-translate") + elif tool == "opt": + flags += " -S" + tool_path = os.path.join(LLVM_BIN_PATH, "opt") + elif tool == "llc": + tool_path = os.path.join(LLVM_BIN_PATH, "llc") + elif tool == "triton-opt": + tool_path = os.path.join(TRITON_OPT_PATH, "triton-opt") + elif tool == "triton-llvm-opt": + tool_path = os.path.join(TRITON_OPT_PATH, "triton-llvm-opt") + elif tool == "user-tool": + tokens = split_cmd_arguments(flags) + if not tokens: + raise CompilerPipelineError("Empty user-tool invocation") + tool_path = tokens[0] + flags = " ".join(tokens[1:]) + if os.path.basename(tool_path) == "opt" and "-S" not in flags.split(): + flags += " -S" + else: + raise CompilerPipelineError(f"Unknown pipeline tool: '{tool}'") - success, stderr = run_external_opt_tool_file( - prev_path, flags, tool_path, out_path - ) - if not success: - raise CompilerPipelineError(f"{tool} failed: {stderr}") + out_path = os.path.join(session_dir, f"ir_step_{index}_{uid}.ll") - if dump_each: - with open(out_path, "r") as f: - stage_output = f.read() - output += f"\n\n===== IR after {tool} {flags} =====\n{stage_output}" + if "dot-cfg" in flags and "--dot-cfg-dir=" not in flags: + flags += f" --dot-cfg-dir={session_dir}" - prev_path = out_path + # Run the tool with cwd=session_dir so DOTs land here. + success, stderr = run_external_opt_tool_file( + prev_path, flags, tool_path, out_path, cwd=session_dir + ) + if not success: + raise CompilerPipelineError(f"{tool} failed: {stderr}") + + # Collect DOTs from session_dir -> convert to PDFs -> merge -> attach. + dot_files = sorted( + set( + glob.glob(os.path.join(session_dir, "*.dot")) + + glob.glob(os.path.join(session_dir, ".*.dot")) + ) + ) + pdf_paths: List[str] = [] - if not dump_each: - with open(prev_path, "r") as f: - output = f.read() + # Only warn if user requested dot-cfg but no DOTs appeared. + if "dot-cfg" in flags and not dot_files: + logger.warning( + "No *.dot emitted by -passes=dot-cfg; checked %s", session_dir + ) - # Cleanup. - for path in temp_files: - try: - os.remove(path) - except Exception: - pass + # Convert DOT -> PDF. + if dot_files: + if not shutil.which("dot"): + logger.error( + "'dot' (graphviz) not found on PATH; cannot render CFG PDFs." + ) + else: + for df in sorted(set(dot_files)): + pdf_path = os.path.splitext(df)[0] + ".pdf" + try: + subprocess.run( + ["dot", "-Tpdf", df, "-o", pdf_path], check=True + ) + pdf_paths.append(pdf_path) + except Exception as e: + logger.error(f"Failed to convert {df} to PDF: {e}") + + # Remove DOTs after conversion (keep PDFs). + for df in dot_files: + try: + os.remove(df) + except Exception: + pass + + def _encode_and_attach(path_to_pdf: str): + try: + with open(path_to_pdf, "rb") as pf: + encoded = base64.b64encode(pf.read()).decode("utf-8") + pdf_blocks.append((os.path.basename(path_to_pdf), encoded)) + if dump_each: + nonlocal output + output += f"\n\n===== DOT PDF {os.path.basename(path_to_pdf)} =====\n{encoded}" + except Exception as e: + logger.error(f"Failed to read PDF {path_to_pdf}: {e}") + + # Merge PDFs if we have more than one. + if pdf_paths: + merged_ok = False + merged_path = os.path.join(session_dir, f"cfg-merged-stage-{index}.pdf") + + if PdfMerger is not None: + try: + merger = PdfMerger() + for p in sorted(pdf_paths): + merger.append(p) + with open(merged_path, "wb") as mf: + merger.write(mf) + merger.close() + _encode_and_attach(merged_path) + merged_ok = True + except Exception as e: + logger.error(f"PDF merge via PyPDF2 failed: {e}") + + if not merged_ok and shutil.which("pdfunite"): + try: + cmd = ["pdfunite"] + sorted(pdf_paths) + [merged_path] + subprocess.run(cmd, check=True) + _encode_and_attach(merged_path) + merged_ok = True + except Exception as e: + logger.error(f"PDF merge via pdfunite failed: {e}") + + if not merged_ok: + # Fall back to attaching individual PDFs. + for p in sorted(pdf_paths): + _encode_and_attach(p) + + # Handle analysis-only stages (no output file produced). + wrote_output = os.path.exists(out_path) + + if dump_each: + path_to_show = out_path if wrote_output else prev_path + stage_output, is_binary = _read_file_safe(path_to_show) + if not wrote_output: + output += f"\n\n===== IR after {tool} {flags} (no new output; IR unchanged) =====\n" + else: + output += f"\n\n===== IR after {tool} {flags} =====\n" + output += stage_output + + if wrote_output: + prev_path = out_path + + # Final assembly. + if not dump_each: + with open(prev_path, "rb") as f: + data = f.read() + try: + output = data.decode("utf-8") + except UnicodeDecodeError: + encoded = base64.b64encode(data).decode("utf-8") + if data.startswith(b"%PDF"): + pdf_blocks.insert( + 0, (os.path.basename(prev_path) + ".pdf", encoded) + ) + output = "" + else: + output = f"===== BINARY OUTPUT {os.path.basename(prev_path)} =====\n{encoded}" + + for name, encoded in pdf_blocks: + if output: + output += "\n\n" + output += f"===== DOT PDF {name} =====\n{encoded}" - return output + return output + + finally: + # Nuke the whole session dir; PDFs/DOTs/IR intermediates are all ephemeral. + shutil.rmtree(session_dir, ignore_errors=True) # Torch graph IR. diff --git a/session_temps/.gitkeep b/session_temps/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/session_temps/.gitkeep @@ -0,0 +1 @@ + diff --git a/setup_backend.sh b/setup_backend.sh index 2ba1e20..f855dcd 100644 --- a/setup_backend.sh +++ b/setup_backend.sh @@ -16,7 +16,8 @@ sudo apt-get update sudo apt-get -y install \ llvm-22-dev \ llvm-22-tools \ - mlir-22-tools + mlir-22-tools \ + graphviz echo "Exporting LLVM 22 tools path..." export PATH=/usr/lib/llvm-22/bin:$PATH diff --git a/src/app/ExplorerContent.js b/src/app/ExplorerContent.js index 92f5425..7b0976b 100644 --- a/src/app/ExplorerContent.js +++ b/src/app/ExplorerContent.js @@ -540,6 +540,24 @@ export default function ExplorerContent() { setLocalCmd(customToolCmd[irWin.id] || ""); }, [customToolCmd, irWin.id]); + const extractPdfs = (output) => { + const regex = /===== DOT PDF ([^=]+?) =====\n([\s\S]*?)(?=\n=====|$)/g; + let match; + let lastIndex = 0; + let text = ""; + const pdfs = []; + while ((match = regex.exec(output)) !== null) { + text += output.slice(lastIndex, match.index); + pdfs.push({ + name: match[1], + data: match[2].trim().replace(/\s+/g, ""), + }); + lastIndex = regex.lastIndex; + } + text += output.slice(lastIndex); + return { textContent: text.trim(), pdfs }; + }; + return ( <>
- { + const displayOutput = exploreStage.windowId === irWin.id ? filterToStage(irWin.output, exploreStage.stageIdx) - : irWin.output - } - onChange={() => {}} - theme={theme === "light" ? "mlirTheme" : "mlirThemeDark"} - options={{ readOnly: true }} - /> + : irWin.output; + const { textContent, pdfs } = extractPdfs(displayOutput); + + const editorOnly = pdfs.length === 0; + + return ( + <> + {/* Keep editor full-height if no PDFs; otherwise give it a small fixed height */} +
+ {}} + theme={ + theme === "light" ? "mlirTheme" : "mlirThemeDark" + } + options={{ readOnly: true }} + /> +
+ + {/* PDF area fills all remaining space */} + {pdfs.length > 0 && ( +
+ {pdfs.length === 1 ? ( +