Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,8 @@ next-env.d.ts

# ignore stored sessions
StoredSessions/*
!StoredSessions/.gitkeep

# session temps
session_temps/*
!session_temps/.gitkeep
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ WORKDIR /app
# Install minimal tooling
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates wget curl gnupg lsb-release software-properties-common && \
ca-certificates wget curl gnupg lsb-release software-properties-common graphviz && \
rm -rf /var/lib/apt/lists/*

# Add LLVM 22 repository
Expand Down Expand Up @@ -46,7 +46,7 @@ RUN python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --pre torch-mlir torchvision \
--extra-index-url=https://download.pytorch.org/whl/nightly/cpu \
-f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels && \
/opt/venv/bin/pip install triton fastapi uvicorn pytest httpx
/opt/venv/bin/pip install triton fastapi uvicorn pytest httpx PyPDF2

# Create non-root user and fix permissions
RUN useradd -u 10001 -m --shell /usr/sbin/nologin appuser && \
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile.backend
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ WORKDIR /app

RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates wget curl gnupg lsb-release software-properties-common && \
ca-certificates wget curl gnupg lsb-release software-properties-common graphviz && \
rm -rf /var/lib/apt/lists/*

RUN useradd -u 10001 -m --shell /usr/sbin/nologin appuser && \
Expand All @@ -28,7 +28,7 @@ RUN python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --pre torch-mlir torchvision \
--extra-index-url=https://download.pytorch.org/whl/nightly/cpu \
-f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels && \
/opt/venv/bin/pip install triton fastapi uvicorn pydantic
/opt/venv/bin/pip install triton fastapi uvicorn pydantic PyPDF2

RUN chown -R appuser:appuser /home/appuser/.cache /app

Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ tracing models through various IR stages and transformations.
- Torch-MLIR
- LLVM with mlir-opt
- Triton
- graphviz - needed in case if you want PytorchExplorer to get CFG from LLVM IR in a form of pdf.

To setup PyTorch and Torch-MLIR it's a good idea to visit https://github.com/llvm/torch-mlir repository and follow instructions from there.

Expand Down Expand Up @@ -85,7 +86,7 @@ source setup_backend.sh

If you already have a working venv for Torch-MLIR, you can just install FastAPI and testing dependencies:
```bash
pip install fastapi uvicorn pytest httpx
pip install fastapi uvicorn pytest httpx PyPDF2
```

### Run the application
Expand Down Expand Up @@ -200,6 +201,8 @@ on the right.
individually.
5. Hit **Store Session** to save your work. The backend returns a short ID which
can be appended to the URL (e.g. `/abc123`) to reload the same session later.
6. It's possible to build CFG into pdf file for LLVM IR, just call standart for LLVM
opt --passes=dot-cfg and CFG will be rendered in the output window.

## Implementation details

Expand Down
263 changes: 201 additions & 62 deletions backend/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
import atexit
import logging
import traceback
import base64
from typing import List, Optional, Tuple
import re
from pathlib import Path

from contextlib import redirect_stdout, redirect_stderr

Expand All @@ -31,6 +33,11 @@
CompilerPipelineError,
)

try:
from PyPDF2 import PdfMerger
except Exception:
PdfMerger = None

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

Expand All @@ -46,6 +53,14 @@

cached_triton_runs = {}

# Where to store per-request temporary artifacts (DOT/PDF/IR)
# Default: <project_root>/session_temps, override with PE_SESSION_TEMPS
PROJECT_ROOT = Path(__file__).resolve().parents[1]
SESSION_TEMPS_ROOT = os.environ.get(
"PE_SESSION_TEMPS", str(PROJECT_ROOT / "session_temps")
)
os.makedirs(SESSION_TEMPS_ROOT, exist_ok=True)

TORCH_MLIR_OPT_PATH = os.environ.get("TORCH_MLIR_OPT_PATH", "")
LLVM_BIN_PATH = os.environ.get("LLVM_BIN_PATH", "")
TRITON_OPT_PATH = os.environ.get("TRITON_OPT_PATH", "")
Expand Down Expand Up @@ -132,11 +147,13 @@

# Run torch-mlir-opt and/or mlir-opt and/or opt etc.
def run_external_opt_tool_file(
input_path: str, cmd: str, tool: str, output_path: str
input_path: str, cmd: str, tool: str, output_path: str, cwd: Optional[str] = None
) -> Tuple[bool, str]:
args = [tool] + split_cmd_arguments(cmd) + [input_path, "-o", output_path]
try:
result = subprocess.run(args, capture_output=True, text=True, check=True)
result = subprocess.run(
args, capture_output=True, text=True, check=True, cwd=cwd
)
return (True, result.stderr or "")
except subprocess.CalledProcessError as e:
logger.error(
Expand All @@ -151,80 +168,202 @@
raise CompilerPipelineError(f"Unexpected error while running '{tool}': {e}")


def _read_file_safe(path: str) -> Tuple[str, bool]:
# Read a file returning its text or base64 if binary.
# Returns a tuple of (content, is_binary). If the file cannot be decoded as
# UTF-8, it is assumed to be binary and returned base64-encoded.

with open(path, "rb") as f:
data = f.read()
try:
return data.decode("utf-8"), False
except UnicodeDecodeError:
encoded = base64.b64encode(data).decode("utf-8")
return encoded, True


# Utility for custom pipeline.
def apply_optional_passes(
ir: str, pipeline: List[Tuple[str, str]], dump_each: bool = False
) -> str:
uid = uuid.uuid4().hex
output = ""
temp_files = []
pdf_blocks: List[Tuple[str, str]] = []

# Step 1: Write initial IR to a file.
with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f:
f.write(ir)
f.flush()
prev_path = f.name
temp_files.append(prev_path)

if dump_each:
output += f"\n\n===== Initial IR =====\n{ir}"

# Step 2: Apply pipeline stages.
for index, (tool, flags) in enumerate(pipeline):
tool_path = None

if tool == "torch-mlir-opt":
tool_path = os.path.join(TORCH_MLIR_OPT_PATH, "torch-mlir-opt")
elif tool == "mlir-opt":
tool_path = os.path.join(LLVM_BIN_PATH, "mlir-opt")
elif tool == "mlir-translate":
tool_path = os.path.join(LLVM_BIN_PATH, "mlir-translate")
elif tool == "opt":
flags += " -S"
tool_path = os.path.join(LLVM_BIN_PATH, "opt")
elif tool == "llc":
tool_path = os.path.join(LLVM_BIN_PATH, "llc")
elif tool == "triton-opt":
tool_path = os.path.join(TRITON_OPT_PATH, "triton-opt")
elif tool == "triton-llvm-opt":
tool_path = os.path.join(TRITON_OPT_PATH, "triton-llvm-opt")
elif tool == "user-tool":
tokens = split_cmd_arguments(flags)
if not tokens:
raise CompilerPipelineError("Empty user-tool invocation")
tool_path = tokens[0]
flags = " ".join(tokens[1:])
else:
raise CompilerPipelineError(f"Unknown pipeline tool: '{tool}'")
# Make one session dir under PytorchExplorer/session_temps.
session_dir = tempfile.mkdtemp(dir=SESSION_TEMPS_ROOT, prefix=f"sess_{uid}_")
try:
# Step 1: Write initial IR into the session dir.
prev_path = os.path.join(session_dir, f"ir_init_{uid}.ll")
with open(prev_path, "w") as f:
f.write(ir)

out_path = os.path.join(tempfile.gettempdir(), f"ir_step_{index}_{uid}")
temp_files.append(out_path)
if dump_each:
output += f"\n\n===== Initial IR =====\n{ir}"

# Step 2: Apply pipeline stages (all I/O + cwd inside session_dir).
for index, (tool, flags) in enumerate(pipeline):
if tool == "torch-mlir-opt":
tool_path = os.path.join(TORCH_MLIR_OPT_PATH, "torch-mlir-opt")
elif tool == "mlir-opt":
tool_path = os.path.join(LLVM_BIN_PATH, "mlir-opt")
elif tool == "mlir-translate":
tool_path = os.path.join(LLVM_BIN_PATH, "mlir-translate")
elif tool == "opt":
flags += " -S"
tool_path = os.path.join(LLVM_BIN_PATH, "opt")
elif tool == "llc":
tool_path = os.path.join(LLVM_BIN_PATH, "llc")
elif tool == "triton-opt":
tool_path = os.path.join(TRITON_OPT_PATH, "triton-opt")
elif tool == "triton-llvm-opt":
tool_path = os.path.join(TRITON_OPT_PATH, "triton-llvm-opt")
elif tool == "user-tool":
tokens = split_cmd_arguments(flags)
if not tokens:
raise CompilerPipelineError("Empty user-tool invocation")
tool_path = tokens[0]
flags = " ".join(tokens[1:])
if os.path.basename(tool_path) == "opt" and "-S" not in flags.split():
flags += " -S"
else:
raise CompilerPipelineError(f"Unknown pipeline tool: '{tool}'")

success, stderr = run_external_opt_tool_file(
prev_path, flags, tool_path, out_path
)
if not success:
raise CompilerPipelineError(f"{tool} failed: {stderr}")
out_path = os.path.join(session_dir, f"ir_step_{index}_{uid}.ll")

if dump_each:
with open(out_path, "r") as f:
stage_output = f.read()
output += f"\n\n===== IR after {tool} {flags} =====\n{stage_output}"
if "dot-cfg" in flags and "--dot-cfg-dir=" not in flags:
flags += f" --dot-cfg-dir={session_dir}"

prev_path = out_path
# Run the tool with cwd=session_dir so DOTs land here.
success, stderr = run_external_opt_tool_file(
prev_path, flags, tool_path, out_path, cwd=session_dir
)
if not success:
raise CompilerPipelineError(f"{tool} failed: {stderr}")

# Collect DOTs from session_dir -> convert to PDFs -> merge -> attach.
dot_files = sorted(
set(
glob.glob(os.path.join(session_dir, "*.dot"))
+ glob.glob(os.path.join(session_dir, ".*.dot"))
)
)
pdf_paths: List[str] = []

if not dump_each:
with open(prev_path, "r") as f:
output = f.read()
# Only warn if user requested dot-cfg but no DOTs appeared.
if "dot-cfg" in flags and not dot_files:
logger.warning(
"No *.dot emitted by -passes=dot-cfg; checked %s", session_dir
)

# Cleanup.
for path in temp_files:
try:
os.remove(path)
except Exception:
pass
# Convert DOT -> PDF.
if dot_files:
if not shutil.which("dot"):
logger.error(
"'dot' (graphviz) not found on PATH; cannot render CFG PDFs."
)
else:
for df in sorted(set(dot_files)):
pdf_path = os.path.splitext(df)[0] + ".pdf"
try:
subprocess.run(
["dot", "-Tpdf", df, "-o", pdf_path], check=True
)
pdf_paths.append(pdf_path)
except Exception as e:
logger.error(f"Failed to convert {df} to PDF: {e}")

# Remove DOTs after conversion (keep PDFs).
for df in dot_files:
try:
os.remove(df)
except Exception:
pass

def _encode_and_attach(path_to_pdf: str):
try:
with open(path_to_pdf, "rb") as pf:
encoded = base64.b64encode(pf.read()).decode("utf-8")
pdf_blocks.append((os.path.basename(path_to_pdf), encoded))
if dump_each:
nonlocal output
output += f"\n\n===== DOT PDF {os.path.basename(path_to_pdf)} =====\n{encoded}"
except Exception as e:
logger.error(f"Failed to read PDF {path_to_pdf}: {e}")

# Merge PDFs if we have more than one.
if pdf_paths:
merged_ok = False
merged_path = os.path.join(session_dir, f"cfg-merged-stage-{index}.pdf")

if PdfMerger is not None:
try:
merger = PdfMerger()
for p in sorted(pdf_paths):
merger.append(p)
with open(merged_path, "wb") as mf:
merger.write(mf)
merger.close()
_encode_and_attach(merged_path)
merged_ok = True
except Exception as e:
logger.error(f"PDF merge via PyPDF2 failed: {e}")

if not merged_ok and shutil.which("pdfunite"):
try:
cmd = ["pdfunite"] + sorted(pdf_paths) + [merged_path]
subprocess.run(cmd, check=True)
_encode_and_attach(merged_path)
merged_ok = True
except Exception as e:
logger.error(f"PDF merge via pdfunite failed: {e}")

if not merged_ok:
# Fall back to attaching individual PDFs.
for p in sorted(pdf_paths):
_encode_and_attach(p)

# Handle analysis-only stages (no output file produced).
wrote_output = os.path.exists(out_path)

if dump_each:
path_to_show = out_path if wrote_output else prev_path
stage_output, is_binary = _read_file_safe(path_to_show)
if not wrote_output:
output += f"\n\n===== IR after {tool} {flags} (no new output; IR unchanged) =====\n"
else:
output += f"\n\n===== IR after {tool} {flags} =====\n"
output += stage_output

if wrote_output:
prev_path = out_path

# Final assembly.
if not dump_each:
with open(prev_path, "rb") as f:
data = f.read()
try:
output = data.decode("utf-8")
except UnicodeDecodeError:
encoded = base64.b64encode(data).decode("utf-8")
if data.startswith(b"%PDF"):
pdf_blocks.insert(
0, (os.path.basename(prev_path) + ".pdf", encoded)
)
output = ""
else:
output = f"===== BINARY OUTPUT {os.path.basename(prev_path)} =====\n{encoded}"

for name, encoded in pdf_blocks:
if output:
output += "\n\n"
output += f"===== DOT PDF {name} =====\n{encoded}"

return output
return output

finally:
# Nuke the whole session dir; PDFs/DOTs/IR intermediates are all ephemeral.
shutil.rmtree(session_dir, ignore_errors=True)


# Torch graph IR.
Expand Down
1 change: 1 addition & 0 deletions session_temps/.gitkeep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

3 changes: 2 additions & 1 deletion setup_backend.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ sudo apt-get update
sudo apt-get -y install \
llvm-22-dev \
llvm-22-tools \
mlir-22-tools
mlir-22-tools \
graphviz

echo "Exporting LLVM 22 tools path..."
export PATH=/usr/lib/llvm-22/bin:$PATH
Expand Down
Loading