Skip to content

Commit c914f53

Browse files
committed
Sync: for the incendio machine
1 parent 6d9ec43 commit c914f53

16 files changed

Lines changed: 625 additions & 0 deletions

mlir/optmization/.clang-format

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
BasedOnStyle: LLVM
2+
AlwaysBreakTemplateDeclarations: Yes
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Dev-container image for the MLIR optimization tutorial: the fastdev Ubuntu
# 22.04 base image plus the LLVM/Clang 19 toolchain from apt.llvm.org.
FROM alwaysproblem/fastdev-u2204:zsh

# User and group ids handed to setup_new_user in the last RUN step.
ARG UID=1000
ARG GID=1000

# Register the apt.llvm.org repositories (development snapshot, 18 and 19) and
# their signing key, install the toolchain, then expose the version-19 binaries
# under their unsuffixed names.
#
# - The source list is written with a single printf instead of a chain of
#   echo appends; the resulting file content is unchanged.
# - The key is saved with `wget -O` rather than `wget | tee`: a RUN step uses
#   /bin/sh without pipefail by default, so a failed download in a pipeline is
#   hidden behind tee's zero exit status.
# - apt-get is used instead of apt, whose command-line interface is not meant
#   for scripts.
# - The apt package lists are removed in the same layer so they do not end up
#   in the image; run `apt-get update` before installing packages later.
RUN printf '%s\n' \
        'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy main' \
        'deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy main' \
        '# 18' \
        'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main' \
        'deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main' \
        '# 19' \
        'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-19 main' \
        'deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-19 main' \
        > /etc/apt/sources.list.d/llvm.list \
    && wget -qO /etc/apt/trusted.gpg.d/apt.llvm.org.asc https://apt.llvm.org/llvm-snapshot.gpg.key \
    && apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        clang-19 \
        clang-format-19 \
        clang-tidy-19 \
        clangd-19 \
        cmake \
        cmake-format \
        libfmt-dev \
        libspdlog-dev \
        libtinfo-dev \
        lld-19 \
        lldb-19 \
        ninja-build \
        python3 \
        python3-dev \
        python3-pip \
        python3-setuptools \
        zlib1g-dev \
    && update-alternatives --install /usr/bin/clang clang /usr/bin/clang-19 100 \
    && update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-19 100 \
    && update-alternatives --install /usr/bin/clangd clangd /usr/bin/clangd-19 100 \
    && update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-19 100 \
    && update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-19 100 \
    && update-alternatives --install /usr/bin/lld lld /usr/bin/lld-19 100 \
    && update-alternatives --install /usr/bin/lldb lldb /usr/bin/lldb-19 100 \
    && rm -rf /var/lib/apt/lists/*

# Let git operate on repositories owned by any user, run the base image's
# setup_new_user helper with the requested ids, and install the Python
# developer tools (pre-commit, compdb) without keeping pip's download cache.
# NOTE(review): setup_new_user comes from the base image; presumably it creates
# a user with the given UID/GID -- confirm against the base image.
# NOTE(review): bash and zsh treat UID as a special shell variable, so switching
# this step to one of them via SHELL would likely shadow the build argument.
RUN git config --global --add safe.directory '*' \
    && /root/.local/bin/setup_new_user "${UID}" "${GID}" \
    && python3 -m pip install --no-cache-dir pre-commit compdb
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
2+
// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
3+
{
4+
"remoteUser": "root",
5+
"name": "mlir-example",
6+
"workspaceMount": "source=${localWorkspaceFolder},target=${localWorkspaceFolder}/../../../MLcompiler-tutorial/mlir/${localWorkspaceFolderBasename},type=bind",
7+
"workspaceFolder": "/root/Desktop/dockerVolumn/MLcompiler-tutorial/mlir/${localWorkspaceFolderBasename}",
8+
"build": {
9+
"context": "${localWorkspaceFolder}/.devcontainer",
10+
"dockerfile": "Dockerfile",
11+
"options": [
12+
"--net=host"
13+
],
14+
"args": {
15+
"UID": "1000",
16+
"GID": "1000"
17+
}
18+
},
19+
// Features to add to the dev container. More info: https://containers.dev/features.
20+
// "features": {},
21+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
22+
// "forwardPorts": [],
23+
// Use 'postCreateCommand' to run commands after the container is created.
24+
// "postCreateCommand": "python --version",
25+
// Configure tool-specific properties.
26+
// "customizations": {},
27+
// The container already connects as root (see "remoteUser" at the top of this file). More info: https://aka.ms/dev-containers-non-root.
28+
// "remoteUser": "root"
29+
"privileged": true,
30+
// "capAdd": ["SYS_PTRACE"],
31+
"mounts": [
32+
{
33+
"source": "${localWorkspaceFolder}/../../../",
34+
"target": "/root/Desktop/dockerVolumn",
35+
"type": "bind"
36+
}
37+
],
38+
"runArgs": [
39+
// "--cap-add=SYS_PTRACE",
40+
// "--security-opt",
41+
// "seccomp=unconfined",
42+
"--name",
43+
// "${localEnv:USER}-tvm",
44+
"yyx-mlir-example",
45+
// "-v",
46+
// "/data/rech/yongxiy/Desktop/dockerVolumn:/root/Desktop/dockerVolumn"
47+
],
48+
"customizations": {
49+
"vscode": {
50+
"extensions": [
51+
"jeff-hykin.better-cpp-syntax",
52+
"aaron-bond.better-comments",
53+
"ms-vscode.cpptools-themes",
54+
"revng.llvm-ir",
55+
"jakob-erzar.llvm-tablegen",
56+
"MomenAbdelkarim-WyattCalandro-LuisPrieto.mlir",
57+
"ms-vscode.cpptools",
58+
"ms-vscode.cpptools-extension-pack",
59+
"twxs.cmake",
60+
"josetr.cmake-language-support-vscode",
61+
"ms-vscode.cmake-tools",
62+
"cheshirekow.cmake-format",
63+
"yzhang.markdown-all-in-one",
64+
"bierner.markdown-preview-github-styles",
65+
"bierner.markdown-mermaid",
66+
"DavidAnson.vscode-markdownlint",
67+
"llvm-vs-code-extensions.vscode-mlir",
68+
"llvm-vs-code-extensions.vscode-clangd",
69+
"llvm-vs-code-extensions.lldb-dap",
70+
"mutantdino.resourcemonitor",
71+
"hoovercj.vscode-power-mode"
72+
]
73+
}
74+
}
75+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This file is copied into the container along with environment.yml* from the parent
2+
folder. This file is included to prevent the Dockerfile COPY instruction from
3+
failing if no environment.yml is found.

mlir/optmization/.envsetup.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Load conda's shell integration from the miniconda install under /root, then
# activate the `mlir` environment. Intended to be sourced, not executed.
source /root/miniconda3/etc/profile.d/conda.sh && conda activate mlir
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# pre-commit hooks: generic file hygiene, clang-format for C/C++ sources, and
# cmake-format / cmake-lint for CMake files.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.3.0
    hooks:
      - id: check-yaml
      - id: trailing-whitespace
      - id: end-of-file-fixer

  # Kept on the same major version as the clang-format-19 binary installed in
  # the dev container so the hook and the editor format code identically.
  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: 'v19.1.7'
    hooks:
      - id: clang-format
        types_or: [c++, c]

  - repo: https://github.com/cheshirekow/cmake-format-precommit
    rev: v0.6.10
    hooks:
      - id: cmake-format
      - id: cmake-lint

mlir/optmization/CMakeLists.txt

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Top-level build for the MLIR optimization tutorial. Locates an installed
# LLVM/MLIR through their CMake package configs, pulls in the LLVM/MLIR helper
# modules, and builds the sources under explore/.
cmake_minimum_required(VERSION 3.10)

# note: fixes the zstd/terminfo "not found" error
# (presumably the reason C is enabled alongside CXX -- confirm).
project(mlir-example LANGUAGES C CXX)

# ############## For conda users.################################
find_package(LLVM CONFIG REQUIRED)
find_package(MLIR CONFIG REQUIRED)
# set(MLIR_TABLEGEN_EXE /root/anaconda3/envs/mlir/bin/mlir-tblgen)
# ##############################################################################

message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
message(STATUS "Found MLIR ${MLIR_PACKAGE_VERSION}")
message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}")
message(STATUS "Found MLIRTableGen: ${MLIR_TABLEGEN_EXE}")
message(STATUS "LLVM_INCLUDE_DIRS include dirs: ${LLVM_INCLUDE_DIRS}")
message(STATUS "MLIR_INCLUDE_DIRS include dirs: ${MLIR_INCLUDE_DIRS}")

# This is for non-conda users. find_package(LLVM CONFIG PATHS
# ${CMAKE_CURRENT_SOURCE_DIR}/third_party/lib/cmake/llvm) find_package(MLIR
# CONFIG PATHS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/lib/cmake/mlir)
# set(MLIR_TABLEGEN_EXE ${CMAKE_CURRENT_SOURCE_DIR}/third_party/bin/mlir-tblgen)

# Make the helper modules included below resolvable from the package configs
# themselves, so the build no longer depends on -DCMAKE_MODULE_PATH being passed
# on the command line (passing it remains harmless).
list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")

# LLVM_INCLUDE_DIRS / MLIR_INCLUDE_DIRS (plural) are the variables documented
# for consumers of LLVMConfig.cmake and MLIRConfig.cmake.
include_directories(${LLVM_INCLUDE_DIRS})
include_directories(${MLIR_INCLUDE_DIRS})

include(LLVMDistributionSupport)
include(TableGen)
include(AddMLIR)
include(AddLLVM)
# include(HandleLLVMOptions)

# Request C++17 through CMake's standard-selection variables instead of a raw
# -std flag; extensions are disabled so the compiler still receives -std=c++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# note: fix the llvm::cl undefined reference problem
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")

add_subdirectory(explore)

mlir/optmization/README.md

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# Optimization Overview
2+
3+
This tutorial series walks through key optimization techniques in ML compilers using MLIR, ordered by pedagogical progression. Each stage builds on concepts from the previous one.
4+
5+
## Environment Setup
6+
7+
### Environment Preparation with conda (Optional)
8+
9+
- OS must be higher than ubuntu 22.04.
10+
- install gcc-13 and g++-13
11+
12+
```bash
13+
apt update -y && \
14+
apt install -yq gcc-13 g++-13
15+
# apt install -yq software-properties-common \
16+
# add-apt-repository -y ppa:ubuntu-toolchain-r/test \
17+
# apt update -y
18+
# apt install -yq gcc-11 g++-11
19+
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 20
20+
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 20
21+
```
22+
23+
- Install CMake and Ninja using whichever method you prefer; conda works best for me.
24+
25+
```bash
26+
conda create -n mlir -y
27+
conda activate mlir
28+
# conda install cmake ninja clang-format clang lld ncurses mlir llvm -c conda-forge
29+
conda install cmake ninja clang-format clang clang-tools mlir zlib spdlog fmt lit llvm=19.* -c conda-forge -y
30+
# conda create -n mlir cmake ninja clang-format clang mlir zlib spdlog fmt lit llvm -c conda-forge -y
31+
```
32+
33+
- build example with conda
34+
35+
```bash
36+
cd example
37+
bash build_with_conda.sh all
38+
```
39+
40+
### Environment Preparation with dev containers
41+
42+
In VS Code, open the Command Palette and run `Dev Containers: Open Folder in Container...`.
43+
44+
- build example with dev containers
45+
46+
```bash
47+
cd example
48+
bash scripts/sync_deps.sh
49+
bash scripts/build_deps.sh
50+
bash build.sh all
51+
```
52+
53+
## Configure clangd
54+
55+
```bash
56+
cd example
57+
# After configuring the project with CMake, generate the compilation database for clangd by running the following command:
58+
compdb -p build list > compile_commands.json
59+
```
60+
61+
## Plan
62+
63+
### Phase 1: MatMul (Foundation)
64+
65+
**Goal:** Establish core optimization vocabulary and mechanics.
66+
67+
| Topic | Description |
68+
| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
69+
| Structured Op | Define and lower a matmul via `linalg.generic` / named ops; understand the iteration domain, indexing maps, and payload. |
70+
| Tiling | Apply `scf.forall` / `scf.for` tile-and-fuse to decompose the M×N×K loop nest; explore tile-size trade-offs. |
71+
| Locality | Demonstrate cache-friendly access via loop permutation (MKN vs MNK), packing, and micro-kernel promotion to registers. |
72+
| Simple Cost Model | Introduce a basic analytical model (FLOPs, memory traffic, arithmetic intensity) to guide tile-size selection. |
73+
| Pipeline Abstraction | Compose the above into a reusable pass pipeline: tile → promote → vectorize → lower, showing how MLIR pass infrastructure orchestrates transformations. |
74+
75+
**Deliverable:** An end-to-end optimized matmul that is competitive with a naive BLAS call, with clear before/after IR at every stage.
76+
77+
---
78+
79+
### Phase 2: Conv2D + Activation Fusion (Spatial & Fusion)
80+
81+
**Goal:** Extend tiling to spatial dimensions and introduce operator fusion.
82+
83+
| Topic | Description |
84+
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
85+
| Fusion | Fuse an elementwise activation (ReLU, GELU) into the convolution producer-consumer pair; understand producer-consumer analysis and the legality of fusion. |
86+
| Spatial Tiling | Tile output height and width dimensions; manage the resulting input tile expansion due to the kernel window (halo). |
87+
| Layout | Explore NHWC vs NCHW (and packed variants like NCHWc); understand how data layout affects vectorization and memory access patterns. |
88+
| Halo / Reuse | Handle overlapping input regions across tiles; compute the halo size from kernel size, stride, and dilation; demonstrate data reuse. |
89+
90+
**Deliverable:** A fused conv2d + activation kernel with explicit spatial tiling, demonstrating measurable speedup from fusion and layout selection.
91+
92+
---
93+
94+
### Phase 3: LayerNorm / Softmax (Reduction Scheduling)
95+
96+
**Goal:** Tackle reduction-heavy operations where numerical stability and scheduling are tightly coupled.
97+
98+
| Topic | Description |
99+
| -------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
100+
| Reduction Scheduling | Implement multi-pass (mean → variance → normalize) vs single-pass (Welford) reduction strategies; tile reductions across threads. |
101+
| Scratch Buffer | Allocate and manage intermediate buffers (`memref.alloca` / workspace) for partial results; understand buffer lifetime and placement. |
102+
| Numerics–Schedule Coupling | Show how the softmax "max-subtract" trick and log-sum-exp rewriting are not just numerical choices but directly constrain the legal schedules. |
103+
104+
**Deliverable:** A numerically stable, tiled LayerNorm/Softmax implementation with clear discussion of how algorithmic rewrites enable (or block) specific schedules.
105+
106+
---
107+
108+
### Phase 4: Subgraph Fusion & Memory Planning (Graph Level)
109+
110+
**Goal:** Move from single-op to multi-op / graph-level optimization.
111+
112+
| Topic | Description |
113+
| ------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
114+
| Graph Scheduling | Decide fusion groups and execution order across a small subgraph (e.g., matmul → bias → layernorm); model the trade-off between parallelism and memory pressure. |
115+
| Peak Memory Optimization | Apply operator reordering, in-place updates, and buffer sharing (liveness analysis) to minimize peak memory; visualize the memory waterline before/after. |
116+
117+
**Deliverable:** A small end-to-end subgraph whose peak memory and kernel count are jointly optimized, with tooling to visualize the memory timeline.
118+
119+
---
120+
121+
### Suggested Timeline
122+
123+
| Week | Phase | Key Milestone |
124+
| ---- | ----------------------------- | -------------------------------------------- |
125+
| 1–3 | Phase 1 – MatMul | Tiled + vectorized matmul with pass pipeline |
126+
| 4–5 | Phase 2 – Conv2D + Activation | Fused conv2d-relu with spatial tiling |
127+
| 6–7 | Phase 3 – LayerNorm / Softmax | Numerically stable tiled reduction |
128+
| 8–9 | Phase 4 – Subgraph Fusion | Graph-level fusion with memory planning |
129+
| 10 | Wrap-up | Benchmarking, profiling, and write-up |
130+

mlir/optmization/build.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/bin/bash
#
# Configure and build the project against the LLVM/MLIR tree under
# third_party/llvm (the non-conda setup).
#
# Usage: bash build.sh [target]
#   target  CMake target to build (default: all)
#
# Run from the project root: the build directory is recreated from scratch
# under the current working directory.

# Stop at the first failing step so a failed `cd` or configure never falls
# through to the build, and treat unset variables as errors.
set -euo pipefail

_target=${1:-all}

_workspaceFolder=$(pwd)
_llvmPrefix="${_workspaceFolder}/third_party/llvm"

rm -rf build
mkdir build
cd build

# For non-conda users:
# Paths are quoted so a workspace path containing spaces still works.
cmake .. -Wno-dev -G Ninja \
  -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE \
  -DCMAKE_BUILD_TYPE:STRING=Debug \
  -DCMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc \
  -DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/g++ \
  -DMLIR_DIR="${_llvmPrefix}/lib/cmake/mlir" \
  -DLLVM_DIR="${_llvmPrefix}/lib/cmake/llvm" \
  -DCMAKE_MODULE_PATH="${_llvmPrefix}/lib/cmake/mlir;${_llvmPrefix}/lib/cmake/llvm" \
  -DMLIR_TABLEGEN_EXE="${_llvmPrefix}/bin/mlir-tblgen"

# ninja
cmake \
  --build "${_workspaceFolder}/build" \
  --config Debug --target "${_target}"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
#
# Configure and build the project against the LLVM/MLIR packages installed in
# the `mlir` conda environment under /root/miniconda3.
#
# Usage: bash <this script> [target]
#   target  CMake target to build (default: all)
#
# Run from the project root: the build directory is recreated from scratch
# under the current working directory.

# Stop at the first failing step so a failed `cd` or configure never falls
# through to the build, and treat unset variables as errors.
set -euo pipefail

_target=${1:-all}

_workspaceFolder=$(pwd)
_condaPrefix=/root/miniconda3/envs/mlir

rm -rf build
mkdir build
cd build

cmake .. -G Ninja --no-warn-unused-cli \
  -Wno-dev \
  -DCMAKE_MODULE_PATH="${_condaPrefix}/lib/cmake/mlir;${_condaPrefix}/lib/cmake/llvm" \
  -DMLIR_TABLEGEN_EXE:FILEPATH="${_condaPrefix}/bin/mlir-tblgen" \
  -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE \
  -DCMAKE_BUILD_TYPE:STRING=Debug \
  -DCMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc \
  -DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/g++

# ninja
cmake \
  --build "${_workspaceFolder}/build" \
  --config Debug --target "${_target}"

0 commit comments

Comments
 (0)