Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 14 additions & 20 deletions examples/cpu/x86/matmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from lighthouse import dialects as lh_dialects
from lighthouse.workload import benchmark, get_bench_wrapper_schedule
from lighthouse.utils.numpy import numpy_to_mlir_type
from lighthouse.pipeline.helper import apply_registered_pass
import lighthouse.utils as lh_utils
from lighthouse import schedule as lh_schedule
import lighthouse.schedule.x86 as lh_schedule_x86
Expand Down Expand Up @@ -135,7 +134,7 @@ def payload(A, B, C):

return mod

def schedule_modules(
def pipeline(
self,
stop_at_stage: Optional[str] = None,
parameters: Optional[dict] = None,
Expand Down Expand Up @@ -267,24 +266,19 @@ def schedule_modules(
return scheds

# Lower to LLVM.
with lh_schedule.schedule_boilerplate() as (sched, named_seq):
target = named_seq.bodyTarget
target = apply_registered_pass(target, "convert-linalg-to-loops")
target = apply_registered_pass(target, "fold-memref-alias-ops")
target = apply_registered_pass(target, "expand-strided-metadata")
target = apply_registered_pass(target, "canonicalize")
target = apply_registered_pass(target, "convert-vector-to-scf")
target = apply_registered_pass(target, "lower-affine")
target = apply_registered_pass(target, "convert-scf-to-cf")
target = apply_registered_pass(target, "convert-vector-to-llvm")
target = apply_registered_pass(target, "convert-to-llvm")
target = apply_registered_pass(target, "reconcile-unrealized-casts")
lh_transform.cleanup(target)

transform.yield_()
scheds.append(sched)

return scheds
return scheds + [
"convert-linalg-to-loops",
"fold-memref-alias-ops",
"expand-strided-metadata",
"canonicalize",
"convert-vector-to-scf",
"lower-affine",
"convert-scf-to-cf",
"convert-vector-to-llvm",
"convert-to-llvm",
"reconcile-unrealized-casts",
"CleanupBundle",
]


def parse_cli():
Expand Down
125 changes: 36 additions & 89 deletions examples/feed-forward-mpi/feed-forward-mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

from mlir import ir
from mlir.dialects import transform
from mlir.dialects.transform.bufferization import OneShotBufferizeOp
from mlir.dialects.bufferization import LayoutMapOption
from mlir.execution_engine import ExecutionEngine
from mlir.runtime.np_to_memref import (
ranked_memref_to_numpy,
Expand Down Expand Up @@ -292,54 +290,18 @@ def find_factors(n):

return mod

def get_shard_schedule(self):
with schedule_boilerplate() as (schedule, named_sequence):
func = match(named_sequence.bodyTarget, ops={"func.func"})
func = apply_registered_pass(
func,
"sharding-propagation",
options={"traversal": "forward-backward"},
)
if self.verbose > 0:
transform.PrintOp(target=func)
func = apply_registered_pass(func, "shard-partition")
if self.verbose > 0:
transform.PrintOp(target=func)
func = apply_registered_pass(func, "shard-simplify")
if self.verbose > 0:
transform.PrintOp(target=func)
func = apply_registered_pass(func, "convert-shard-to-mpi")
func = apply_registered_pass(func, "canonicalize")
if self.verbose > 0:
transform.PrintOp(target=func)
func = apply_registered_pass(func, "tosa-to-linalg")
transform.YieldOp()
return schedule
def pipeline(
self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
) -> list[ir.Module | str]:
"""Generate schedules:
- sharding propagation, partition, and MPI
- tosa-to-linalg
- adding benchmark wrapper
- tile_and_vector
- all the rest"""

def get_bufferize_schedule(self):
# Create passes to inject deallocations. Don't do this for dealloc_2d, though.
with schedule_boilerplate() as (schedule, named_sequence):
anytype = transform.AnyOpType.get()
func = match(named_sequence.bodyTarget, ops={"func.func"})
mod = transform.get_parent_op(
anytype, func, op_name="builtin.module", deduplicate=True
)
mod = apply_registered_pass(mod, "linalg-generalize-named-ops")
mod = apply_registered_pass(mod, "linalg-fuse-elementwise-ops")
identity_layout = LayoutMapOption.IdentityLayoutMap
mod = apply_registered_pass(mod, "eliminate-empty-tensors")
mod = OneShotBufferizeOp(
mod,
allow_return_allocs_from_loops=False,
bufferize_function_boundaries=True,
function_boundary_type_conversion=identity_layout,
)
mod = apply_registered_pass(
mod,
"drop-equivalent-buffer-results",
options={"modify-public-functions": True},
)

# Run passes to inject deallocations. Don't do this for dealloc_2d, though.
for fname in [
self.benchmark_function_name,
self.payload_function_name,
Expand All @@ -351,56 +313,41 @@ def get_bufferize_schedule(self):
"alloc_wout",
]:
func = match(
mod,
named_sequence.bodyTarget,
ops={"func.func"},
op_attrs={"sym_name": ir.StringAttr.get(fname)},
)
func = apply_registered_pass(func, "buffer-deallocation-pipeline")
mod = transform.get_parent_op(
anytype, func, op_name="builtin.module", deduplicate=True
)
transform.YieldOp()
return schedule

def get_lower_schedule(self):
with schedule_boilerplate() as (schedule, named_sequence):
anytype = transform.AnyOpType.get()
func = match(named_sequence.bodyTarget, ops={"func.func"})
mod = transform.get_parent_op(
anytype, func, op_name="builtin.module", deduplicate=True
)
mod = apply_registered_pass(mod, "convert-linalg-to-parallel-loops")
mod = apply_registered_pass(mod, "scf-parallel-loop-fusion")
mod = apply_registered_pass(mod, "canonicalize")
mod = apply_registered_pass(mod, "expand-strided-metadata")
mod = apply_registered_pass(mod, "lower-affine")
mod = apply_registered_pass(mod, "convert-vector-to-scf")
mod = apply_registered_pass(mod, "convert-scf-to-cf")
mod = apply_registered_pass(mod, "symbol-dce")
mod = apply_registered_pass(mod, "convert-vector-to-llvm")
mod = apply_registered_pass(mod, "canonicalize")
mod = apply_registered_pass(mod, "convert-to-llvm")
mod = apply_registered_pass(mod, "reconcile-unrealized-casts")
mod = apply_registered_pass(mod, "cse")
if self.verbose > 1:
transform.PrintOp(target=mod)
transform.PrintOp(target=func)
transform.YieldOp()
return schedule

def schedule_modules(
self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
) -> list[ir.Module]:
"""Generate schedules:
- sharding propagation, partition, and MPI, tosa-to-linalg
- adding benchmark wrapper
- tile_and_vector
- all the rest"""
return [
self.get_shard_schedule(),
"func.func(sharding-propagation{traversal=forward-backward})",
"func.func(shard-partition)",
"func.func(shard-simplify)",
"func.func(convert-shard-to-mpi)",
"canonicalize",
"func.func(tosa-to-linalg)",
get_bench_wrapper_schedule(self),
tile_and_vector_matmul.create(self.tile_size),
self.get_bufferize_schedule(),
self.get_lower_schedule(),
"linalg-generalize-named-ops",
"eliminate-empty-tensors",
"one-shot-bufferize{bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map}",
"drop-equivalent-buffer-results{modify-public-functions=1}",
schedule,
"convert-linalg-to-parallel-loops",
"scf-parallel-loop-fusion",
"canonicalize",
"expand-strided-metadata",
"lower-affine",
"convert-vector-to-scf",
"convert-scf-to-cf",
"symbol-dce",
"convert-vector-to-llvm",
"canonicalize",
"convert-to-llvm",
"reconcile-unrealized-casts",
"cse",
]


Expand Down
48 changes: 11 additions & 37 deletions examples/workload/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,9 @@
from mlir import ir
from mlir.runtime.np_to_memref import get_ranked_memref_descriptor
from mlir.dialects import func, linalg, bufferization
from mlir.dialects import transform
from mlir.execution_engine import ExecutionEngine

from lighthouse import dialects as lh_dialects
from lighthouse.pipeline.helper import match
from lighthouse.pipeline.opt import PassBundles, apply_bundle

from lighthouse.workload import Workload, execute, benchmark, get_bench_wrapper_schedule

Expand Down Expand Up @@ -121,41 +118,18 @@ def payload(A, B, C):

return mod

def schedule_modules(
def pipeline(
self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
) -> ir.Module:
schedule_module = ir.Module.create()
schedule_module.operation.attributes["transform.with_named_sequence"] = (
ir.UnitAttr.get()
)
with ir.InsertionPoint(schedule_module.body):
named_sequence = transform.named_sequence(
"__transform_main",
[transform.AnyOpType.get()],
[],
arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}],
)
with ir.InsertionPoint(named_sequence.body):
anytype = transform.AnyOpType.get()
func = match(named_sequence.bodyTarget, ops={"func.func"})
mod = transform.get_parent_op(
anytype,
func,
op_name="builtin.module",
deduplicate=True,
)
mod = apply_bundle(mod, PassBundles["BufferizationBundle"])
mod = apply_bundle(mod, PassBundles["MLIRLoweringBundle"])
mod = apply_bundle(mod, PassBundles["CleanupBundle"])

if stop_at_stage == "bufferized":
transform.YieldOp()
return [schedule_module]

mod = apply_bundle(mod, PassBundles["LLVMLoweringBundle"])
transform.YieldOp()

return [get_bench_wrapper_schedule(self), schedule_module]
) -> list[str]:
pline = [
get_bench_wrapper_schedule(self),
"BufferizationBundle",
"MLIRLoweringBundle",
"CleanupBundle",
]
if stop_at_stage == "bufferized":
return pline
return pline + ["LLVMLoweringBundle"]


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion examples/xegpu/matmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def payload_module(self) -> ir.Module:
)
return mod

def schedule_modules(
def pipeline(
self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
) -> list[ir.Module]:
return [
Expand Down
2 changes: 1 addition & 1 deletion examples/xegpu/mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def payload_module(self) -> ir.Module:
)
return mod

def schedule_modules(
def pipeline(
self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
) -> list[ir.Module]:
return [
Expand Down
Loading
Loading