
Commit ea13d00

addressing review comments
1 parent 1d4f137 commit ea13d00

7 files changed

Lines changed: 58 additions & 82 deletions


examples/feed-forward-mpi/README.md

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ This example shows how MLIR's sharding infrastructure can be used to distribute
 
 Currently, only the lower part of the sharding pipeline is used: `shard-partition`, `convert-shard-to-mpi`, and lowering to LLVM. Therefore, the ingress MLIR is fully annotated.
 
-The example implements a single feed-fowrad layer, following a 1D/2D weight-stationary partition strategy as described in figures 2a and 2b of https://arxiv.org/pdf/2211.05102.
+The example implements a single feed-forward layer, following a 1D/2D weight-stationary partition strategy as described in figures 2a and 2b of https://arxiv.org/pdf/2211.05102.
 
 ## Prerequisites

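For orientation, the lower pipeline named in the README maps onto a transform schedule built with the helpers this commit introduces. A minimal sketch follows; anchoring `convert-shard-to-mpi` on `func.func` (rather than the enclosing module) and the bare context setup are assumptions, not the example's exact driver code:

from mlir import ir
from mlir.dialects import transform
from lighthouse.pipeline.helper import apply_registered_pass, match
from lighthouse.schedule.utils import schedule_boilerplate

# Sketch only: pass anchoring and context setup are assumptions.
with ir.Context(), ir.Location.unknown():
    with schedule_boilerplate() as (schedule, named_sequence):
        func = match(named_sequence.bodyTarget, ops={"func.func"})
        func = apply_registered_pass(func, "shard-partition")
        func = apply_registered_pass(func, "convert-shard-to-mpi")
        transform.YieldOp()
    print(schedule)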
examples/feed-forward-mpi/feed-forward-mpi.py

Lines changed: 21 additions & 51 deletions
@@ -32,6 +32,7 @@
 )
 from lighthouse.pipeline.helper import apply_registered_pass, match
 from lighthouse.workload import Workload, benchmark, get_bench_wrapper_schedule
+from lighthouse.schedule.utils import schedule_boilerplate
 from lighthouse.schedule.x86 import tile_and_vector_matmul
 from ff_weight_stationary import generate_ff_payload
 
@@ -290,24 +291,8 @@ def find_factors(n):
 
         return mod
 
-    def schedule_modules(
-        self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
-    ) -> list[ir.Module]:
-        """Generate two schedules: one that deals with sharding propagation, partition, and MPI.
-        Another one for all the rest"""
-        pre_schedule = ir.Module.create()
-        pre_schedule.operation.attributes["transform.with_named_sequence"] = (
-            ir.UnitAttr.get()
-        )
-        with ir.InsertionPoint(pre_schedule.body):
-            named_sequence = transform.named_sequence(
-                "__transform_pre",
-                [transform.AnyOpType.get()],
-                [],
-                arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}],
-            )
-        with ir.InsertionPoint(named_sequence.body):
-            anytype = transform.AnyOpType.get()
+    def get_shard_schedule(self):
+        with schedule_boilerplate() as (schedule, named_sequence):
             func = match(named_sequence.bodyTarget, ops={"func.func"})
             func = apply_registered_pass(
                 func,
@@ -317,7 +302,6 @@ def schedule_modules(
             if self.verbose > 0:
                 transform.PrintOp(target=func)
             func = apply_registered_pass(func, "shard-partition")
-            func = apply_registered_pass(func, "canonicalize")
             if self.verbose > 0:
                 transform.PrintOp(target=func)
             func = apply_registered_pass(func, "shard-simplify")
@@ -329,36 +313,17 @@
                 transform.PrintOp(target=func)
             func = apply_registered_pass(func, "tosa-to-linalg")
             transform.YieldOp()
-        func = None
-
-        bench_schedule = get_bench_wrapper_schedule(self)
-
-        tile_schedule = tile_and_vector_matmul.create(self.tile_size)
+        return schedule
 
-        main_schedule = ir.Module.create()
-        main_schedule.operation.attributes["transform.with_named_sequence"] = (
-            ir.UnitAttr.get()
-        )
-        with ir.InsertionPoint(main_schedule.body):
-            named_sequence = transform.named_sequence(
-                "__transform_main",
-                [transform.AnyOpType.get()],
-                [],
-                arg_attrs=[{"transform.readonly": ir.UnitAttr.get()}],
-            )
-        with ir.InsertionPoint(named_sequence.body):
+    def get_lower_schedule(self):
+        with schedule_boilerplate() as (schedule, named_sequence):
             anytype = transform.AnyOpType.get()
             func = match(named_sequence.bodyTarget, ops={"func.func"})
             mod = transform.get_parent_op(
                 anytype, func, op_name="builtin.module", deduplicate=True
             )
             mod = apply_registered_pass(mod, "linalg-generalize-named-ops")
-            mod = apply_registered_pass(mod, "canonicalize")
             mod = apply_registered_pass(mod, "linalg-fuse-elementwise-ops")
-            mod = apply_registered_pass(mod, "arith-expand")
-            mod = apply_registered_pass(mod, "memref-expand")
-            mod = apply_registered_pass(mod, "empty-tensor-to-alloc-tensor")
-            mod = apply_registered_pass(mod, "canonicalize")
             identity_layout = LayoutMapOption.IdentityLayoutMap
             mod = OneShotBufferizeOp(
                 mod,
@@ -371,27 +336,18 @@
                 "drop-equivalent-buffer-results",
                 options={"modify-public-functions": True},
             )
-            mod = apply_registered_pass(mod, "expand-realloc")
-            mod = apply_registered_pass(mod, "canonicalize")
             mod = apply_registered_pass(mod, "buffer-deallocation-simplification")
             mod = apply_registered_pass(mod, "bufferization-lower-deallocations")
             mod = apply_registered_pass(mod, "cse")
             mod = apply_registered_pass(mod, "canonicalize")
-            mod = apply_registered_pass(mod, "convert-bufferization-to-memref")
             mod = apply_registered_pass(mod, "convert-linalg-to-parallel-loops")
             mod = apply_registered_pass(mod, "scf-parallel-loop-fusion")
             mod = apply_registered_pass(mod, "canonicalize")
-            mod = apply_registered_pass(mod, "fold-memref-alias-ops")
             mod = apply_registered_pass(mod, "expand-strided-metadata")
-            mod = apply_registered_pass(mod, "convert-math-to-funcs")
             mod = apply_registered_pass(mod, "lower-affine")
             mod = apply_registered_pass(mod, "convert-vector-to-scf")
             mod = apply_registered_pass(mod, "convert-scf-to-cf")
             mod = apply_registered_pass(mod, "symbol-dce")
-            mod = apply_registered_pass(mod, "finalize-memref-to-llvm")
-            mod = apply_registered_pass(mod, "convert-math-to-llvm")
-            mod = apply_registered_pass(mod, "convert-math-to-libm")
-            mod = apply_registered_pass(mod, "convert-func-to-llvm")
             mod = apply_registered_pass(mod, "convert-vector-to-llvm")
             mod = apply_registered_pass(mod, "canonicalize")
             mod = apply_registered_pass(mod, "convert-to-llvm")
@@ -400,8 +356,22 @@
             if self.verbose > 1:
                 transform.PrintOp(target=mod)
             transform.YieldOp()
+        return schedule
 
-        return [pre_schedule, bench_schedule, tile_schedule, main_schedule]
+    def schedule_modules(
+        self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
+    ) -> list[ir.Module]:
+        """Generate schedules:
+        - sharding propagation, partition, and MPI, tosa-to-linalg
+        - adding benchmark wrapper
+        - tile_and_vector
+        - all the rest"""
+        return [
+            self.get_shard_schedule(),
+            get_bench_wrapper_schedule(self),
+            tile_and_vector_matmul.create(self.tile_size),
+            self.get_lower_schedule(),
+        ]
 
 
 if __name__ == "__main__":

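The net effect in this file: the monolithic schedule_modules is split into get_shard_schedule and get_lower_schedule, the shared module and named-sequence setup moves into the new schedule_boilerplate helper, and schedule_modules reduces to composing the four schedules listed in its docstring.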
examples/feed-forward-mpi/ff_weight_stationary.py

Lines changed: 1 addition & 1 deletion
@@ -124,7 +124,7 @@ def _(a, b, c, r):
         res = bufferization.materialize_in_destination(
             t_mk,
             sd_res,
-            sd_r,  # , restrict=True, writable=True
+            sd_r,
         )
         return shard.shard(res, sh_act, annotate_for_users=True)
 

lighthouse/schedule/pattern_schedule.py

Lines changed: 1 addition & 17 deletions
@@ -1,7 +1,6 @@
-from contextlib import contextmanager
 from mlir import rewrite, ir
 from mlir.dialects import ext, transform
-from mlir.dialects.transform import AnyOpType
+from lighthouse.schedule.utils import schedule_boilerplate
 
 
 @ext.register_dialect
@@ -35,21 +34,6 @@ def populate_patterns(
     return RewritePattern
 
 
-@contextmanager
-def schedule_boilerplate():
-    schedule = ir.Module.create()
-    schedule.operation.attributes["transform.with_named_sequence"] = ir.UnitAttr.get()
-    with ir.InsertionPoint(schedule.body):
-        named_sequence = transform.NamedSequenceOp(
-            "__transform_main",
-            [AnyOpType.get()],
-            [AnyOpType.get()],
-            arg_attrs=[{"transform.consumed": ir.UnitAttr.get()}],
-        )
-    with ir.InsertionPoint(named_sequence.body):
-        yield schedule, named_sequence
-
-
 def pattern_rewrite_schedule(patterns: dict, pname: str = "rewrite_pattern"):
     """Return a transform module that applies the given rewrite patterns.
     patterns: dict mapping op names to match-and-rewrite functions.

lighthouse/schedule/utils.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+from contextlib import contextmanager
+from mlir import ir
+from mlir.dialects import transform
+
+
+@contextmanager
+def schedule_boilerplate():
+    schedule = ir.Module.create()
+    schedule.operation.attributes["transform.with_named_sequence"] = ir.UnitAttr.get()
+    with ir.InsertionPoint(schedule.body):
+        named_sequence = transform.NamedSequenceOp(
+            "__transform_main",
+            [transform.AnyOpType.get()],
+            [transform.AnyOpType.get()],
+            arg_attrs=[{"transform.consumed": ir.UnitAttr.get()}],
+        )
+    with ir.InsertionPoint(named_sequence.body):
+        yield schedule, named_sequence

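The helper yields with the insertion point already inside the named sequence, so callers only emit the body ops (see get_shard_schedule above). Applying a finished schedule to a payload module could then look like the following sketch, assuming the upstream mlir.dialects.transform.interpreter binding:

from mlir import ir
from mlir.dialects.transform import interpreter


def apply_schedule(payload: ir.Module, schedule: ir.Module) -> None:
    # Run the schedule's "__transform_main" named sequence over the payload;
    # the sequence is the first op in the schedule module's body.
    interpreter.apply_named_sequence(payload, schedule.body.operations[0], schedule)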
lighthouse/workload/runner.py

Lines changed: 7 additions & 11 deletions
@@ -66,21 +66,17 @@ def execute(
         raise ValueError("Benchmark verification failed.")
 
 
-def bench_wrapper_pattern(funcname: str, get_bench_name=None):
+def bench_wrapper_pattern(funcname: str, benchname=None):
     """Returns a rewrite pattern that matches a function named `funcname` and clones it
-    as a new function with name given by `get_bench_name(funcname)` (default: "bench_" + funcname).
+    as a new function with name given by `benchname` (default: "bench_" + funcname).
     The new function is a benchmark wrapper that calls the payload function and times it.
     Every function call is timed separately. Returns the times (seconds) in a memref,
     which is passed as an additional argument to the benchmark function.
     It also takes two additional arguments for the number of runs and warmup iterations.
     """
-    marker = "__wrapped__"
-    if get_bench_name is None:
-
-        def default_bench_name(name):
-            return f"bench_{name}"
-
-        get_bench_name = default_bench_name
+    marker = "__bench_wrapped__"
+    if benchname is None:
+        benchname = f"bench_{funcname}"
 
     def match_and_rewrite(op, rewriter):
         if op.name.value != funcname:
@@ -100,7 +96,7 @@ def match_and_rewrite(op, rewriter):
         index_t = ir.IndexType.get()
         args = payload_arguments + [time_memref_t, index_t, index_t]
 
-        @func_cif(*args, name=get_bench_name(funcname))
+        @func_cif(*args, name=benchname)
         def bench(*args):
             index_t = ir.IndexType.get()
             zero = arith.constant(index_t, 0)
@@ -129,7 +125,7 @@ def get_bench_wrapper_schedule(workload: Workload):
         {
             "func.func": bench_wrapper_pattern(
                 workload.payload_function_name,
-                lambda name: workload.benchmark_function_name,
+                workload.benchmark_function_name,
             )
         },
         "add_bench_pattern",

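With the callback replaced by a plain string, call sites simplify accordingly; a sketch with illustrative names, assuming bench_wrapper_pattern is imported from this module:

from lighthouse.workload.runner import bench_wrapper_pattern

# Explicit wrapper name:
pattern = bench_wrapper_pattern("payload", benchname="my_bench")
# Default naming, now derived directly from the payload name ("bench_payload"):
pattern = bench_wrapper_pattern("payload")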
lighthouse/workload/workload.py

Lines changed: 9 additions & 1 deletion
@@ -68,7 +68,15 @@ def lower_payload(
         schedule_modules = self.schedule_modules(
             stop_at_stage=dump_payload, parameters=schedule_parameters
         )
-        assert isinstance(schedule_modules, list)
+        if not isinstance(schedule_modules, list):
+            raise TypeError(
+                f"schedule_modules() must return a list of ir.Module instances, "
+                f"got {type(schedule_modules).__name__}"
+            )
+        if not schedule_modules:
+            raise ValueError(
+                "schedule_modules() must return at least one schedule module."
+            )
         if not dump_payload or dump_payload != "initial":
             for schedule_module in schedule_modules:
                 # apply schedule on payload module

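For Workload subclasses, the new checks mean schedule_modules must return a non-empty list of modules. A minimal conforming override, sketched with the method names from the feed-forward example above (the class name is illustrative, and a real subclass implements the rest of the Workload interface):

from typing import Optional

from mlir import ir

from lighthouse.workload import Workload


class FeedForwardLike(Workload):
    def schedule_modules(
        self, stop_at_stage: Optional[str] = None, parameters: Optional[dict] = None
    ) -> list[ir.Module]:
        # Any non-empty list of transform schedule modules passes the checks.
        return [self.get_shard_schedule(), self.get_lower_schedule()]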