添加简单循环交换实现及相关测试

Alwaysproblem · Alwaysproblem · commit cd56fa23a7c8 · 2026-03-16T08:52:01.000Z
diff --git a/mlir/optimization/scheduler/CMakeLists.txt b/mlir/optimization/scheduler/CMakeLists.txt
@@ -18,6 +18,7 @@ add_executable(
   lib/FusionFeasibility.cpp
   lib/LivenessAdapter.cpp
   lib/LocalListScheduling.cpp
+  lib/SimpleLoopInterchange.cpp
   )
 
 # add_dependencies(lab-scheduler ToyCh6ShapeInferenceInterfaceIncGen
diff --git a/mlir/optimization/scheduler/include/lab/LabPasses.h b/mlir/optimization/scheduler/include/lab/LabPasses.h
@@ -16,5 +16,6 @@ std::unique_ptr<Pass> createLabLivenessPass();
 std::unique_ptr<Pass> createLabMemrefLifetimePass();
 std::unique_ptr<Pass> createLabFusionFeasibilityPass();
 std::unique_ptr<Pass> createAsyncLocalSchedulePass();
+std::unique_ptr<Pass> createSimpleLoopInterchangePass();
 
 } // namespace mlir
diff --git a/mlir/optimization/scheduler/lab-opt.cpp b/mlir/optimization/scheduler/lab-opt.cpp
@@ -52,7 +52,11 @@ int main(int argc, char **argv) {
       [](mlir::OpPassManager &pm) {
         pm.addPass(mlir::createAsyncLocalSchedulePass());
       });
-
+  mlir::PassPipelineRegistration<>(
+      "simple-loop-interchange", "Simple Loop Interchange Pass",
+      [](mlir::OpPassManager &pm) {
+        pm.addPass(mlir::createSimpleLoopInterchangePass());
+      });
   return mlir::asMainReturnCode(
       mlir::MlirOptMain(argc, argv, "Lab optimizer\n", registry));
 }
diff --git a/mlir/optimization/scheduler/lib/SimpleLoopInterchange.cpp b/mlir/optimization/scheduler/lib/SimpleLoopInterchange.cpp
@@ -0,0 +1,103 @@
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Visitors.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+using namespace mlir::affine;
+
+/// Checks whether `outer` is the root of a perfect two-level affine loop nest.
+///
+/// The body of `outer` must hold exactly one operation besides its terminator,
+/// and that operation must be an `affine.for`.
+///
+/// \param outer the candidate outer loop.
+/// \param inner output; receives the result of casting the single body
+///        operation to `AffineForOp` (null if it is some other operation).
+///        Left untouched when the body has zero or more than one
+///        non-terminator operation.
+/// \returns true iff `inner` was set to a non-null loop.
+static bool isPerfectTwoLevelNest(AffineForOp outer, AffineForOp &inner) {
+  auto bodyOps = outer.getRegion().front().without_terminator();
+
+  // An empty outer body has no nested loop at all.
+  auto it = bodyOps.begin();
+  if (it == bodyOps.end())
+    return false;
+
+  // More than one non-terminator op in the outer body: not a perfect nest.
+  Operation *soleOp = &*it;
+  if (++it != bodyOps.end())
+    return false;
+
+  inner = dyn_cast<AffineForOp>(soleOp);
+  return inner != nullptr;
+}
+
+/// Returns true if the last subscript of an affine memory access depends on
+/// `iv`.
+///
+/// `map` is the access map of an affine load/store and `mapOperands` are the
+/// values bound to that map. The operand list is ordered by the map's inputs,
+/// not by subscript position (e.g. `%A[%i + %j, %i]` has operands (%i, %j) but
+/// its last subscript is %i), so the last *result* of the map is inspected
+/// rather than the last operand.
+static bool lastSubscriptDependsOn(AffineMap map, ValueRange mapOperands,
+                                   Value iv) {
+  // Accesses without subscripts (rank-0 memrefs) never match.
+  if (map.getNumResults() == 0)
+    return false;
+
+  AffineExpr lastSubscript = map.getResult(map.getNumResults() - 1);
+
+  // Only dimension operands are inspected; symbol operands are ignored.
+  // NOTE(review): assumes affine.for induction variables are always bound as
+  // dims, never as symbols -- confirm against the Affine dialect rules.
+  unsigned numDims = map.getNumDims();
+  for (unsigned pos = 0; pos < numDims && pos < mapOperands.size(); ++pos)
+    if (mapOperands[pos] == iv && lastSubscript.isFunctionOfDim(pos))
+      return true;
+  return false;
+}
+
+/// Heuristic deciding whether interchanging `outer` and `inner` is worthwhile.
+///
+/// Walks every `affine.load` / `affine.store` nested in `inner` and records
+/// whether the last subscript of any access depends on the outer induction
+/// variable, and whether the last subscript of any access depends on the inner
+/// induction variable.
+///
+/// \returns true when at least one access uses the outer IV in its last
+///          subscript and no access uses the inner IV there.
+static bool shouldInterchangeByLastIndexHeuristic(AffineForOp outer,
+                                                  AffineForOp inner) {
+  Value outerIV = outer.getInductionVar();
+  Value innerIV = inner.getInductionVar();
+
+  bool outerUsedAsLastIndex = false;
+  bool innerUsedAsLastIndex = false;
+
+  // Shared bookkeeping for loads and stores.
+  auto recordAccess = [&](AffineMap map, ValueRange mapOperands) {
+    outerUsedAsLastIndex |= lastSubscriptDependsOn(map, mapOperands, outerIV);
+    innerUsedAsLastIndex |= lastSubscriptDependsOn(map, mapOperands, innerIV);
+  };
+
+  inner.walk([&](Operation *op) {
+    if (auto load = dyn_cast<AffineLoadOp>(op))
+      recordAccess(load.getAffineMap(), load.getMapOperands());
+    else if (auto store = dyn_cast<AffineStoreOp>(op))
+      recordAccess(store.getAffineMap(), store.getMapOperands());
+  });
+
+  // Worth trying when the outer IV drives the last subscript somewhere and
+  // the inner IV never does.
+  return outerUsedAsLastIndex && !innerUsedAsLastIndex;
+}
+
+namespace {
+/// Function pass that interchanges perfect two-level `affine.for` nests.
+///
+/// A nest is interchanged only when all of the following hold:
+///   * it is a perfect two-level nest (`isPerfectTwoLevelNest`),
+///   * neither loop produces results,
+///   * the inner loop does not take the outer induction variable as an
+///     operand (i.e. its bounds do not depend on the outer loop),
+///   * the last-index heuristic asks for the swap, and
+///   * `isValidLoopInterchangePermutation` accepts the permutation.
+struct SimpleLoopInterchangePass
+    : public PassWrapper<SimpleLoopInterchangePass,
+                         OperationPass<func::FuncOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SimpleLoopInterchangePass)
+
+  // NOTE(review): lab-opt.cpp exposes this pass through a pipeline named
+  // "simple-loop-interchange"; the argument below differs from that name.
+  StringRef getArgument() const final { return "lab-simple-loop-interchange"; }
+  StringRef getDescription() const final {
+    return "A simple affine loop interchange pass for perfect 2-level nests";
+  }
+
+  void runOnOperation() override;
+};
+
+void SimpleLoopInterchangePass::runOnOperation() {
+  func::FuncOp func = getOperation();
+
+  // Collect the loops up front so the IR is not rewritten during the walk.
+  SmallVector<AffineForOp> candidates;
+  func.walk([&](AffineForOp forOp) { candidates.push_back(forOp); });
+
+  for (AffineForOp outer : candidates) {
+    AffineForOp inner;
+    if (!isPerfectTwoLevelNest(outer, inner))
+      continue;
+
+    // Loops that yield values carry state through their terminators, which a
+    // plain swap of the two loops does not rewire; leave them alone.
+    if (outer->getNumResults() != 0 || inner->getNumResults() != 0)
+      continue;
+
+    // If the inner loop consumes the outer induction variable (for example a
+    // triangular nest `affine.for %j = 0 to %i`), moving it outside the outer
+    // loop would use that value before it is defined.
+    Value outerIV = outer.getInductionVar();
+    bool innerDependsOnOuterIV = false;
+    for (Value operand : inner->getOperands())
+      innerDependsOnOuterIV |= (operand == outerIV);
+    if (innerDependsOnOuterIV)
+      continue;
+
+    if (!shouldInterchangeByLastIndexHeuristic(outer, inner))
+      continue;
+
+    SmallVector<AffineForOp> loops = {outer, inner};
+    SmallVector<unsigned> perm = {1, 0}; // swap the two levels
+
+    if (!isValidLoopInterchangePermutation(loops, perm))
+      continue;
+
+    interchangeLoops(outer, inner);
+  }
+}
+} // namespace
+
+namespace mlir {
+/// Factory for the simple affine loop interchange pass; returns a newly
+/// allocated `SimpleLoopInterchangePass` owned by the caller.
+std::unique_ptr<Pass> createSimpleLoopInterchangePass() {
+  auto pass = std::make_unique<SimpleLoopInterchangePass>();
+  return pass;
+}
+} // namespace mlir
diff --git a/mlir/optimization/scheduler/tests/loop_reorder.mlir b/mlir/optimization/scheduler/tests/loop_reorder.mlir
@@ -0,0 +1,25 @@
+// Input for the simple loop interchange pass: a perfect two-level affine nest
+// that loads each element of %A, adds 1.0 and stores the result to %B.
+// Both accesses are subscripted [%j, %i], so the outer induction variable %i
+// is the last subscript and the inner induction variable %j is the first.
+func.func @interchange_me(%A: memref<64x64xf32>, %B: memref<64x64xf32>) {
+  affine.for %i = 0 to 64 {
+    affine.for %j = 0 to 64 {
+      %v = affine.load %A[%j, %i] : memref<64x64xf32>
+      %c = arith.constant 1.0 : f32
+      %r = arith.addf %v, %c : f32
+      affine.store %r, %B[%j, %i] : memref<64x64xf32>
+    }
+  }
+  return
+}
+
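+// Assuming the memrefs are stored in row-major order, the last subscript is the
+// contiguous one. The original i-j loop order makes the inner loop (%j) step
+// through the first subscript, so the nest is expected to be interchanged to: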
+// func.func @interchange_me(%A: memref<64x64xf32>, %B: memref<64x64xf32>) {
+//   affine.for %j = 0 to 64 {
+//     affine.for %i = 0 to 64 {
+//       %v = affine.load %A[%j, %i] : memref<64x64xf32>
+//       %c = arith.constant 1.0 : f32
+//       %r = arith.addf %v, %c : f32
+//       affine.store %r, %B[%j, %i] : memref<64x64xf32>
+//     }
+//   }
+//   return
+// }

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@ add_executable(`
`18`	`18`	`lib/FusionFeasibility.cpp`
`19`	`19`	`lib/LivenessAdapter.cpp`
`20`	`20`	`lib/LocalListScheduling.cpp`
	`21`	`+ lib/SimpleLoopInterchange.cpp`
`21`	`22`	`)`
`22`	`23`
`23`	`24`	`# add_dependencies(lab-scheduler ToyCh6ShapeInferenceInterfaceIncGen`