@@ -10,6 +10,7 @@
 
 INDEX_MAPPING_PLACEHOLDER = None
 MAX_RECURSION_DEPTH = 500
+MAX_LEGAL_DIM = 100
 
 class Node(object):
     """ Node of DepGraph.
@@ -365,7 +366,7 @@ def build_dependency(
         # Ignore layers & nn.Parameter
         if ignored_layers is not None:
             self.IGNORED_LAYERS_IN_TRACING.extend(ignored_layers)
-        self.ignored_params = ignored_params
+        self.ignored_params = ignored_params if ignored_params is not None else []
 
         # Ignore all sub-modules of customized layers since they will be handled by the customized pruner
         for layer_type_or_instance in self.CUSTOMIZED_PRUNERS.keys():
@@ -512,7 +513,7 @@ def _fix_dependency_graph_non_recursive(dep, idxs, *args):
             )
 
         _fix_dependency_graph_non_recursive(*group[0])
-        
+
         # merge pruning ops
         merged_group = Group() # craft a new group for merging
         for dep, idxs in group.items:
@@ -941,9 +942,9 @@ def _update_slice_index_mapping(self, slice_node: Node):
             return
         grad_fn = slice_node.grad_fn
         if hasattr(grad_fn, '_saved_self_sym_sizes'):
-            if len(grad_fn._saved_self_sym_sizes)==4 and grad_fn._saved_dim != 1:
+            if len(grad_fn._saved_self_sym_sizes)==4 and grad_fn._saved_dim != 1 and grad_fn._saved_dim < MAX_LEGAL_DIM:
                 return
-            elif len(grad_fn._saved_self_sym_sizes)==3 and grad_fn._saved_dim != 2:
+            elif len(grad_fn._saved_self_sym_sizes)==3 and grad_fn._saved_dim != 2 and grad_fn._saved_dim < MAX_LEGAL_DIM:
                 return
 
         start, step, end, dim = slice_node.module.start, slice_node.module.step, slice_node.module.end, slice_node.module.dim
@@ -966,8 +967,6 @@ def _init_shape_information(self):
             grad_fn = node.grad_fn
 
             if hasattr(grad_fn, '_saved_self_sizes') or hasattr(grad_fn, '_saved_split_sizes'):
-                MAX_LEGAL_DIM = 100
-
                 if hasattr(grad_fn, '_saved_split_sizes') and hasattr(grad_fn, '_saved_dim'):
                     if grad_fn._saved_dim != 1 and grad_fn._saved_dim < MAX_LEGAL_DIM: # a temp fix for pytorch==1.11, where the _saved_dim is an uninitialized value like 118745347895359
                         continue
@@ -1095,7 +1094,7 @@ def _update_concat_index_mapping(self, cat_node: Node):
         if cat_node.type != ops.OPTYPE.CONCAT:
             return
 
-        if hasattr(cat_node.grad_fn, '_saved_dim') and cat_node.grad_fn._saved_dim != 1: # this only works for Pytorch>=1.12
+        if hasattr(cat_node.grad_fn, '_saved_dim') and cat_node.grad_fn._saved_dim < MAX_LEGAL_DIM and cat_node.grad_fn._saved_dim != 1: # this only works for Pytorch>=1.12
             return
 
         if cat_node.module.concat_sizes is not None:
@@ -1151,11 +1150,10 @@ def _update_split_index_mapping(self, split_node: Node):
         # There a issue in some pytorch version, where the _saved_dim is an uninitialized value like 118745347895359
         # So we need to check if the _saved_dim is a valid value (<len(_saved_self_sym_sizes) or a nominal value like 20)
         if hasattr(split_node.grad_fn, '_saved_self_sym_sizes'):
-            if split_node.grad_fn._saved_dim < len(split_node.grad_fn._saved_self_sym_sizes) and split_node.grad_fn._saved_dim != 1:
+            if split_node.grad_fn._saved_dim < len(split_node.grad_fn._saved_self_sym_sizes) and split_node.grad_fn._saved_dim < MAX_LEGAL_DIM and split_node.grad_fn._saved_dim != 1:
                 return
         else:
-            THRESHOLD = 20
-            if split_node.grad_fn._saved_dim < THRESHOLD and split_node.grad_fn._saved_dim >= 0 and split_node.grad_fn._saved_dim != 1:
+            if split_node.grad_fn._saved_dim >= 0 and split_node.grad_fn._saved_dim < MAX_LEGAL_DIM and split_node.grad_fn._saved_dim != 1:
                 return
         offsets = split_node.module.offsets
 
@@ -1189,7 +1187,7 @@ def _update_unbind_index_mapping(self, unbind_node: Node):
         if unbind_node.type != ops.OPTYPE.UNBIND:
             return
 
-        if hasattr(unbind_node.grad_fn, '_saved_dim') and unbind_node.grad_fn._saved_dim != 0: # For timm attention
+        if hasattr(unbind_node.grad_fn, '_saved_dim') and unbind_node.grad_fn._saved_dim < MAX_LEGAL_DIM and (unbind_node.grad_fn._saved_dim) != 0: # this only works for Pytorch>=1.12
             return
 
         num_chunks = len(unbind_node.outputs)
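
A note on the pattern this commit consolidates: on some PyTorch builds (e.g. 1.11), grad_fn._saved_dim can be read back as an uninitialized integer such as 118745347895359 instead of the real dimension, so each index-mapping update only trusts _saved_dim when it sits below a nominal bound, now the single module-level MAX_LEGAL_DIM = 100 instead of per-function constants like THRESHOLD = 20. A minimal sketch of that validity check follows; the helper name is illustrative and not part of the commit:

# Minimal sketch of the guard pattern used above; the helper is hypothetical.
MAX_LEGAL_DIM = 100  # nominal upper bound; real tensor ranks are far smaller

def saved_dim_is_trustworthy(saved_dim, ndim=None):
    # Reject negative or absurdly large values, which indicate an
    # uninitialized _saved_dim rather than a real dimension index.
    if saved_dim < 0 or saved_dim >= MAX_LEGAL_DIM:
        return False
    # When the tensor rank is known (e.g. via _saved_self_sym_sizes),
    # the dimension must also be smaller than that rank.
    if ndim is not None and saved_dim >= ndim:
        return False
    return True

assert saved_dim_is_trustworthy(1, ndim=4)
assert not saved_dim_is_trustworthy(118745347895359)  # garbage value seen on pytorch==1.11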