diff --git a/cogdl/models/nn/gcn.py b/cogdl/models/nn/gcn.py
index 71b3e9e4..a926afdd 100644
--- a/cogdl/models/nn/gcn.py
+++ b/cogdl/models/nn/gcn.py
@@ -49,6 +49,50 @@ def forward(self, input, edge_index, edge_attr=None):
     def __repr__(self):
         return self.__class__.__name__ + " (" + str(self.in_features) + " -> " + str(self.out_features) + ")"
 
+class GraphConvolutionBGNorm(nn.Module):
+    """GCN layer (cf. https://arxiv.org/abs/1609.02907) with "BingGe" normalization.
+
+    Computes ``(A + I) @ X @ W + b``: unit-weight identity entries are added to the adjacency.
+    """
+
+    def __init__(self, in_features, out_features, bias=True):
+        """Create an ``in_features x out_features`` weight and, if ``bias``, a bias vector."""
+        super(GraphConvolutionBGNorm, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
+        if bias:
+            self.bias = Parameter(torch.FloatTensor(out_features))
+        else:
+            self.register_parameter("bias", None)
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        """Draw the weight from U(-s, s) with s = 1 / sqrt(out_features) and zero the bias."""
+        stdv = 1.0 / math.sqrt(self.weight.size(1))
+        self.weight.data.uniform_(-stdv, stdv)
+        if self.bias is not None:
+            self.bias.data.zero_()
+
+    def forward(self, input, edge_index, edge_attr=None):
+        """Return ``(A + I) @ input @ W`` (+ bias); A holds ``edge_attr`` (default 1) at ``edge_index``."""
+        num_nodes, device = input.shape[0], input.device
+        if edge_attr is None:
+            edge_attr = torch.ones(edge_index.shape[1], dtype=input.dtype, device=device)
+        # Identity entries are created directly on the input's device and dtype (no host round-trip).
+        loop_index = torch.arange(num_nodes, device=device).repeat(2, 1)
+        loop_attr = torch.ones(num_nodes, dtype=input.dtype, device=device)
+        adj = torch.sparse_coo_tensor(
+            torch.cat((edge_index.to(device), loop_index), dim=1),
+            torch.cat((edge_attr.to(device), loop_attr)),
+            (num_nodes, num_nodes),
+        )
+        output = torch.spmm(adj, torch.mm(input, self.weight))
+        return output if self.bias is None else output + self.bias
+
+    def __repr__(self):
+        return f"{self.__class__.__name__} ({self.in_features} -> {self.out_features})"
+
 
 @register_model("gcn")
 class TKipfGCN(BaseModel):
@@ -100,3 +144,57 @@ def forward(self, x, adj):
 
     def predict(self, data):
         return self.forward(data.x, data.edge_index)
+
+@register_model("gcnbg")
+class TKipfGCNBGNorm(BaseModel):
+    r"""Two-layer GCN after `"Semi-Supervised Classification with Graph Convolutional Networks"
+    <https://arxiv.org/abs/1609.02907>`_, stacked from ``GraphConvolutionBGNorm`` layers
+    ("BingGe normalization") with batch normalization on the hidden layer.
+
+    Args:
+        in_feats (int) : Number of input features.
+        hidden_size (int) : The dimension of node representation.
+        out_feats (int) : Number of classes.
+        dropout (float) : Dropout rate for model training.
+    """
+
+    @staticmethod
+    def add_args(parser):
+        """Register this model's command-line options on ``parser``."""
+        # fmt: off
+        parser.add_argument("--num-features", type=int)
+        parser.add_argument("--num-classes", type=int)
+        parser.add_argument("--hidden-size", type=int, default=64)
+        parser.add_argument("--dropout", type=float, default=0.5)
+        # fmt: on
+
+    @classmethod
+    def build_model_from_args(cls, args):
+        """Build the model from parsed ``args``; ``num_classes`` becomes the output width."""
+        return cls(args.num_features, args.hidden_size, args.num_classes, args.dropout)
+
+    def __init__(self, in_feats, hidden_size, out_feats, dropout):
+        """Create the two graph-convolution layers and the hidden-layer batch norm."""
+        super(TKipfGCNBGNorm, self).__init__()
+        self.dropout = dropout
+        self.gc1 = GraphConvolutionBGNorm(in_feats, hidden_size)
+        self.gc2 = GraphConvolutionBGNorm(hidden_size, out_feats)
+        self.bn = nn.BatchNorm1d(hidden_size)
+
+    def forward(self, x, adj):
+        """Return the second layer's output for features ``x``; ``adj`` is indexed as a 2 x E edge index."""
+        num_nodes, dev = x.shape[0], x.device
+        # Unit weights; fill value 1 for self-loops (presumably only missing ones); deg^-1/2 scaling per endpoint.
+        edges, weights = add_remaining_self_loops(adj, torch.ones(adj.shape[1], device=dev), 1, num_nodes)
+        degree = spmm(edges, weights, torch.ones(num_nodes, 1, device=dev)).squeeze()
+        inv_sqrt_deg = degree.pow(-1 / 2)
+        weights = inv_sqrt_deg[edges[1]] * weights * inv_sqrt_deg[edges[0]]
+
+        hidden = F.dropout(x, self.dropout, training=self.training)
+        hidden = F.relu(self.bn(self.gc1(hidden, edges, weights)))
+        hidden = F.dropout(hidden, self.dropout, training=self.training)
+        return self.gc2(hidden, edges, weights)
+
+    def predict(self, data):
+        """Run ``forward`` on ``data.x`` and ``data.edge_index``."""
+        return self.forward(data.x, data.edge_index)
diff --git a/cogdl/options.py b/cogdl/options.py
index 48507f37..d8cd76a3 100644
--- a/cogdl/options.py
+++ b/cogdl/options.py
@@ -24,6 +24,7 @@ def get_parser():
                         help='which GPU to use')
     parser.add_argument('--save-dir', default='.', type=str)
     parser.add_argument('--enhance', type=str, default=None, help='use prone or prone++ to enhance embedding')
+    parser.add_argument('--dropedge', default=0.0, help='the drop edge probability')
 
     # fmt: on
     return parser
diff --git a/cogdl/tasks/node_classification.py b/cogdl/tasks/node_classification.py
index 6bc215cc..03dd08d1 100644
--- a/cogdl/tasks/node_classification.py
+++ b/cogdl/tasks/node_classification.py
@@ -97,6 +97,13 @@ def __init__(
 
         self.dataset = dataset
         self.data = dataset[0]
+
+        # add dropedge args
+        self.dropedge = float(args.dropedge)
+        # store the original edge index
+        self.original_edge_idx = torch.tensor(self.data.edge_index)
+        self.original_edge_num = self.original_edge_idx.shape[1]
+
         args.num_features = dataset.num_features
         args.num_classes = dataset.num_classes
         args.num_nodes = dataset.data.x.shape[0]
@@ -168,6 +175,13 @@ def train(self):
     def _train_step(self):
         self.model.train()
         self.optimizer.zero_grad()
+
+        # DropEdge: train each step on a fresh random subset of the original edges, placed on
+        # x's device (NOTE(review): the __init__ snapshot may predate the move to that device).
+        keep_num = int((1 - self.dropedge) * self.original_edge_num)
+        kept = np.random.permutation(self.original_edge_num)[:keep_num]
+        self.data.edge_index = self.original_edge_idx[:, kept].to(self.data.x.device)
+
         self.model.node_classification_loss(self.data).backward()
         self.optimizer.step()
 
diff --git a/match.yml b/match.yml
index c03eb28f..bdefd393 100644
--- a/match.yml
+++ b/match.yml
@@ -25,6 +25,8 @@ node_classification:
     - sgcpn
     - sgc
     - dropedge_gcn
+    - gunet
+    - gcnbg
     - unet
     - pprgo
     dataset:
diff --git a/tests/tasks/test_node_classification.py b/tests/tasks/test_node_classification.py
index 5adb8027..a615dcca 100644
--- a/tests/tasks/test_node_classification.py
+++ b/tests/tasks/test_node_classification.py
@@ -22,6 +22,7 @@ def get_default_args():
         "missing_rate": -1,
         "task": "node_classification",
         "dataset": "cora",
+        "dropedge": 0.0,
     }
     return build_args_from_dict(default_dict)
 
@@ -646,6 +647,16 @@ def test_dropedge_inceptiongcn_cora():
     ret = task.train()
     assert 0 <= ret["Acc"] <= 1
 
+def test_dropedge_gcnbg_citeseer():
+    """Smoke test: gcnbg on citeseer with dropedge=0.05 trains and reports "Acc" within [0, 1]."""
+    args = get_default_args()
+    args.task = "node_classification"
+    args.dataset = "citeseer"
+    args.model = "gcnbg"
+    args.dropedge = 0.05
+    result = build_task(args).train()
+    assert 0 <= result["Acc"] <= 1
+
 
 def test_pprgo_cora():
     args = get_default_args()
@@ -707,4 +718,5 @@ def test_pprgo_cora():
     test_dropedge_inceptiongcn_cora()
     test_dropedge_densegcn_cora()
     test_unet_cora()
+    test_dropedge_gcnbg_citeseer()
     test_pprgo_cora()