-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
82 lines (65 loc) · 2.58 KB
/
model.py
File metadata and controls
82 lines (65 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import torch.nn as nn
# Basic Block
class Basic_C2D_Block(nn.Module):
    """Conv2d -> optional BatchNorm2d -> LeakyReLU building block.

    Args:
        in_dim: number of input channels.
        out_dim: number of output channels.
        k_size: square kernel size; padding of ``k_size // 2`` keeps spatial
            size when ``stride == 1`` and ``k_size`` is odd.
        stride: convolution stride.
        is_BN: when True, apply BatchNorm2d after the convolution;
            otherwise an Identity no-op is used in its place.
    """

    def __init__(self, in_dim, out_dim, k_size, stride, is_BN):
        super(Basic_C2D_Block, self).__init__()
        # Attribute names (conv_1 / bn_1 / lrelu) are part of the state_dict
        # contract and are kept unchanged.
        self.conv_1 = nn.Conv2d(
            in_dim,
            out_dim,
            kernel_size=k_size,
            stride=stride,
            padding=k_size // 2,
        )
        self.bn_1 = nn.BatchNorm2d(out_dim) if is_BN else nn.Identity()
        self.lrelu = nn.LeakyReLU(inplace=False)

    def forward(self, x):
        """Run conv, (optional) batch-norm, then LeakyReLU on *x*."""
        return self.lrelu(self.bn_1(self.conv_1(x)))
# Residual Block
class Res_C2D_Block(nn.Module):
    """Stack of ``num_blocks`` Basic_C2D_Block convolutions with an additive
    skip connection (ResNet-style).

    Only the first inner block changes channel count / applies ``stride``;
    subsequent blocks are out_dim -> out_dim with stride 1. When the input
    and output shapes differ, a 1x1 conv + BatchNorm projects the residual
    so the elementwise addition is valid.

    Args:
        in_dim: number of input channels.
        out_dim: number of output channels.
        num_blocks: how many Basic_C2D_Block layers to stack.
        stride: stride applied by the first inner block (default 1).
    """

    def __init__(self, in_dim, out_dim, num_blocks, stride=1):
        super(Res_C2D_Block, self).__init__()
        layers = []
        for i in range(num_blocks):
            layers.append(
                Basic_C2D_Block(
                    in_dim=in_dim if i == 0 else out_dim,
                    out_dim=out_dim,
                    k_size=3,
                    stride=stride if i == 0 else 1,
                    is_BN=False,
                )
            )
        self.blocks = nn.Sequential(*layers)
        # Projection for the skip path when channels or spatial size change.
        self.adjust_residual = None
        if in_dim != out_dim or stride != 1:
            self.adjust_residual = nn.Sequential(
                nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(out_dim),
            )
        # Fix: instantiate the output activation once here rather than
        # allocating a new nn.LeakyReLU object on every forward() call.
        # LeakyReLU has no parameters or buffers, so registering it does not
        # change the state_dict or the RNG consumption order.
        self.out_act = nn.LeakyReLU(inplace=False)

    def forward(self, x):
        """Return ``LeakyReLU(blocks(x) + residual(x))``."""
        residual = x
        # Fix: explicit None check. Truthiness of an nn.Sequential delegates
        # to __len__, so `if self.adjust_residual:` would silently skip an
        # empty (but intended) projection — `is not None` states the intent.
        if self.adjust_residual is not None:
            residual = self.adjust_residual(x)
        y = self.blocks(x)
        # Out-of-place add: safe even if a future caller passes a view or a
        # tensor that autograd needs unmodified.
        return self.out_act(y + residual)
class ActorCriticCNN(nn.Module):
    """Shared-trunk actor-critic CNN.

    A Basic_C2D_Block stem followed by two residual stages feeds a global
    average pool; two linear heads then read the pooled 96-dim feature:
    the actor head emits action logits, the critic head a scalar value.

    Args:
        input_shape: (channels, height, width) of the observation; only the
            channel count is consumed here.
        num_actions: size of the discrete action space (actor output width).
    """

    def __init__(self, input_shape, num_actions):
        super(ActorCriticCNN, self).__init__()
        in_channels, _height, _width = input_shape
        # Trunk: stem + two down-sampling residual stages (24 -> 48 -> 96).
        self.basic = Basic_C2D_Block(in_channels, 24, k_size=4, stride=4, is_BN=False)
        self.res1 = Res_C2D_Block(24, 48, num_blocks=2, stride=2)
        self.res2 = Res_C2D_Block(48, 96, num_blocks=2, stride=2)
        # Collapse any remaining spatial extent to 1x1 before the heads.
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
        # Heads: action logits and scalar state value.
        self.actor_fc = nn.Linear(96, num_actions)
        self.critic_fc = nn.Linear(96, 1)

    def forward(self, x):
        """Return ``(action_logits, state_value)`` for a batch of inputs."""
        features = self.res2(self.res1(self.basic(x)))
        pooled = self.global_avg_pool(features).flatten(start_dim=1)
        return self.actor_fc(pooled), self.critic_fc(pooled)