text-classification/models.py at master · silknow/text-classification · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
Text classification model
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import ModuleList as List


class ConvBlock(nn.Module):
    """Conv1d -> activation -> max-pool over the whole convolved sequence.

    The pooling window is ``slen - kernel_size + 1``, i.e. the length of the
    convolution output for an input of length ``slen``, so a fixed-length
    input is reduced to a single value per output channel.
    """

    # names of the torch.nn.functional activations that are accepted
    _ACTIVATIONS = ('relu', 'elu', 'gelu')

    def __init__(self, input_dim, slen, out_channels, kernel_size,
                 activation='elu'):
        """
        input_dim is the number of input channels of the convolution
        slen is the (fixed) length of the sequences fed to the block
        out_channels is the number of filters of the convolution
        kernel_size is the width of the convolution kernel
        activation is one of 'relu', 'elu' or 'gelu'

        Raises ValueError if the activation is unknown or if kernel_size is
        larger than slen.
        """
        super().__init__()

        # fail early: with slen < kernel_size the pooling window below would
        # be <= 0 and the block could only fail later, inside forward()
        if slen < kernel_size:
            raise ValueError(
                f'kernel_size={kernel_size} is larger than slen={slen}')

        self.conv = nn.Conv1d(input_dim, out_channels, kernel_size)

        if activation not in self._ACTIVATIONS:
            raise ValueError(f'invalid activation f={activation}')
        # resolved by name only after validation, so nothing but the
        # requested function is looked up on torch.nn.functional
        self.activation = getattr(F, activation)

        # window spans the full convolution output -> one value per channel
        self.pool = nn.MaxPool1d(slen - kernel_size + 1)

    def forward(self, x):
        """Apply convolution, activation and pooling to x.

        x is expected as (batch, input_dim, slen); the result is
        (batch, out_channels, 1) for inputs of length slen.
        """
        return self.pool(self.activation(self.conv(x)))


class TextCNN(nn.Module):
    """CNN text classifier.

    Pretrained embeddings feed one ConvBlock per kernel size; the pooled
    outputs of all blocks are concatenated, passed through dropout and an
    optional fully connected hidden layer, and finally mapped to the classes.
    """

    def __init__(self, embeddings, embeddings_freeze, slen, output_size,
                 dropout_p=0.1,
                 kernel_sizes=(3, 4, 5),
                 channels_outs=(100, 100, 100),
                 hidden_size=100,
                 activation='relu',
                 alpha_dropout=False):
        """
        embeddings is the pretrained embedding matrix; embeddings.shape[1]
        is used as the embedding dimension
        embeddings_freeze is passed as `freeze` to Embedding.from_pretrained
        slen is the fixed length of the input sequences
        output_size is the number of output units (classes)
        dropout_p is the probability used by every dropout layer
        kernel_sizes holds one convolution kernel size per conv block

        channels_out[s] is the output of the convolution which converts
        channels_in into channels_out. this is commonly called n_filters or
        n_feature_maps and "conceptually corresponds" to the number of
        features extracted by a convolution. It must have one entry per
        kernel size.

        hidden_size is the size of the fully connected hidden layer; a value
        <= 0 disables that layer
        activation is one of 'relu', 'elu' or 'gelu' and is used both in the
        conv blocks and after the hidden layer
        alpha_dropout selects nn.AlphaDropout instead of nn.Dropout

        SCNN uses ELUs and AlphaDropout to create a self-normalizing CNN
        The idea is to build smaller (less filters) networks with the same
        performance.

        Raises ValueError if the activation is unknown or if kernel_sizes and
        channels_outs differ in length.
        """
        super().__init__()

        # validate up front so a bad configuration fails here instead of
        # surfacing later as an unbound name or a shape error in forward()
        if activation not in ('relu', 'elu', 'gelu'):
            raise ValueError(f'invalid activation f={activation}')
        if len(kernel_sizes) != len(channels_outs):
            # zip() below would silently drop the extra entries while
            # dim_sum_filter still counts every entry of channels_outs
            raise ValueError(
                f'kernel_sizes (n={len(kernel_sizes)}) and channels_outs '
                f'(n={len(channels_outs)}) must have the same length')

        self.nk = len(kernel_sizes)
        self.dim_e = embeddings.shape[1]
        self.dim_sum_filter = sum(channels_outs)  # sum of all channels_out
        self.hidden_size = hidden_size

        # the name was validated above, so the lookup cannot fail on it
        activation_function = getattr(F, activation)

        # embedding
        self.embed = nn.Embedding.from_pretrained(torch.Tensor(embeddings),
                                                  freeze=embeddings_freeze)

        # Convolution Block
        # one block per (kernel size, number of filters) pair; the weights
        # keep pytorch's default initialization
        self.conv_blocks = List([ConvBlock(self.dim_e, slen, f, k, activation)
                                 for k, f in zip(kernel_sizes, channels_outs)])

        # dropout
        dropout_layer = nn.AlphaDropout if alpha_dropout else nn.Dropout
        self.dropout = dropout_layer(dropout_p)
        # stays None when there is no hidden layer
        self.dropout2 = None

        # a fc hidden layer to squeeze into a desired size
        if hidden_size > 0:
            self.fc = nn.Linear(self.dim_sum_filter, self.hidden_size)
            self.dropout2 = dropout_layer(dropout_p)
            self.fc_act = activation_function
            # output
            self.output = nn.Linear(self.hidden_size, output_size)
        # no squeezing
        else:
            self.output = nn.Linear(self.dim_sum_filter, output_size)

    def forward(self, inputs):
        """Expects fixed length sequences as input. inputs (batch, slen).
        i.e. all batches must have the same fixed length.

        Returns (out, hidden) when hidden_size > 0, where hidden is the
        activated output of the hidden layer; otherwise returns out alone.
        """
        # inputs is (batch, slen)
        x = self.embed(inputs)  # x is (batch, seq, dim_e)
        x = x.transpose(1, 2)   # x is (batch, dim_e, slen)
        # because conv1d requires (batch, channels_in=dim_e, slen)
        # to produce an output (batch, channels_out, slen - k + 1)
        # we then pool1d (kernel=slen-k+1) over the output of conv1d
        # since 1d works along time (i.e. sequence) this means
        # we get (batch, channels_out=channels_outs_k, 1) which we squeeze
        conv_blocks_out = [block(x).squeeze(-1) for block in self.conv_blocks]
        # and finally we concatenate all our conv1ds with different kernel
        # sizes together to get (batch, sum_k(channels_outs_k))
        # i.e. we concat the channels_out (i.e. features)
        x = torch.cat(conv_blocks_out, dim=1)

        # and do some dropout
        x = self.dropout(x)

        # no hidden layer: map straight to classes and return
        if self.hidden_size <= 0:
            return self.output(x)

        # squeeze into hidden (dropout is applied before the activation)
        x = self.fc(x)
        x = self.dropout2(x)
        x = self.fc_act(x)

        # map to classes and return together with the hidden outputs
        out = self.output(x)
        return out, x


def sn_create_default_model(embeddings, slen, output_size):
    """Build a TextCNN configured with the parameters defined in D3.3.

    The embeddings are frozen and no hidden layer is used (hidden_size=0).
    """
    settings = dict(
        dropout_p=0.4,
        kernel_sizes=(2, 3, 4),
        channels_outs=(100, 100, 100),
        hidden_size=0,
        activation='gelu',
        alpha_dropout=True,
    )
    # second positional argument (True) freezes the embeddings
    return TextCNN(embeddings, True, slen, output_size, **settings)


def sn_create_multimodal_model(embeddings, slen, output_size, hidden_size=500):
    """Build a TextCNN with the parameters used for multimodal experiments.

    The embeddings are frozen; hidden_size sets the size of the model's
    fully connected hidden layer.
    """
    # second positional argument (True) freezes the embeddings
    return TextCNN(
        embeddings, True, slen, output_size,
        dropout_p=0.4,
        kernel_sizes=(2, 3, 4),
        channels_outs=(100, 100, 100),
        hidden_size=hidden_size,
        activation='gelu',
        alpha_dropout=True,
    )