dlc2action.model.transformer

  1#
  2# Copyright 2020-present by A. Mathis Group and contributors. All rights reserved.
  3#
  4# This project and all its files are licensed under GNU AGPLv3 or later version. 
  5# A copy is included in dlc2action/LICENSE.AGPL.
  6#
  7import copy
  8import math
  9
 10import torch
 11from dlc2action.model.base_model import Model
 12from torch import nn
 13from torch.nn import functional as F
 14
 15
 16class Interpolate(nn.Module):
 17    def __init__(self, size, mode):
 18        super(Interpolate, self).__init__()
 19        self.interp = F.interpolate
 20        self.size = size
 21        self.mode = mode
 22
 23    def forward(self, x):
 24        """Forward pass."""
 25        x = self.interp(x, size=self.size, mode=self.mode, align_corners=False)
 26        return x
 27
 28
 29class FeedForward(nn.Module):
 30    def __init__(self, d_model, dropout=0.1):
 31        super().__init__()
 32        # We set d_ff as a default to 2048
 33        self.linear_1 = nn.Conv1d(d_model, d_model, 1)
 34        self.dropout = nn.Dropout(dropout)
 35        self.linear_2 = nn.Conv1d(d_model, d_model, 1)
 36
 37    def forward(self, x):
 38        """Forward pass."""
 39        x = self.dropout(F.relu(self.linear_1(x)))
 40        x = self.linear_2(x)
 41        return x
 42
 43
 44class MultiHeadAttention(nn.Module):
 45    def __init__(self, heads, d_model, dropout=0.1):
 46        super().__init__()
 47
 48        self.d_model = d_model
 49        self.d_k = d_model // heads
 50        self.h = heads
 51
 52        self.q_linear = nn.Linear(d_model, d_model)
 53        self.v_linear = nn.Linear(d_model, d_model)
 54        self.k_linear = nn.Linear(d_model, d_model)
 55        self.dropout = nn.Dropout(dropout)
 56        self.out = nn.Linear(d_model, d_model)
 57
 58    def forward(self, q, k, v, mask=None):
 59        """Forward pass."""
 60        bs = q.size(0)
 61        q = q.transpose(1, 2)
 62        v = v.transpose(1, 2)
 63        k = k.transpose(1, 2)
 64
 65        # perform linear operation and split into h heads
 66        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
 67        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
 68        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
 69
 70        # transpose to get dimensions bs * h * sl * d_model
 71
 72        k = k.transpose(1, 2)
 73        q = q.transpose(1, 2)
 74        v = v.transpose(1, 2)
 75        # calculate attention using function we will define next
 76        scores = attention(q, k, v, self.d_k, mask, self.dropout)
 77
 78        # concatenate heads and put through final linear layer
 79        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
 80
 81        output = self.out(concat).transpose(1, 2)
 82
 83        return output
 84
 85
 86def attention(q, k, v, d_k, mask=None, dropout=None):
 87    """Attention mechanism."""
 88    scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
 89    if mask is not None:
 90        mask = mask.unsqueeze(1)
 91        scores = scores.masked_fill(mask == 0, -1e9)
 92    scores = F.softmax(scores, dim=-1)
 93
 94    if dropout is not None:
 95        scores = dropout(scores)
 96
 97    output = torch.matmul(scores, v)
 98    return output
 99
100
class EncoderLayer(nn.Module):
    """
    One encoder block: self-attention followed by a feed-forward block

    Batch normalisation is applied before each sub-layer and the sub-layer
    output is added back to its input after dropout.

    NOTE(review): `dropout` only sets the two residual dropouts; the attention
    and feed-forward sub-modules are created with their own default rates.
    """

    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = nn.BatchNorm1d(num_features=d_model)
        self.norm_2 = nn.BatchNorm1d(num_features=d_model)
        self.attn = MultiHeadAttention(heads=heads, d_model=d_model)
        self.ff = FeedForward(d_model=d_model)
        self.dropout_1 = nn.Dropout(p=dropout)
        self.dropout_2 = nn.Dropout(p=dropout)

    def forward(self, x, mask):
        """Forward pass."""
        # self-attention branch: queries, keys and values are the same tensor
        normed = self.norm_1(x)
        attended = self.attn(normed, normed, normed, mask)
        x = x + self.dropout_1(attended)
        # feed-forward branch
        transformed = self.ff(self.norm_2(x))
        return x + self.dropout_2(transformed)
118
119
def get_clones(module, N):
    """Create N identical modules.

    Every entry of the returned `nn.ModuleList` is an independent deep copy of
    `module`, so the copies do not share parameters.
    """
    clones = nn.ModuleList()
    for _ in range(N):
        clones.append(copy.deepcopy(module))
    return clones
123
124
class Encoder(nn.Module):
    """
    Positional encoding followed by a stack of N encoder layers

    The output of the last layer goes through a final batch normalisation.
    """

    def __init__(self, d_model, N, heads, len_segment):
        super().__init__()
        self.pe = PositionalEncoder(d_model, max_seq_len=len_segment)
        # a single layer is built and then deep-copied N times
        prototype = EncoderLayer(d_model, heads)
        self.layers = get_clones(prototype, N)
        self.norm = nn.BatchNorm1d(d_model)

    def forward(self, src, mask):
        """Forward pass."""
        encoded = self.pe(src)
        for block in self.layers:
            encoded = block(encoded, mask)
        normalized = self.norm(encoded)
        return normalized
138
139
class PositionalEncoder(nn.Module):
    """
    Adds a fixed sinusoidal positional encoding to a channel-first input

    The table has shape `(1, d_model, max_seq_len)` and is kept as a plain
    tensor attribute, so it is not part of the module's `state_dict`.

    NOTE(review): the exponent uses `2 * i` although `i` already advances in
    steps of 2, so the frequencies are not the ones of the standard sinusoidal
    encoding. It is left as is because changing it would alter the outputs.
    """

    def __init__(self, d_model, max_seq_len=512):
        super().__init__()
        self.d_model = d_model

        # rows are channels, columns are positions
        table = torch.zeros(d_model, max_seq_len)
        for i in range(0, d_model, 2):
            # the last sine row has no cosine partner when d_model is odd
            has_cos_row = i + 1 < d_model
            for pos in range(max_seq_len):
                table[i, pos] = math.sin(pos / (10000 ** ((2 * i) / d_model)))
                if has_cos_row:
                    table[i + 1, pos] = math.cos(
                        pos / (10000 ** ((2 * (i + 1)) / d_model))
                    )

        self.pe = table.unsqueeze(0)

    def forward(self, x):
        """Forward pass."""
        # scale the input up so that the encoding does not dominate it
        scaled = x * math.sqrt(self.d_model)
        # keep only as many positions as the input has
        n_steps = x.size(-1)
        encoding = self.pe[:, :, :n_steps].to(x.device)
        return scaled + encoding
166
167
class TransformerModule(nn.Module):
    """
    A transformer encoder wrapped in convolutional down- and up-sampling stages

    The input goes through convolution stages (with max-pooling when
    `num_pool > 0`), then through the encoder, and is interpolated back to the
    recorded lengths by the output stages before a final convolution.
    """

    def __init__(
        self,
        heads,
        N,
        d_model,
        input_dim,
        output_dim,
        len_segment,
        num_pool=3,
        add_batchnorm=True,
    ):
        super(TransformerModule, self).__init__()
        self.encoder = Encoder(d_model, N, heads, len_segment)
        self.in_layers = nn.ModuleList()
        self.out_layers = nn.ModuleList()

        # first input stage: maps input_dim to d_model, never has batch norm
        first_stage = [nn.Conv1d(input_dim, d_model, 3, padding=1), nn.ReLU()]
        if num_pool > 0:
            first_stage.append(nn.MaxPool1d(2, 2))
        self.in_layers.append(nn.ModuleList(first_stage))

        # remaining input stages, each halving the length with max-pooling
        for _ in range(num_pool - 1):
            stage = [nn.Conv1d(d_model, d_model, 3, padding=1), nn.ReLU()]
            if add_batchnorm:
                stage.append(nn.BatchNorm1d(d_model))
            stage.append(nn.MaxPool1d(2, 2))
            self.in_layers.append(nn.ModuleList(stage))

        # output stages; the upsampling itself happens in `forward`
        for _ in range(num_pool):
            stage = [nn.Conv1d(d_model, d_model, 3, padding=1), nn.ReLU()]
            if add_batchnorm:
                stage.append(nn.BatchNorm1d(d_model))
            self.out_layers.append(nn.ModuleList(stage))

        self.conv_out = nn.Conv1d(d_model, output_dim, 3, padding=1)

    def forward(self, x):
        """Forward pass."""
        # remember the length before every input stage to restore it later
        lengths = []
        for stage in self.in_layers:
            lengths.append(x.shape[-1])
            for op in stage:
                x = op(x)

        # positions whose channel sum is exactly zero are masked out
        mask = (x.sum(1).unsqueeze(1) != 0).int()
        x = self.encoder(x, mask)

        # undo the pooling in reverse order
        for stage, length in zip(self.out_layers, reversed(lengths)):
            for op in stage:
                x = op(x)
            x = F.interpolate(x, length)

        return self.conv_out(x)
215
216
class Predictor(nn.Module):
    """
    A prediction head made of two kernel-size-1 convolutions

    Maps `dim` channels to 64 hidden channels and then to `num_classes`
    channels, with a ReLU in between.
    """

    def __init__(self, dim, num_classes):
        super(Predictor, self).__init__()
        hidden_channels = 64
        self.conv_out_1 = nn.Conv1d(dim, hidden_channels, kernel_size=1)
        self.conv_out_2 = nn.Conv1d(hidden_channels, num_classes, kernel_size=1)

    def forward(self, x):
        """Forward pass."""
        hidden = F.relu(self.conv_out_1(x))
        return self.conv_out_2(hidden)
229
230
class Transformer(Model):
    """
    A modification of Transformer-Encoder with additional max-pooling and upsampling

    Set `num_pool` to 0 to get a standard transformer-encoder.
    """

    def __init__(
        self,
        N,
        heads,
        num_f_maps,
        input_dim,
        num_classes,
        num_pool,
        len_segment,
        add_batchnorm=False,
        feature_dim=None,
        state_dict_path=None,
        ssl_constructors=None,
        ssl_types=None,
        ssl_modules=None,
    ):
        # `input_dim` is a mapping whose values are indexable; the first
        # entries of all values are added up to get the input channel count
        input_dim = sum(shape[0] for shape in input_dim.values())

        if feature_dim is not None:
            # the feature extractor outputs `feature_dim` channels and a
            # separate predictor maps them to the classes
            self.f_shape = torch.Size([int(feature_dim)])
            self.params_predictor = {
                "dim": int(feature_dim),
                "num_classes": int(num_classes),
            }
        else:
            # no separate predictor: the extractor outputs the classes directly
            feature_dim = num_classes
            self.f_shape = None
            self.params_predictor = None

        # values are cast through float so that inputs like "64" or 64.0 work
        self.params = {
            "d_model": int(float(num_f_maps)),
            "len_segment": int(float(len_segment)),
            "input_dim": int(float(input_dim)),
            "N": int(float(N)),
            "heads": int(float(heads)),
            "add_batchnorm": bool(add_batchnorm),
            "num_pool": int(float(num_pool)),
            "output_dim": int(float(feature_dim)),
        }
        # called last, after the parameters above have been set
        # (presumably the base class builds the modules from them - verify)
        super().__init__(ssl_constructors, ssl_modules, ssl_types, state_dict_path)

    def _feature_extractor(self):
        """Build the feature extraction module from the stored parameters."""
        return TransformerModule(**self.params)

    def _predictor(self) -> torch.nn.Module:
        """Build the prediction module (identity when no `feature_dim` was set)."""
        if self.params_predictor is not None:
            return Predictor(**self.params_predictor)
        return nn.Identity()

    def features_shape(self) -> torch.Size:
        """Return the shape of the features (`None` when no `feature_dim` was set)."""
        return self.f_shape
class Interpolate(torch.nn.modules.module.Module):
17class Interpolate(nn.Module):
18    def __init__(self, size, mode):
19        super(Interpolate, self).__init__()
20        self.interp = F.interpolate
21        self.size = size
22        self.mode = mode
23
24    def forward(self, x):
25        """Forward pass."""
26        x = self.interp(x, size=self.size, mode=self.mode, align_corners=False)
27        return x

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

:ivar training: Boolean represents whether this module is in training or evaluation mode. :vartype training: bool

Interpolate(size, mode)
18    def __init__(self, size, mode):
19        super(Interpolate, self).__init__()
20        self.interp = F.interpolate
21        self.size = size
22        self.mode = mode

Initialize internal Module state, shared by both nn.Module and ScriptModule.

interp
size
mode
def forward(self, x):
24    def forward(self, x):
25        """Forward pass."""
26        x = self.interp(x, size=self.size, mode=self.mode, align_corners=False)
27        return x

Forward pass.

class FeedForward(torch.nn.modules.module.Module):
30class FeedForward(nn.Module):
31    def __init__(self, d_model, dropout=0.1):
32        super().__init__()
33        # We set d_ff as a default to 2048
34        self.linear_1 = nn.Conv1d(d_model, d_model, 1)
35        self.dropout = nn.Dropout(dropout)
36        self.linear_2 = nn.Conv1d(d_model, d_model, 1)
37
38    def forward(self, x):
39        """Forward pass."""
40        x = self.dropout(F.relu(self.linear_1(x)))
41        x = self.linear_2(x)
42        return x

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

:ivar training: Boolean represents whether this module is in training or evaluation mode. :vartype training: bool

FeedForward(d_model, dropout=0.1)
31    def __init__(self, d_model, dropout=0.1):
32        super().__init__()
33        # We set d_ff as a default to 2048
34        self.linear_1 = nn.Conv1d(d_model, d_model, 1)
35        self.dropout = nn.Dropout(dropout)
36        self.linear_2 = nn.Conv1d(d_model, d_model, 1)

Initialize internal Module state, shared by both nn.Module and ScriptModule.

linear_1
dropout
linear_2
def forward(self, x):
38    def forward(self, x):
39        """Forward pass."""
40        x = self.dropout(F.relu(self.linear_1(x)))
41        x = self.linear_2(x)
42        return x

Forward pass.

class MultiHeadAttention(torch.nn.modules.module.Module):
45class MultiHeadAttention(nn.Module):
46    def __init__(self, heads, d_model, dropout=0.1):
47        super().__init__()
48
49        self.d_model = d_model
50        self.d_k = d_model // heads
51        self.h = heads
52
53        self.q_linear = nn.Linear(d_model, d_model)
54        self.v_linear = nn.Linear(d_model, d_model)
55        self.k_linear = nn.Linear(d_model, d_model)
56        self.dropout = nn.Dropout(dropout)
57        self.out = nn.Linear(d_model, d_model)
58
59    def forward(self, q, k, v, mask=None):
60        """Forward pass."""
61        bs = q.size(0)
62        q = q.transpose(1, 2)
63        v = v.transpose(1, 2)
64        k = k.transpose(1, 2)
65
66        # perform linear operation and split into h heads
67        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
68        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
69        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
70
71        # transpose to get dimensions bs * h * sl * d_model
72
73        k = k.transpose(1, 2)
74        q = q.transpose(1, 2)
75        v = v.transpose(1, 2)
76        # calculate attention using function we will define next
77        scores = attention(q, k, v, self.d_k, mask, self.dropout)
78
79        # concatenate heads and put through final linear layer
80        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
81
82        output = self.out(concat).transpose(1, 2)
83
84        return output

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

:ivar training: Boolean represents whether this module is in training or evaluation mode. :vartype training: bool

MultiHeadAttention(heads, d_model, dropout=0.1)
46    def __init__(self, heads, d_model, dropout=0.1):
47        super().__init__()
48
49        self.d_model = d_model
50        self.d_k = d_model // heads
51        self.h = heads
52
53        self.q_linear = nn.Linear(d_model, d_model)
54        self.v_linear = nn.Linear(d_model, d_model)
55        self.k_linear = nn.Linear(d_model, d_model)
56        self.dropout = nn.Dropout(dropout)
57        self.out = nn.Linear(d_model, d_model)

Initialize internal Module state, shared by both nn.Module and ScriptModule.

d_model
d_k
h
q_linear
v_linear
k_linear
dropout
out
def forward(self, q, k, v, mask=None):
59    def forward(self, q, k, v, mask=None):
60        """Forward pass."""
61        bs = q.size(0)
62        q = q.transpose(1, 2)
63        v = v.transpose(1, 2)
64        k = k.transpose(1, 2)
65
66        # perform linear operation and split into h heads
67        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
68        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
69        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
70
71        # transpose to get dimensions bs * h * sl * d_model
72
73        k = k.transpose(1, 2)
74        q = q.transpose(1, 2)
75        v = v.transpose(1, 2)
76        # calculate attention using function we will define next
77        scores = attention(q, k, v, self.d_k, mask, self.dropout)
78
79        # concatenate heads and put through final linear layer
80        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
81
82        output = self.out(concat).transpose(1, 2)
83
84        return output

Forward pass.

def attention(q, k, v, d_k, mask=None, dropout=None):
87def attention(q, k, v, d_k, mask=None, dropout=None):
88    """Attention mechanism."""
89    scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
90    if mask is not None:
91        mask = mask.unsqueeze(1)
92        scores = scores.masked_fill(mask == 0, -1e9)
93    scores = F.softmax(scores, dim=-1)
94
95    if dropout is not None:
96        scores = dropout(scores)
97
98    output = torch.matmul(scores, v)
99    return output

Attention mechanism.

class EncoderLayer(torch.nn.modules.module.Module):
102class EncoderLayer(nn.Module):
103    def __init__(self, d_model, heads, dropout=0.1):
104        super().__init__()
105        self.norm_1 = nn.BatchNorm1d(d_model)
106        self.norm_2 = nn.BatchNorm1d(d_model)
107        self.attn = MultiHeadAttention(heads, d_model)
108        self.ff = FeedForward(d_model)
109        self.dropout_1 = nn.Dropout(dropout)
110        self.dropout_2 = nn.Dropout(dropout)
111
112    def forward(self, x, mask):
113        """Forward pass."""
114        x2 = self.norm_1(x)
115        x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
116        x2 = self.norm_2(x)
117        x = x + self.dropout_2(self.ff(x2))
118        return x

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

:ivar training: Boolean represents whether this module is in training or evaluation mode. :vartype training: bool

EncoderLayer(d_model, heads, dropout=0.1)
103    def __init__(self, d_model, heads, dropout=0.1):
104        super().__init__()
105        self.norm_1 = nn.BatchNorm1d(d_model)
106        self.norm_2 = nn.BatchNorm1d(d_model)
107        self.attn = MultiHeadAttention(heads, d_model)
108        self.ff = FeedForward(d_model)
109        self.dropout_1 = nn.Dropout(dropout)
110        self.dropout_2 = nn.Dropout(dropout)

Initialize internal Module state, shared by both nn.Module and ScriptModule.

norm_1
norm_2
attn
ff
dropout_1
dropout_2
def forward(self, x, mask):
112    def forward(self, x, mask):
113        """Forward pass."""
114        x2 = self.norm_1(x)
115        x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
116        x2 = self.norm_2(x)
117        x = x + self.dropout_2(self.ff(x2))
118        return x

Forward pass.

def get_clones(module, N):
121def get_clones(module, N):
122    """Create N identical modules."""
123    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])

Create N identical modules.

class Encoder(torch.nn.modules.module.Module):
126class Encoder(nn.Module):
127    def __init__(self, d_model, N, heads, len_segment):
128        super().__init__()
129        self.pe = PositionalEncoder(d_model, max_seq_len=len_segment)
130        self.layers = get_clones(EncoderLayer(d_model, heads), N)
131        self.norm = nn.BatchNorm1d(d_model)
132
133    def forward(self, src, mask):
134        """Forward pass."""
135        x = self.pe(src)
136        for layer in self.layers:
137            x = layer(x, mask)
138        return self.norm(x)

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

:ivar training: Boolean represents whether this module is in training or evaluation mode. :vartype training: bool

Encoder(d_model, N, heads, len_segment)
127    def __init__(self, d_model, N, heads, len_segment):
128        super().__init__()
129        self.pe = PositionalEncoder(d_model, max_seq_len=len_segment)
130        self.layers = get_clones(EncoderLayer(d_model, heads), N)
131        self.norm = nn.BatchNorm1d(d_model)

Initialize internal Module state, shared by both nn.Module and ScriptModule.

pe
layers
norm
def forward(self, src, mask):
133    def forward(self, src, mask):
134        """Forward pass."""
135        x = self.pe(src)
136        for layer in self.layers:
137            x = layer(x, mask)
138        return self.norm(x)

Forward pass.

class PositionalEncoder(torch.nn.modules.module.Module):
141class PositionalEncoder(nn.Module):
142    def __init__(self, d_model, max_seq_len=512):
143        super().__init__()
144        self.d_model = d_model
145
146        # create constant 'pe' matrix with values dependent on
147        # pos and i
148        pe = torch.zeros(d_model, max_seq_len)
149        for pos in range(max_seq_len):
150            for i in range(0, d_model, 2):
151                pe[i, pos] = math.sin(pos / (10000 ** ((2 * i) / d_model)))
152                if i + 1 < d_model:
153                    pe[i + 1, pos] = math.cos(
154                        pos / (10000 ** ((2 * (i + 1)) / d_model))
155                    )
156
157        self.pe = pe.unsqueeze(0)
158
159    def forward(self, x):
160        """Forward pass."""
161        # make embeddings relatively larger
162        x = x * math.sqrt(self.d_model)
163        # add constant to embedding
164        seq_len = x.size(-1)
165        x = x + self.pe[:, :, :seq_len].to(x.device)
166        return x

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

:ivar training: Boolean represents whether this module is in training or evaluation mode. :vartype training: bool

PositionalEncoder(d_model, max_seq_len=512)
142    def __init__(self, d_model, max_seq_len=512):
143        super().__init__()
144        self.d_model = d_model
145
146        # create constant 'pe' matrix with values dependent on
147        # pos and i
148        pe = torch.zeros(d_model, max_seq_len)
149        for pos in range(max_seq_len):
150            for i in range(0, d_model, 2):
151                pe[i, pos] = math.sin(pos / (10000 ** ((2 * i) / d_model)))
152                if i + 1 < d_model:
153                    pe[i + 1, pos] = math.cos(
154                        pos / (10000 ** ((2 * (i + 1)) / d_model))
155                    )
156
157        self.pe = pe.unsqueeze(0)

Initialize internal Module state, shared by both nn.Module and ScriptModule.

d_model
pe
def forward(self, x):
159    def forward(self, x):
160        """Forward pass."""
161        # make embeddings relatively larger
162        x = x * math.sqrt(self.d_model)
163        # add constant to embedding
164        seq_len = x.size(-1)
165        x = x + self.pe[:, :, :seq_len].to(x.device)
166        return x

Forward pass.

class TransformerModule(torch.nn.modules.module.Module):
169class TransformerModule(nn.Module):
170    def __init__(
171        self, heads, N, d_model, input_dim, output_dim, len_segment, num_pool=3, add_batchnorm=True
172    ):
173        super(TransformerModule, self).__init__()
174        self.encoder = Encoder(d_model, N, heads, len_segment)
175        self.in_layers = nn.ModuleList()
176        self.out_layers = nn.ModuleList()
177        layer = nn.ModuleList()
178        layer.append(nn.Conv1d(input_dim, d_model, 3, padding=1))
179        layer.append(nn.ReLU())
180        if num_pool > 0:
181            layer.append(nn.MaxPool1d(2, 2))
182        self.in_layers.append(layer)
183        for _ in range(num_pool - 1):
184            layer = nn.ModuleList()
185            layer.append(nn.Conv1d(d_model, d_model, 3, padding=1))
186            layer.append(nn.ReLU())
187            if add_batchnorm:
188                layer.append(nn.BatchNorm1d(d_model))
189            layer.append(nn.MaxPool1d(2, 2))
190            self.in_layers.append(layer)
191        for _ in range(num_pool):
192            layer = nn.ModuleList()
193            layer.append(nn.Conv1d(d_model, d_model, 3, padding=1))
194            layer.append(nn.ReLU())
195            if add_batchnorm:
196                layer.append(nn.BatchNorm1d(d_model))
197            self.out_layers.append(layer)
198        self.conv_out = nn.Conv1d(d_model, output_dim, 3, padding=1)
199
200    def forward(self, x):
201        """Forward pass."""
202        sizes = []
203        for layer_list in self.in_layers:
204            sizes.append(x.shape[-1])
205            for layer in layer_list:
206                x = layer(x)
207        mask = (x.sum(1).unsqueeze(1) != 0).int()
208        x = self.encoder(x, mask)
209        sizes = sizes[::-1]
210        for i, (layer_list, size) in enumerate(zip(self.out_layers, sizes)):
211            for layer in layer_list:
212                x = layer(x)
213            x = F.interpolate(x, size)
214        x = self.conv_out(x)
215        return x

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

:ivar training: Boolean represents whether this module is in training or evaluation mode. :vartype training: bool

TransformerModule( heads, N, d_model, input_dim, output_dim, len_segment, num_pool=3, add_batchnorm=True)
170    def __init__(
171        self, heads, N, d_model, input_dim, output_dim, len_segment, num_pool=3, add_batchnorm=True
172    ):
173        super(TransformerModule, self).__init__()
174        self.encoder = Encoder(d_model, N, heads, len_segment)
175        self.in_layers = nn.ModuleList()
176        self.out_layers = nn.ModuleList()
177        layer = nn.ModuleList()
178        layer.append(nn.Conv1d(input_dim, d_model, 3, padding=1))
179        layer.append(nn.ReLU())
180        if num_pool > 0:
181            layer.append(nn.MaxPool1d(2, 2))
182        self.in_layers.append(layer)
183        for _ in range(num_pool - 1):
184            layer = nn.ModuleList()
185            layer.append(nn.Conv1d(d_model, d_model, 3, padding=1))
186            layer.append(nn.ReLU())
187            if add_batchnorm:
188                layer.append(nn.BatchNorm1d(d_model))
189            layer.append(nn.MaxPool1d(2, 2))
190            self.in_layers.append(layer)
191        for _ in range(num_pool):
192            layer = nn.ModuleList()
193            layer.append(nn.Conv1d(d_model, d_model, 3, padding=1))
194            layer.append(nn.ReLU())
195            if add_batchnorm:
196                layer.append(nn.BatchNorm1d(d_model))
197            self.out_layers.append(layer)
198        self.conv_out = nn.Conv1d(d_model, output_dim, 3, padding=1)

Initialize internal Module state, shared by both nn.Module and ScriptModule.

encoder
in_layers
out_layers
conv_out
def forward(self, x):
200    def forward(self, x):
201        """Forward pass."""
202        sizes = []
203        for layer_list in self.in_layers:
204            sizes.append(x.shape[-1])
205            for layer in layer_list:
206                x = layer(x)
207        mask = (x.sum(1).unsqueeze(1) != 0).int()
208        x = self.encoder(x, mask)
209        sizes = sizes[::-1]
210        for i, (layer_list, size) in enumerate(zip(self.out_layers, sizes)):
211            for layer in layer_list:
212                x = layer(x)
213            x = F.interpolate(x, size)
214        x = self.conv_out(x)
215        return x

Forward pass.

class Predictor(torch.nn.modules.module.Module):
class Predictor(nn.Module):
    """Prediction head made of two kernel-size-1 convolutions with a ReLU in between."""

    def __init__(self, dim, num_classes):
        """Create the two convolutions.

        Parameters
        ----------
        dim : int
            number of input channels of the first convolution
        num_classes : int
            number of output channels of the second convolution
        """
        super().__init__()
        # Width of the intermediate representation between the convolutions.
        hidden_channels = 64
        self.conv_out_1 = nn.Conv1d(dim, hidden_channels, kernel_size=1)
        self.conv_out_2 = nn.Conv1d(hidden_channels, num_classes, kernel_size=1)

    def forward(self, x):
        """Forward pass."""
        hidden = F.relu(self.conv_out_1(x))
        return self.conv_out_2(hidden)

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

:ivar training: Boolean represents whether this module is in training or evaluation mode. :vartype training: bool

Predictor(dim, num_classes)
219    def __init__(self, dim, num_classes):
220        super(Predictor, self).__init__()
221        self.conv_out_1 = nn.Conv1d(dim, 64, kernel_size=1)
222        self.conv_out_2 = nn.Conv1d(64, num_classes, kernel_size=1)

Initialize internal Module state, shared by both nn.Module and ScriptModule.

conv_out_1
conv_out_2
def forward(self, x):
224    def forward(self, x):
225        """Forward pass."""
226        x = self.conv_out_1(x)
227        x = F.relu(x)
228        x = self.conv_out_2(x)
229        return x

Forward pass.

class Transformer(dlc2action.model.base_model.Model):
class Transformer(Model):
    """
    A modification of Transformer-Encoder with additional max-pooling and upsampling

    Set `num_pool` to 0 to get a standard transformer-encoder.
    """

    def __init__(
        self,
        N,
        heads,
        num_f_maps,
        input_dim,
        num_classes,
        num_pool,
        len_segment,
        add_batchnorm=False,
        feature_dim=None,
        state_dict_path=None,
        ssl_constructors=None,
        ssl_types=None,
        ssl_modules=None,
    ):
        """Store the module hyperparameters and initialize the base model.

        Numeric hyperparameters are coerced with `int(float(value))`, so
        values such as `64`, `64.0` or `"64.0"` are all accepted.

        Parameters
        ----------
        N : int
            passed to `TransformerModule` as `N` (presumably the number of
            encoder layers -- TODO confirm)
        heads : int
            passed to `TransformerModule` as `heads`
        num_f_maps : int
            passed to `TransformerModule` as `d_model`
        input_dim : dict
            a mapping whose values are indexable; the first element of every
            value is summed to get the total input dimension
        num_classes : int
            the number of classes; used as the feature extractor output
            dimension when `feature_dim` is `None`
        num_pool : int
            the number of pooling stages (0 gives a standard
            transformer-encoder)
        len_segment : int
            passed to `TransformerModule` as `len_segment`
        add_batchnorm : bool, default False
            passed to `TransformerModule` as `add_batchnorm`
        feature_dim : int, optional
            when set, the feature extractor outputs `feature_dim` channels
            and a `Predictor` maps them to `num_classes`; when `None` the
            predictor is an identity module
        state_dict_path : str, optional
            forwarded to the base model constructor
        ssl_constructors : list, optional
            forwarded to the base model constructor
        ssl_types : list, optional
            forwarded to the base model constructor
        ssl_modules : list, optional
            forwarded to the base model constructor
        """
        input_dim = sum(x[0] for x in input_dim.values())
        if feature_dim is None:
            # No separate prediction head: the extractor outputs the classes.
            feature_dim = num_classes
            self.f_shape = None
            self.params_predictor = None
        else:
            # Same coercion as in `self.params` below, so that a value accepted
            # for `output_dim` is never rejected for the predictor.
            self.f_shape = torch.Size([int(float(feature_dim))])
            self.params_predictor = {
                "dim": int(float(feature_dim)),
                "num_classes": int(float(num_classes)),
            }
        self.params = {
            "d_model": int(float(num_f_maps)),
            "len_segment": int(float(len_segment)),  # "max_seq_len"
            "input_dim": int(float(input_dim)),
            "N": int(float(N)),
            "heads": int(float(heads)),
            "add_batchnorm": bool(add_batchnorm),
            "num_pool": int(float(num_pool)),
            "output_dim": int(float(feature_dim)),
        }
        # NOTE(review): the attributes above are assigned before the base
        # constructor runs, presumably because it builds the modules through
        # `_feature_extractor()` / `_predictor()` -- confirm against `Model`.
        super().__init__(ssl_constructors, ssl_modules, ssl_types, state_dict_path)

    def _feature_extractor(self):
        """Build the `TransformerModule` from the stored parameters."""
        return TransformerModule(**self.params)

    def _predictor(self) -> torch.nn.Module:
        """Build the prediction head (an identity when `feature_dim` was not set)."""
        if self.params_predictor is None:
            return nn.Identity()
        return Predictor(**self.params_predictor)

    def features_shape(self) -> torch.Size:
        """Get the shape of feature extractor output.

        Returns
        -------
        feature_shape : torch.Size
            `torch.Size([feature_dim])`, or `None` when `feature_dim` was not
            passed to the constructor
        """
        return self.f_shape

A modification of Transformer-Encoder with additional max-pooling and upsampling

Set num_pool to 0 to get a standard transformer-encoder.

Transformer( N, heads, num_f_maps, input_dim, num_classes, num_pool, len_segment, add_batchnorm=False, feature_dim=None, state_dict_path=None, ssl_constructors=None, ssl_types=None, ssl_modules=None)
239    def __init__(
240        self,
241        N,
242        heads,
243        num_f_maps,
244        input_dim,
245        num_classes,
246        num_pool,
247        len_segment,
248        add_batchnorm=False,
249        feature_dim=None,
250        state_dict_path=None,
251        ssl_constructors=None,
252        ssl_types=None,
253        ssl_modules=None,
254    ):
255        input_dim = sum([x[0] for x in input_dim.values()])
256        if feature_dim is None:
257            feature_dim = num_classes
258            self.f_shape = None
259            self.params_predictor = None
260        else:
261            self.f_shape = torch.Size([int(feature_dim)])
262            self.params_predictor = {
263                "dim": int(feature_dim),
264                "num_classes": int(num_classes),
265            }
266        self.params = {
267            "d_model": int(float(num_f_maps)),
268            "len_segment": int(float(len_segment)),  # "max_seq_len
269            "input_dim": int(float(input_dim)),
270            "N": int(float(N)),
271            "heads": int(float(heads)),
272            "add_batchnorm": bool(add_batchnorm),
273            "num_pool": int(float(num_pool)),
274            "output_dim": int(float(feature_dim)),
275        }
276        super().__init__(ssl_constructors, ssl_modules, ssl_types, state_dict_path)

Initialize the model.

Parameters

ssl_constructors : list, optional a list of SSL constructors that build the necessary SSL modules ssl_modules : list, optional a list of torch.nn.Module instances that will serve as SSL modules ssl_types : list, optional a list of string SSL types state_dict_path : str, optional path to the model state dictionary to load strict : bool, default False when True, the state dictionary will only be loaded if the current and the loaded architecture are the same; otherwise missing or extra keys, as well as shape inconsistencies, are ignored prompt_function : callable, optional a function that takes a list of strings and returns a string prompt

params
def features_shape(self) -> torch.Size:
287    def features_shape(self) -> torch.Size:
288        return self.f_shape

Get the shape of feature extractor output.

Returns

feature_shape : torch.Size shape of feature extractor output