linear_lstm_model.py
from typing import List, Optional, Tuple

import torch
import torch.nn as nn

import dataset


class LinearLSTM(nn.Module):
    def __init__(self,
                 embedding_dim: int,
                 lstm_output_dim: int,
                 num_classes: int,
                 extra_non_linear: Optional[int],
                 label_smoothing: Optional[float]):
        super(LinearLSTM, self).__init__()
        self.embedding_dim = embedding_dim
        self.lstm_output_dim = lstm_output_dim
        self.num_classes = num_classes
        self.label_smoothing = label_smoothing

        # One learnable vector for every token type.
        self.embedding = nn.Embedding(
            num_embeddings=dataset.PAD_TOKEN_VALUE + 1,
            embedding_dim=embedding_dim,
            padding_idx=dataset.PAD_TOKEN_VALUE)

        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=lstm_output_dim)

        # The output of the fully connected stack is logits. With an
        # extra non-linearity, the first linear layer projects to a
        # hidden size; otherwise it projects directly to the classes.
        linear_output_size = (extra_non_linear
                              if extra_non_linear is not None
                              else num_classes)
        self.linear = nn.Linear(lstm_output_dim, linear_output_size)
        fc_layers: List[nn.Module] = [self.linear]
        if extra_non_linear is not None:
            fc_layers.append(nn.Tanh())
            fc_layers.append(nn.Linear(linear_output_size, num_classes))
        self.fc_layers = nn.Sequential(*fc_layers)

        # Initialize the first linear layer explicitly (the LSTM has a
        # default initialization; the linear layer's default is less clear).
        non_linearity = ('linear' if extra_non_linear is None else 'relu')
        gain = nn.init.calculate_gain(non_linearity)
        torch.nn.init.xavier_uniform_(
            self.linear.weight, gain=gain)

        # Defaults: mean reduction over the batch; softmax is applied
        # to the inputs internally.
        self.loss = nn.CrossEntropyLoss()
        if self.label_smoothing is not None:
            self.train_loss = nn.CrossEntropyLoss(
                label_smoothing=self.label_smoothing)
        else:
            self.train_loss = self.loss

        self.set_gpu_use()

    def named_weights(self):
        # No weights are individually tracked for this model.
        return []

    def forward(self,
                progs: torch.Tensor,
                lengths: torch.Tensor) -> torch.Tensor:
        """Run the model on a batch of padded sequences.

        progs holds the padded sequences; it is T x B, where T is the
        maximum length in the batch. lengths is a 1D tensor of sequence
        lengths. Returns a B x self.num_classes tensor of logits.
        """
        progs = progs.to(self.device)
        T, B = progs.size()
        progs_embed = self.embedding(progs)
        assert progs_embed.size() == (T, B, self.embedding_dim)
        progs_packed = torch.nn.utils.rnn.pack_padded_sequence(
            progs_embed, lengths,
            enforce_sorted=False)
        # An alternative is to pass (h0, c0) to self.lstm; the default
        # is (h0, c0) = (0, 0). One idea is to initialize them randomly,
        # but that may just inject noise.
        lstm_output, (h_n, c_n) = self.lstm(progs_packed)
        # h_n is (num_layers, B, lstm_output_dim) with num_layers == 1;
        # drop the leading dimension before the fully connected stack.
        h_n = torch.squeeze(h_n, 0)
        logits = self.fc_layers(h_n)
        assert logits.size() == (B, self.num_classes), logits.size()
        return logits

    def set_gpu_use(self):
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        print(f'Using device [{self.device}].')
        # Training currently assumes a GPU is present, so fail fast if not.
        assert torch.cuda.is_available()
        self.to(self.device)
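

# --- Usage sketch (not part of the original file) ---
# A minimal smoke test showing how the model might be constructed and
# called. It assumes dataset.PAD_TOKEN_VALUE is the largest token id
# (the constructor uses PAD_TOKEN_VALUE + 1 as the vocabulary size) and
# that a CUDA device is available, as set_gpu_use() asserts. The hidden
# sizes, class count, and fake batch below are illustrative only.
if __name__ == "__main__":
    model = LinearLSTM(embedding_dim=64,
                       lstm_output_dim=128,
                       num_classes=10,
                       extra_non_linear=None,
                       label_smoothing=None)

    # Fake batch: B = 4 padded sequences, maximum length T = 12.
    T, B = 12, 4
    lengths = torch.tensor([12, 9, 7, 3])
    progs = torch.full((T, B), dataset.PAD_TOKEN_VALUE, dtype=torch.long)
    for b, n in enumerate(lengths.tolist()):
        progs[:n, b] = torch.randint(0, dataset.PAD_TOKEN_VALUE, (n,))

    logits = model(progs, lengths)  # B x num_classes
    labels = torch.randint(0, 10, (B,), device=logits.device)
    loss = model.train_loss(logits, labels)
    print(logits.shape, loss.item())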