What is a Seq2seq model?
Review: how the Attention module works
How attention scores are computed → by using a weight matrix, the scoring function itself becomes learnable
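For a concrete picture, here is a minimal sketch of one such learnable scoring function, the Luong-style "general" score score(h_dec, h_enc) = h_dec^T W h_enc, where W is a trainable weight matrix. The class name and shapes below are illustrative assumptions, not taken from the lecture code:

import torch
import torch.nn as nn

class GeneralAttention(nn.Module):
    """Luong 'general' attention: score(h_dec, h_enc) = h_dec^T W h_enc."""
    def __init__(self, hidden_size):
        super().__init__()
        # The learnable weight matrix W, trained with the rest of the model.
        self.W = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(self, decoder_hidden, encoder_outputs):
        # decoder_hidden: (B, d_h), encoder_outputs: (S_L, B, d_h)
        projected = self.W(encoder_outputs)  # (S_L, B, d_h)
        scores = (projected * decoder_hidden.unsqueeze(0)).sum(dim=-1)  # (S_L, B)
        attn_weights = torch.softmax(scores, dim=0)  # normalize over source positions
        context = (attn_weights.unsqueeze(-1) * encoder_outputs).sum(dim=0)  # (B, d_h)
        return context, attn_weights

Because W is an nn.Linear parameter, it is updated by backpropagation together with the rest of the network, which is exactly what makes the score computation trainable rather than a fixed dot product.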
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        # vocab_size, embedding_size, hidden_size, num_layers, num_dirs, dropout
        # are assumed to be defined as module-level hyperparameters.
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.gru = nn.GRU(
            input_size=embedding_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=True if num_dirs > 1 else False,
            dropout=dropout
        )
        # Projects the concatenated forward/backward hidden states back down to d_h.
        self.linear = nn.Linear(num_dirs * hidden_size, hidden_size)

    def forward(self, batch, batch_lens):  # batch: (B, S_L), batch_lens: (B)
        # d_w: word embedding size
        batch_emb = self.embedding(batch)  # (B, S_L, d_w)
        batch_emb = batch_emb.transpose(0, 1)  # (S_L, B, d_w)

        # batch_lens must be sorted in descending order (pack_padded_sequence default).
        packed_input = pack_padded_sequence(batch_emb, batch_lens)

        h_0 = torch.zeros((num_layers * num_dirs, batch.shape[0], hidden_size))  # (num_layers*num_dirs, B, d_h) = (4, B, d_h)
        packed_outputs, h_n = self.gru(packed_input, h_0)  # h_n: (4, B, d_h)
        outputs = pad_packed_sequence(packed_outputs)[0]  # outputs: (S_L, B, 2d_h)

        # The last two slices of h_n are the final layer's forward and backward states.
        forward_hidden = h_n[-2, :, :]
        backward_hidden = h_n[-1, :, :]
        hidden = self.linear(torch.cat((forward_hidden, backward_hidden), dim=-1)).unsqueeze(0)  # (1, B, d_h)

        return outputs, hidden
Here, forward_hidden = h_n[-2, :, :] and backward_hidden = h_n[-1, :, :] pick out the final layer's forward and backward hidden states respectively; at the end, the two are concatenated and passed through the linear layer to produce a single hidden state.
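As a quick sanity check, the encoder can be exercised like this (all hyperparameter values below are illustrative assumptions, not from the original notes):

# Assumed module-level hyperparameters (illustrative values).
vocab_size, embedding_size, hidden_size = 100, 256, 512
num_layers, num_dirs, dropout = 2, 2, 0.1

encoder = Encoder()
batch = torch.randint(0, vocab_size, (4, 10))  # (B=4, S_L=10)
batch_lens = torch.tensor([10, 9, 7, 5])       # sorted in descending order
outputs, hidden = encoder(batch, batch_lens)
print(outputs.shape)  # torch.Size([10, 4, 1024]) -> (S_L, B, 2*d_h)
print(hidden.shape)   # torch.Size([1, 4, 512])   -> (1, B, d_h)

Note that outputs keeps both directions (2*d_h per position) for the attention module to attend over, while hidden is already projected down to d_h so it can directly initialize a unidirectional decoder.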