机器翻译之创建Seq2Seq的编码器、解码器
1.创建编码器、解码器的基类
1.1创建编码器的基类
from torch import nn
class Encoder(nn.Module):
    """Base interface for the encoder half of an encoder-decoder model.

    Subclasses must override :meth:`forward`; the base implementation only
    raises ``NotImplementedError``.
    """

    def __init__(self, **kwargs):
        """Initialise the module.

        Args:
            **kwargs: Arbitrary keyword arguments, forwarded unchanged to
                ``nn.Module.__init__``.
        """
        super().__init__(**kwargs)

    def forward(self, X, *args):
        """Encode the input ``X``.

        Args:
            X: Input to encode.
            *args: Arbitrary extra positional arguments for subclasses.

        Raises:
            NotImplementedError: Always, until a subclass overrides this method.
        """
        raise NotImplementedError
1.2创建解码器的基类
class Decoder(nn.Module):
    """Base interface for the decoder half of an encoder-decoder model.

    Compared with the encoder base class, a decoder additionally exposes
    :meth:`init_state`, which builds the decoder's initial state from the
    encoder outputs. Both methods must be overridden by subclasses.
    """

    def __init__(self, **kwargs):
        """Initialise the module.

        Args:
            **kwargs: Arbitrary keyword arguments, forwarded unchanged to
                ``nn.Module.__init__``.
        """
        super().__init__(**kwargs)

    def init_state(self, enc_outputs, *args):
        """Build the initial decoder state from the encoder outputs.

        Args:
            enc_outputs: Whatever the encoder's forward pass returned.
            *args: Arbitrary extra positional arguments for subclasses.

        Raises:
            NotImplementedError: Always, until a subclass overrides this method.
        """
        raise NotImplementedError

    def forward(self, X, state):
        """Decode ``X`` given ``state``.

        Unlike the encoder's forward pass, the decoder also receives a state.

        Args:
            X: Decoder input.
            state: Decoder state, e.g. the value returned by :meth:`init_state`.

        Raises:
            NotImplementedError: Always, until a subclass overrides this method.
        """
        raise NotImplementedError
1.3合并编码器和解码器的基类
class EncoderDecoder(nn.Module):
    """Wire an encoder and a decoder together into a single module."""

    def __init__(self, encoder, decoder, **kwargs):
        """Store the two halves of the model.

        Args:
            encoder: Callable encoder; invoked as ``encoder(enc_X, *args)``.
            decoder: Decoder providing ``init_state(enc_outputs, *args)`` and
                callable as ``decoder(dec_X, state)``.
            **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
        """
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, enc_X, dec_X, *args):
        """Run the encoder, derive the decoder state, then run the decoder.

        Args:
            enc_X: Data fed to the encoder.
            dec_X: Data fed to the decoder.
            *args: Extra positional arguments, passed to both the encoder and
                ``decoder.init_state``.

        Returns:
            Whatever ``self.decoder(dec_X, dec_state)`` returns.
        """
        enc_outputs = self.encoder(enc_X, *args)
        # The decoder's starting state is derived from the encoder's result.
        dec_state = self.decoder.init_state(enc_outputs, *args)
        return self.decoder(dec_X, dec_state)
2.基于上述基类,正式创建Seq2Seq编码器与解码器的类
import collections
import math
import torch
import dltools
2.1创建Seq2Seq的编码器类
class Seq2SeqEncoder(Encoder):
    """GRU encoder for sequence-to-sequence learning.

    Args:
        vocab_size: Vocabulary size (number of rows in the embedding table).
        embed_size: Dimension of each token embedding.
        num_hiddens: Number of hidden units in each GRU layer.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability handed to ``nn.GRU``; the default 0
            disables dropout.
        **kwargs: Forwarded to the ``Encoder`` base-class constructor.
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers, dropout=0, **kwargs):
        super().__init__(**kwargs)
        # Embedding layer: token index -> dense vector of size embed_size.
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # Recurrent layer; nn.GRU defaults to time-major input (batch_first=False).
        self.rnn = nn.GRU(embed_size, num_hiddens, num_layers, dropout=dropout)

    def forward(self, X, *args):
        """Encode a batch of token-index sequences.

        Args:
            X: Integer token indices of shape ``(batch_size, num_steps)``.
            *args: Ignored.

        Returns:
            A tuple ``(outputs, state)`` where ``outputs`` has shape
            ``(num_steps, batch_size, num_hiddens)`` and ``state`` has shape
            ``(num_layers, batch_size, num_hiddens)``.
        """
        # (batch_size, num_steps) -> (batch_size, num_steps, embed_size)
        X = self.embedding(X)
        # The GRU expects time-major input: (num_steps, batch_size, embed_size).
        X = X.permute(1, 0, 2)
        # No initial hidden state is passed, so PyTorch initialises it to zeros.
        outputs, state = self.rnn(X)
        return outputs, state
# Smoke test: push a dummy batch through the encoder and print the shapes,
# which are (num_steps, batch_size, num_hiddens) for the outputs and
# (num_layers, batch_size, num_hiddens) for the final state.
encoder = Seq2SeqEncoder(vocab_size=10, embed_size=8, num_hiddens=32, num_layers=2).eval()
# Dummy batch of token indices: batch_size=4, num_steps=7.
X = torch.zeros(4, 7, dtype=torch.long)
outputs, state = encoder(X)
print(outputs.shape, state.shape)
torch.Size([7, 4, 32]) torch.Size([2, 4, 32])
2.2 创建Seq2Seq的解码器类
class Seq2SeqDecoder(Decoder):
    """GRU decoder for sequence-to-sequence learning.

    At every time step the embedded input token is concatenated with a context
    vector (the top-layer slice of the incoming state), which is why the GRU
    input size is ``embed_size + num_hiddens``.

    Args:
        vocab_size: Vocabulary size; also the size of the output layer.
        embed_size: Dimension of each token embedding.
        num_hiddens: Number of hidden units in each GRU layer.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability handed to ``nn.GRU``; the default 0
            disables dropout.
        **kwargs: Forwarded to the ``Decoder`` base-class constructor.
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers, dropout=0, **kwargs):
        super().__init__(**kwargs)
        # Embedding layer: token index -> dense vector of size embed_size.
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # Recurrent layer; its input is the embedding concatenated with the context.
        self.rnn = nn.GRU(embed_size + num_hiddens, num_hiddens, num_layers, dropout=dropout)
        # Output layer projecting hidden units onto vocabulary scores.
        self.dense = nn.Linear(num_hiddens, vocab_size)

    def init_state(self, enc_outputs, *args):
        """Return the decoder's initial state.

        Args:
            enc_outputs: The encoder's return value, indexable so that element 1
                is the encoder's final hidden state.
            *args: Ignored.

        Returns:
            ``enc_outputs[1]``.
        """
        return enc_outputs[1]

    def forward(self, X, state):
        """Decode a batch of token-index sequences.

        Args:
            X: Integer token indices of shape ``(batch_size, num_steps)``.
            state: Hidden state of shape ``(num_layers, batch_size, num_hiddens)``.

        Returns:
            A tuple ``(outputs, state)`` where ``outputs`` has shape
            ``(batch_size, num_steps, vocab_size)`` and ``state`` has shape
            ``(num_layers, batch_size, num_hiddens)``.
        """
        # (batch_size, num_steps) -> (batch_size, num_steps, embed_size)
        X = self.embedding(X)
        # Switch to time-major layout: (num_steps, batch_size, embed_size).
        X = X.permute(1, 0, 2)
        # state[-1] is the top layer's hidden state, shape (batch_size, num_hiddens).
        # Repeat it along a new leading axis so every time step sees the same
        # context: (num_steps, batch_size, num_hiddens).
        context = state[-1].repeat(X.shape[0], 1, 1)
        # Concatenate on the feature axis:
        # (num_steps, batch_size, embed_size + num_hiddens).
        X_and_context = torch.cat((X, context), 2)
        outputs, state = self.rnn(X_and_context, state)
        # Project to vocabulary scores, then return to batch-major layout.
        outputs = self.dense(outputs).permute(1, 0, 2)
        return outputs, state
# Smoke test: feed the encoder's final state and a dummy batch to the decoder.
decoder = Seq2SeqDecoder(vocab_size=10, embed_size=8, num_hiddens=32, num_layers=2)
decoder.eval()
state = decoder.init_state(encoder(X))
outputs, state = decoder(X, state)
# Print explicitly: a bare expression is only echoed inside a notebook and is
# silently discarded when the file runs as a script.
print((outputs.shape, state.shape))
(torch.Size([4, 7, 10]), torch.Size([2, 4, 32]))
3.编码器、解码器理论图