Deep Learning PyTorch Notes: TCN
1 TCN
1.1 Main architecture
- Causal convolution
  - TCN uses causal convolutions so that a prediction for time t only sees information from time t and earlier; no future information can leak in.
- Dilated convolution
  - Dilated convolutions let the TCN enlarge its receptive field without losing temporal resolution.
  - In the typical illustration, kernel_size = 2 and the dilations are [1, 2, 4, 8].
  - The dilation factor usually grows exponentially with depth, here as powers of the kernel size (2): 1, 2, 4, 8, 16, 32, ...
- Length-preserving padding
  - To keep the output as long as the input, TCN applies a special padding before each convolution (see the sketch after this list).
  - As a result, the input and output of every hidden layer have the same temporal length.
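As a minimal sketch of the causal + dilated idea (my own illustration, not part of the original notes), a Conv1d with left-only padding keeps the output aligned with the input and never looks at future time steps:

import torch
import torch.nn as nn
import torch.nn.functional as F

kernel_size, dilation = 2, 4           # example values from the list above
pad = (kernel_size - 1) * dilation     # amount of left padding needed

conv = nn.Conv1d(1, 1, kernel_size, dilation=dilation)  # no built-in padding

x = torch.randn(8, 1, 32)              # (Batch, channel, seq_len)
y = conv(F.pad(x, (pad, 0)))           # pad only the left end of the time axis
print(y.shape)                         # torch.Size([8, 1, 32]) -- length preserved

The implementation in section 2 achieves the same effect differently: Conv1d pads both sides symmetrically and a chomp module then trims the surplus steps off the tail.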
2 PyTorch implementation
2.1 Imports
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
2.2 Chomp (cropping) module
Chomp1d trims off the extra time steps that padding appends to the end of the output (the x5' and x6' in the original figure), so the output length matches the input.
class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()
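As a quick shape check (my own example, assuming the imports from 2.1 and the Chomp1d class above), a padded Conv1d produces seq_len + padding time steps, and Chomp1d trims it back to seq_len:

conv = nn.Conv1d(3, 5, kernel_size=2, dilation=4, padding=4)  # padding = (k-1)*dilation
chomp = Chomp1d(4)

x = torch.randn(8, 3, 100)   # (Batch, input_channel, seq_len)
print(conv(x).shape)         # torch.Size([8, 5, 104]) -- padding adds 4 extra steps
print(chomp(conv(x)).shape)  # torch.Size([8, 5, 100]) -- back to seq_len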
2.3 TemporalBlock
Each TemporalBlock consists of two dilated causal convolutions plus a residual connection.
2.3.1 __init__
class TemporalBlock(nn.Module):
    def __init__(self,
                 n_inputs,
                 n_outputs,
                 kernel_size,
                 stride,
                 dilation,
                 padding,
                 dropout=0.2):
        super(TemporalBlock, self).__init__()
        ####################### first dilated causal convolution #######################
        self.conv1 = weight_norm(nn.Conv1d(n_inputs,
                                           n_outputs,
                                           kernel_size,
                                           stride=stride,
                                           padding=padding,
                                           dilation=dilation))
        # (Batch, output_channel, seq_len + padding)
        self.chomp1 = Chomp1d(padding)  # trim the extra padding so the time length stays seq_len
        # (Batch, output_channel, seq_len)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        ################################################################################

        ############ second dilated causal convolution of this TemporalBlock ##########
        # identical to the first one, except that it maps n_outputs to n_outputs
        self.conv2 = weight_norm(nn.Conv1d(n_outputs,
                                           n_outputs,
                                           kernel_size,
                                           stride=stride,
                                           padding=padding,
                                           dilation=dilation))
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        ################################################################################

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)
        # The residual connection needs matching channel counts; if n_inputs != n_outputs,
        # a 1x1 convolution projects the input to n_outputs channels.
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()
        self.init_weights()
2.3.2 Weight initialization
def init_weights(self):
    """
    Initialize the convolution weights from a small Gaussian (std = 0.01).
    """
    self.conv1.weight.data.normal_(0, 0.01)
    self.conv2.weight.data.normal_(0, 0.01)
    if self.downsample is not None:
        self.downsample.weight.data.normal_(0, 0.01)
2.3.3 forward
def forward(self, x):
    # x: (Batch, input_channel, seq_len)
    out = self.net(x)
    res = x if self.downsample is None else self.downsample(x)
    # residual connection: keeps gradients from vanishing as the network gets deeper
    return self.relu(out + res)
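For illustration (my own example, with hypothetical parameter values), a single TemporalBlock preserves seq_len while mapping n_inputs to n_outputs channels; when the two differ, the 1x1 downsample makes the residual addition possible:

block = TemporalBlock(n_inputs=16, n_outputs=32, kernel_size=2, stride=1,
                      dilation=1, padding=(2 - 1) * 1, dropout=0.2)
x = torch.randn(8, 16, 100)  # (Batch, input_channel, seq_len)
print(block(x).shape)        # torch.Size([8, 32, 100])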
2.4 The full TCN
2.4.1 __init__
class TemporalConvNet(nn.Module):
    def __init__(self,
                 num_inputs,
                 num_channels,
                 kernel_size=2,
                 dropout=0.2):
        """
        num_channels: list of hidden channel sizes, one per layer;
        e.g. [25, 25, 25, 25] means 4 hidden layers with 25 channels each
        """
        super(TemporalConvNet, self).__init__()
        layers = []
        num_levels = len(num_channels)
        # number of stacked TemporalBlock units (see section 2.3)
        for i in range(num_levels):
            dilation_size = 2 ** i
            # dilation factors: 1, 2, 4, 8, ...
            # this way no past time step within the receptive field is skipped
            in_channels = num_inputs if i == 0 else num_channels[i-1]
            # input channels of this layer
            out_channels = num_channels[i]
            # output channels of this layer
            layers += [TemporalBlock(in_channels,
                                     out_channels,
                                     kernel_size,
                                     stride=1,
                                     dilation=dilation_size,
                                     padding=(kernel_size-1) * dilation_size,
                                     dropout=dropout)]
        self.network = nn.Sequential(*layers)
Note (personal opinion): since kernel_size = 2 here (the standard TCN choice), dilation = 2**i and dilation = kernel_size**i are the same thing. If kernel_size > 2, switching to dilation = kernel_size**i would grow the receptive field even faster.
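To make that trade-off concrete, here is a small helper (my own addition, not from the original notes) that computes the receptive field of the stack; each TemporalBlock contains two dilated convolutions, so level i contributes 2 * (kernel_size - 1) * dilation_i extra time steps:

def receptive_field(kernel_size, num_levels, base=2):
    rf = 1
    for i in range(num_levels):
        rf += 2 * (kernel_size - 1) * base ** i
    return rf

print(receptive_field(kernel_size=2, num_levels=4))          # 31
print(receptive_field(kernel_size=3, num_levels=4, base=2))  # 61
print(receptive_field(kernel_size=3, num_levels=4, base=3))  # 161 (dilation = kernel_size**i)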
2.4.2 forward
def forward(self, x):
    """
    x: (Batch, input_channel, seq_len)
    :return: (Batch, output_channel, seq_len)
    """
    return self.network(x)
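Finally, an end-to-end sanity check (my own example, using the shapes from the docstring and the [25, 25, 25, 25] example in 2.4.1):

tcn = TemporalConvNet(num_inputs=10, num_channels=[25, 25, 25, 25],
                      kernel_size=2, dropout=0.2)
x = torch.randn(8, 10, 100)  # (Batch, input_channel, seq_len)
out = tcn(x)
print(out.shape)             # torch.Size([8, 25, 100])

# For sequence-to-one prediction, a common pattern is a linear head on the last time step:
head = nn.Linear(25, 1)
y_hat = head(out[:, :, -1])  # (Batch, 1)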