自适应图卷积循环网络(AGCRN):基于节点嵌入的时间序列预测
自适应图卷积循环网络(AGCRN):基于节点嵌入的时间序列预测
引言
时间序列预测是机器学习中的一个重要任务,在交通流量预测、股票价格预测等领域有广泛应用。传统的深度学习方法,如LSTM和GRU,虽然在某些情况下表现出色,但在处理图结构数据时(例如交通网络中节点之间的相互作用)显得力不从心。
近年来,一种将自适应图卷积与循环神经网络(RNN)相结合的模型——AGCRN(Adaptive Graph Convolutional Recurrent Network),因其在交通流量预测任务中的出色表现而备受关注。本文将详细介绍AGCRN模型,并提供一个基于PyTorch实现的代码示例。文章最后附源码。
论文:Adaptive Graph Convolutional Recurrent Network for Traffic Forecasting
论文地址:https://arxiv.org/pdf/2007.02842
AGCRN模型概述
模型结构
AGCRN的核心思想是将图结构数据与循环神经网络相结合,利用注意力机制捕捉节点之间的依赖关系。
- 输入层:接收原始的时间序列数据(例如交通流量)和图结构信息。
- 编码器(Encoder):基于图卷积操作和GRU式门控循环单元(源码中的AGCRNCell),提取时序特征。
- 解码器(Decoder):根据编码器输出的隐藏状态,预测未来时间步的值。
图注意力机制
AGCRN的关键创新是引入了图注意力机制。传统的图卷积操作假设节点之间的关系是固定的(通过预定义的邻接矩阵),而注意力机制允许模型自适应地调整不同节点的重要性。
通过节点嵌入(Node Embedding)技术,模型能够将图结构信息转化为低维向量表示,并在此基础上计算节点间的注意力权重。这种方式不仅能够捕捉到局部时空依赖关系,还能灵活应对动态变化的网络拓扑。
代码实现
参数配置
class Args:
    """Hyper-parameter bundle read by the AGCRN constructor.

    Every argument defaults to the value used throughout this walkthrough,
    so ``Args()`` yields the same configuration as before; pass keyword
    arguments to override individual settings.
    """

    def __init__(self, num_nodes=10, input_dim=1, rnn_units=64, output_dim=1,
                 horizon=3, num_layers=2, cheb_k=3, embed_dim=20):
        self.num_nodes = num_nodes    # number of nodes in the graph
        self.input_dim = input_dim    # feature dimension per node
        self.rnn_units = rnn_units    # hidden size of each recurrent cell
        self.output_dim = output_dim  # output feature dimension per node
        self.horizon = horizon        # number of future time steps to predict
        self.num_layers = num_layers  # number of stacked recurrent layers
        self.cheb_k = cheb_k          # order of the Chebyshev polynomial expansion
        self.embed_dim = embed_dim    # node-embedding dimension
模型结构
class AGCRN(nn.Module):
    """Constructor excerpt of the AGCRN model (see the full listing for forward).

    Args:
        args: configuration object exposing ``num_nodes``, ``input_dim``,
            ``rnn_units``, ``output_dim``, ``horizon``, ``num_layers``,
            ``cheb_k`` and ``embed_dim``.
    """

    def __init__(self, args):
        super(AGCRN, self).__init__()
        self.num_node = args.num_nodes
        self.input_dim = args.input_dim
        self.hidden_dim = args.rnn_units
        self.output_dim = args.output_dim
        self.horizon = args.horizon
        self.num_layers = args.num_layers
        # Learnable node-embedding table, one row per node.
        self.node_embeddings = nn.Parameter(torch.randn(self.num_node, args.embed_dim), requires_grad=True)
        # Encoder: stacked AGCRNCell layers (class AVWDCRNN in the full listing).
        self.encoder = AVWDCRNN(args.num_nodes, args.input_dim, args.rnn_units,
                                args.cheb_k, args.embed_dim, args.num_layers)
        # Predictor: one convolution whose kernel spans the hidden axis and
        # emits horizon * output_dim channels, matching the full listing.
        self.end_conv = nn.Conv2d(1, args.horizon * self.output_dim,
                                  kernel_size=(1, self.hidden_dim), bias=True)
前向传播过程
def forward(self, source, targets, teacher_forcing_ratio=0.5):
    """Predict ``horizon`` future steps from the observed sequence.

    Mirrors the forward pass of the AGCRN class in the full listing.

    Args:
        source: observed sequence, shaped (B, T_1, N, D).
        targets: ground-truth future sequence; not read by this method.
        teacher_forcing_ratio: not read by this method.

    Returns:
        Tensor shaped (B, horizon, N, output_dim).
    """
    # The encoder needs an explicit initial state and the node embeddings.
    init_state = self.encoder.init_hidden(source.shape[0])
    output, _ = self.encoder(source, init_state, self.node_embeddings)  # B, T, N, hidden
    # Only the last time step feeds the predictor.
    output = output[:, -1:, :, :]  # B, 1, N, hidden
    output = self.end_conv(output)  # B, horizon * output_dim, N, 1
    output = output.squeeze(-1).reshape(-1, self.horizon, self.output_dim, self.num_node)
    return output.permute(0, 1, 3, 2)  # B, horizon, N, output_dim
模型训练与推理
# Build the model from the default hyper-parameters.
args = Args()
model = AGCRN(args)
# Loss and optimizer (mean squared error as an example).
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Illustrative epoch count; tune it for the real dataset.
num_epochs = 10
# `loader` is assumed to be a data loader yielding (features, labels) batches.
model.train()
for epoch in range(num_epochs):
    for batch_features, batch_labels in loader:
        # Forward pass: AGCRN.forward takes the source and the targets.
        outputs = model(batch_features, batch_labels)
        loss = criterion(outputs, batch_labels)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
代码结构解析
节点嵌入(Node Embedding)
模型通过随机初始化的可学习参数 node_embeddings 将每个节点映射到低维空间。在前向传播过程中,节点嵌入被用于计算节点间的自适应邻接权重(即上文所说的注意力权重),并为每个节点生成专属的卷积权重与偏置。
# Learnable node-embedding table of shape (num_node, embed_dim), initialised with torch.randn.
self.node_embeddings = nn.Parameter(torch.randn(self.num_node, args.embed_dim), requires_grad=True)
编码器(Encoder)
编码器的核心是基于图注意力机制的循环神经网络。通过切比雪夫多项式平滑化的图卷积操作,提取节点间的关系特征。
# The encoder class in the full listing is named AVWDCRNN.
self.encoder = AVWDCRNN(args.num_nodes, args.input_dim, args.rnn_units,
                        args.cheb_k, args.embed_dim, args.num_layers)
解码器(Decoder)
解码器使用一个二维卷积层 end_conv,将编码器最后一个时间步的隐藏状态转换为多步预测结果。这种方式能够一次性显式地生成未来各时间步的序列。
# Kernel spans the hidden axis; one output channel per (future step, output feature).
self.end_conv = nn.Conv2d(1, args.horizon * self.output_dim, kernel_size=(1, self.hidden_dim), bias=True)
源码
import torch
import torch.nn.functional as F
import torch.nn as nn
# 论文:Adaptive Graph Convolutional Recurrent Network for Traffic Forecasting
# 论文地址:https://arxiv.org/pdf/2007.02842
class AVWGCN(nn.Module):
    """Graph convolution whose per-node weights are generated from node embeddings.

    The adjacency ("supports") is not given; it is derived from the node
    embeddings on every forward pass and expanded into ``cheb_k``
    Chebyshev-style terms.

    Args:
        dim_in: input feature size per node.
        dim_out: output feature size per node.
        cheb_k: number of support matrices used (identity counts as the first).
        embed_dim: node-embedding dimension.
    """

    def __init__(self, dim_in, dim_out, cheb_k, embed_dim):
        super(AVWGCN, self).__init__()
        self.cheb_k = cheb_k
        # torch.FloatTensor(shape) would leave these parameters as uninitialised
        # memory (possibly NaN/inf), so allocate and initialise explicitly.
        self.weights_pool = nn.Parameter(torch.empty(embed_dim, cheb_k, dim_in, dim_out))
        self.bias_pool = nn.Parameter(torch.empty(embed_dim, dim_out))
        nn.init.xavier_uniform_(self.weights_pool)
        nn.init.zeros_(self.bias_pool)

    def forward(self, x, node_embeddings):
        """Apply the adaptive graph convolution.

        Args:
            x: node features shaped (B, N, dim_in).
            node_embeddings: embedding table shaped (N, embed_dim).

        Returns:
            Tensor shaped (B, N, dim_out).
        """
        node_num = node_embeddings.shape[0]
        # Adaptive adjacency: row-wise softmax over ReLU(E @ E^T), shaped (N, N).
        supports = F.softmax(F.relu(torch.mm(node_embeddings, node_embeddings.transpose(0, 1))), dim=1)
        identity = torch.eye(node_num, device=supports.device, dtype=supports.dtype)
        support_set = [identity, supports]
        # Recurrence T_k = 2 * A @ T_{k-1} - T_{k-2} for the higher-order terms.
        for _ in range(2, self.cheb_k):
            support_set.append(torch.matmul(2 * supports, support_set[-1]) - support_set[-2])
        # Keep exactly cheb_k terms so cheb_k == 1 matches the weight pool.
        supports = torch.stack(support_set[:self.cheb_k], dim=0)
        # Node-specific parameters generated from the shared pools.
        weights = torch.einsum('nd,dkio->nkio', node_embeddings, self.weights_pool)  # N, cheb_k, dim_in, dim_out
        bias = torch.matmul(node_embeddings, self.bias_pool)  # N, dim_out
        x_g = torch.einsum("knm,bmc->bknc", supports, x)  # B, cheb_k, N, dim_in
        x_g = x_g.permute(0, 2, 1, 3)  # B, N, cheb_k, dim_in
        x_gconv = torch.einsum('bnki,nkio->bno', x_g, weights) + bias  # B, N, dim_out
        return x_gconv
class AGCRNCell(nn.Module):
    """One gated recurrent step built from two AVWGCN graph convolutions.

    ``gate`` produces two sigmoid gates from the concatenated input and
    state; ``update`` produces a tanh candidate state from the input and the
    gated previous state.
    """

    def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim):
        super(AGCRNCell, self).__init__()
        self.node_num = node_num
        self.hidden_dim = dim_out
        # Both convolutions consume the input concatenated with the state.
        fused_dim = dim_in + self.hidden_dim
        self.gate = AVWGCN(fused_dim, 2 * dim_out, cheb_k, embed_dim)
        self.update = AVWGCN(fused_dim, dim_out, cheb_k, embed_dim)

    def forward(self, x, state, node_embeddings):
        """Advance the hidden state by one time step.

        Args:
            x: input shaped (B, num_nodes, input_dim).
            state: previous hidden state shaped (B, num_nodes, hidden_dim).
            node_embeddings: embedding table passed through to AVWGCN.

        Returns:
            New hidden state shaped (B, num_nodes, hidden_dim).
        """
        # The initial state is created without a device, so follow the input.
        prev = state.to(x.device)
        gates = torch.sigmoid(self.gate(torch.cat([x, prev], dim=-1), node_embeddings))
        z = gates[..., :self.hidden_dim]
        r = gates[..., self.hidden_dim:]
        gated_input = torch.cat([x, z * prev], dim=-1)
        proposal = torch.tanh(self.update(gated_input, node_embeddings))
        return r * prev + (1 - r) * proposal

    def init_hidden_state(self, batch_size):
        """Return an all-zero state shaped (batch_size, node_num, hidden_dim)."""
        shape = (batch_size, self.node_num, self.hidden_dim)
        return torch.zeros(*shape)
class AVWDCRNN(nn.Module):
    """Stack of AGCRNCell layers unrolled over the time axis.

    The first layer maps ``dim_in`` to ``dim_out``; each further layer maps
    ``dim_out`` to ``dim_out``.
    """

    def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim, num_layers=1):
        super(AVWDCRNN, self).__init__()
        assert num_layers >= 1, 'At least one DCRNN layer in the Encoder.'
        self.node_num = node_num
        self.input_dim = dim_in
        self.num_layers = num_layers
        # Per-layer input widths: dim_in for the first layer, dim_out after it.
        layer_inputs = [dim_in] + [dim_out] * (num_layers - 1)
        self.dcrnn_cells = nn.ModuleList()
        for width in layer_inputs:
            self.dcrnn_cells.append(AGCRNCell(node_num, width, dim_out, cheb_k, embed_dim))

    def forward(self, x, init_state, node_embeddings):
        """Run every layer over the whole sequence.

        Args:
            x: input shaped (B, T, N, D).
            init_state: initial states shaped (num_layers, B, N, hidden_dim).
            node_embeddings: embedding table forwarded to every cell.

        Returns:
            Tuple ``(outputs, last_states)``: ``outputs`` is the top layer's
            state at every step, shaped (B, T, N, hidden_dim); ``last_states``
            is a list holding each layer's final state, (B, N, hidden_dim) each.
        """
        assert x.shape[2] == self.node_num and x.shape[3] == self.input_dim
        layer_input = x
        last_states = []
        for layer_idx, cell in enumerate(self.dcrnn_cells):
            hidden = init_state[layer_idx]
            per_step = []
            # unbind yields one (B, N, *) slice per time step.
            for step_input in layer_input.unbind(dim=1):
                hidden = cell(step_input, hidden, node_embeddings)
                per_step.append(hidden)
            last_states.append(hidden)
            # This layer's per-step states become the next layer's input.
            layer_input = torch.stack(per_step, dim=1)
        return layer_input, last_states

    def init_hidden(self, batch_size):
        """Return zero initial states shaped (num_layers, B, N, hidden_dim)."""
        zero_states = [
            self.dcrnn_cells[layer_idx].init_hidden_state(batch_size)
            for layer_idx in range(self.num_layers)
        ]
        return torch.stack(zero_states, dim=0)
class AGCRN(nn.Module):
    """AGCRN forecaster: node embeddings, recurrent encoder, convolutional predictor.

    Args:
        args: configuration object exposing ``num_nodes``, ``input_dim``,
            ``rnn_units``, ``output_dim``, ``horizon``, ``num_layers``,
            ``cheb_k`` and ``embed_dim``.
    """

    def __init__(self, args):
        super(AGCRN, self).__init__()
        self.num_node = args.num_nodes
        self.input_dim = args.input_dim
        self.hidden_dim = args.rnn_units
        self.output_dim = args.output_dim
        self.horizon = args.horizon
        self.num_layers = args.num_layers
        # Learnable node-embedding table, one row per node.
        self.node_embeddings = nn.Parameter(torch.randn(self.num_node, args.embed_dim), requires_grad=True)
        self.encoder = AVWDCRNN(args.num_nodes, args.input_dim, args.rnn_units, args.cheb_k,
                                args.embed_dim, args.num_layers)
        # Predictor: the kernel spans the hidden axis and emits one channel
        # per (future step, output feature) pair.
        self.end_conv = nn.Conv2d(1, args.horizon * self.output_dim, kernel_size=(1, self.hidden_dim), bias=True)

    def forward(self, source, targets=None, teacher_forcing_ratio=0.5):
        """Predict ``horizon`` future steps from the observed sequence.

        Args:
            source: observed sequence shaped (B, T_1, N, D).
            targets: ground-truth future sequence shaped (B, T_2, N, D). It is
                not read by this method, so it is optional and callers may
                omit it at inference time.
            teacher_forcing_ratio: not read by this method; kept for
                interface compatibility.

        Returns:
            Tensor shaped (B, horizon, N, output_dim).
        """
        init_state = self.encoder.init_hidden(source.shape[0])
        output, _ = self.encoder(source, init_state, self.node_embeddings)  # B, T, N, hidden
        # Only the last time step feeds the predictor.
        output = output[:, -1:, :, :]  # B, 1, N, hidden
        # CNN-based predictor.
        output = self.end_conv(output)  # B, horizon * output_dim, N, 1
        output = output.squeeze(-1).reshape(-1, self.horizon, self.output_dim, self.num_node)
        output = output.permute(0, 1, 3, 2)  # B, horizon, N, output_dim
        return output
if __name__ == '__main__':
    class Args:
        """Demo hyper-parameters for the smoke test below."""

        num_nodes = 10   # number of nodes in the graph
        input_dim = 1    # feature dimension per node
        rnn_units = 64   # hidden size of each recurrent cell
        output_dim = 1   # output feature dimension per node
        horizon = 3      # number of future time steps to predict
        num_layers = 2   # number of stacked recurrent layers
        cheb_k = 3       # order of the Chebyshev polynomial expansion
        embed_dim = 20   # node-embedding dimension

    # Instantiate the configuration and the model.
    args = Args()
    model = AGCRN(args)

    # Dummy input: batch of 1, 3 observed steps.
    history = torch.randn(1, 3, args.num_nodes, args.input_dim)
    print("Input tensor size: ", history.shape)

    # Dummy target covering the prediction horizon.
    future = torch.randn(1, args.horizon, args.num_nodes, args.output_dim)
    print("Target tensor size:", future.shape)

    # Switch to training mode and run one forward pass.
    model.train()
    prediction = model(history, future)
    print("Output size: ", prediction.shape)
总结
AGCRN模型通过结合注意力机制和循环神经网络,有效提升了交通流量预测的性能。相比于传统的LSTM或CNN模型,它能够更好地捕捉复杂的城市交通网中的空间依赖关系。
在实际应用中,可以根据具体需求调整节点嵌入维度、RNN隐藏层大小以及关注的时间窗口等超参数。此外,代码还提供了训练过程和损失函数的定义方式,方便读者进行实验和调优。
如果你对时间序列预测或图神经网络感兴趣,AGCRN无疑是一个值得深入研究的方向!