LSTM火灾温度预测(Pytorch版本)
本文为🔗365天深度学习训练营内部文章
原作者:K同学啊
一 导入数据
import torch.nn.functional as F
import numpy as np
import pandas as pd
import torch
from torch import nn
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Select the CUDA device when torch reports one available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the dataset; the path is relative, so the CSV is resolved against the
# current working directory.
data = pd.read_csv('woodpine2.csv')
print(data)
二 可视化
# Draw the three data columns as line plots, one panel each, side by side.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(14, 3), constrained_layout=True)
for panel, column in zip(ax, ('Tem1', 'CO 1', 'Soot 1')):
    sns.lineplot(data=data[column], ax=panel)
plt.show()
# Drop the first column of `data`. An explicit copy is taken because columns
# of `df` are overwritten further down; assigning into a bare slice of `data`
# is ambiguous in pandas (SettingWithCopyWarning) and must not touch `data`.
df = data.iloc[:, 1:].copy()
print(df)
三 构建数据集
1.数据集预处理
from sklearn.preprocessing import MinMaxScaler

# Scale every column to [0, 1] with its OWN scaler. Re-fitting a single
# scaler inside the loop would leave it holding only the min/max of the last
# column ('Soot 1'), so it could not be used to undo the scaling of 'Tem1'.
scalers = {}
for i in ['Tem1', 'CO 1', 'Soot 1']:
    scalers[i] = MinMaxScaler(feature_range=(0, 1))
    df[i] = scalers[i].fit_transform(df[i].values.reshape(-1, 1)).ravel()

# `sc` is what the prediction code uses for inverse_transform on the target
# values, and the target is the 'Tem1' column, so bind it to that scaler.
sc = scalers['Tem1']
print(df.shape)
2.取前八个时间段的Tem1、CO 1、Soot 1的数据为X,第九个时间段的Tem1数据为y
width_x = 8  # number of consecutive rows in each input window
width_y = 1  # number of rows in each target window

# A window starting at row s is kept only while s + width_x + width_y is
# strictly smaller than len(df), i.e. there are len(df) - width_x - width_y
# windows (none when the frame is too short).
n_windows = max(len(df) - width_x - width_y, 0)

# Inputs: width_x consecutive rows, all columns.
x = np.array([
    np.array(df.iloc[start:start + width_x])
    for start in range(n_windows)
])
# Targets: column 0 of the row(s) immediately following each input window.
y = np.array([
    np.array(df.iloc[start + width_x:start + width_x + width_y, 0])
    for start in range(n_windows)
]).reshape(-1, 1, 1)

print(x.shape,y.shape)
print('-----检查数据集中是否有空值-------')
print(np.any(np.isnan(x)))
print(np.any(np.isnan(y)))
3.划分数据集
# 3. Split the dataset
from torch.utils.data import TensorDataset,DataLoader

# The first 5000 windows form the training set, everything after that the
# test set. Arrays are converted to float32 tensors.
X_train, X_test = (
    torch.tensor(part.astype('float32')) for part in (x[:5000], x[5000:])
)
y_train, y_test = (
    torch.tensor(part.astype('float32')) for part in (y[:5000], y[5000:])
)
print(X_train.shape,X_test.shape)

# Both loaders keep the samples in their original order (no shuffling).
train_dl = DataLoader(
    TensorDataset(X_train, y_train),
    batch_size=64,
    shuffle=False,
)
test_dl = DataLoader(
    TensorDataset(X_test, y_test),
    batch_size=64,
    shuffle=False,
)
(5948, 3) (5939, 8, 3) (5939, 1, 1) -----检查数据集中是否有空值------- False False torch.Size([5000, 8, 3]) torch.Size([939, 8, 3])
四 模型训练
1.构建模型
'''
模型训练
'''
# 1.构建模型
class model_lstm(nn.Module):
    """Two chained single-layer LSTMs followed by a linear output layer.

    The network takes batch-first sequences with 3 features per time step
    and returns one value per sequence, read from the last time step.
    """

    def __init__(self):
        super().__init__()
        hidden = 320
        self.lstm0 = nn.LSTM(input_size=3, hidden_size=hidden,
                             num_layers=1, batch_first=True)
        self.lstm1 = nn.LSTM(input_size=hidden, hidden_size=hidden,
                             num_layers=1, batch_first=True)
        self.fc0 = nn.Linear(hidden, 1)

    def forward(self, x):
        """Return the last-time-step prediction, shape (batch, 1)."""
        seq, state = self.lstm0(x)
        # The second LSTM is initialised with the final (h, c) state of the
        # first one.
        seq, _ = self.lstm1(seq, state)
        per_step = self.fc0(seq)
        # The linear layer yields one value per time step; keep only the
        # value of the final step so each sequence gets a single prediction.
        return per_step[:, -1, :]
# Instantiate the network (it is not moved to `device` here) and print its
# layer summary.
model = model_lstm()
print(model)
model_lstm( (lstm0): LSTM(3, 320, batch_first=True) (lstm1): LSTM(320, 320, batch_first=True) (fc0): Linear(in_features=320, out_features=1, bias=True) )
2.编写训练函数
# 2. Training function
def train(train_dl, model, loss_fn, optimizer,lr_scheduler=None):
    """Run one training epoch and return the mean loss per batch.

    Args:
        train_dl: iterable of (inputs, targets) batches; must support len().
        model: the network to optimise.
        loss_fn: callable taking (prediction, target) and returning a scalar
            loss tensor.
        optimizer: optimizer bound to the model's parameters.
        lr_scheduler: optional scheduler; when given it is stepped once at
            the end of the epoch and the resulting learning rate is printed.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    num_batches = len(train_dl)

    # Run on the device the model's parameters live on, so inputs and weights
    # can never end up on different devices.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    train_loss = 0.0
    for X, y in train_dl:
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        pred = model(X)
        # Element-wise losses such as MSELoss broadcast mismatched shapes:
        # a (B, 1) prediction against a (B, 1, 1) target would be compared
        # pairwise as (B, B, 1). Align the target when only the layout differs.
        if y.shape != pred.shape and y.numel() == pred.numel():
            y = y.reshape(pred.shape)
        loss = loss_fn(pred, y)

        # Back-propagation and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    if lr_scheduler is not None:
        lr_scheduler.step()
        print('learning rate = {:.5f}'.format(optimizer.param_groups[0]['lr']),end=' ')

    train_loss /= num_batches
    return train_loss
3.编写测试函数
def t(dataloader, model, loss_fn):
    """Evaluate the model on a data loader and return the mean loss per batch.

    Args:
        dataloader: iterable of (inputs, targets) batches; must support len().
        model: the network to evaluate. Its train/eval mode is not changed
            here.
        loss_fn: callable taking (prediction, target) and returning a scalar
            loss tensor.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    num_batches = len(dataloader)

    # Evaluate on the device the model's parameters live on.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    test_loss = 0.0
    # No gradients are needed for evaluation; disabling them saves memory.
    with torch.no_grad():
        for inputs, target in dataloader:
            if target_device is not None:
                inputs, target = inputs.to(target_device), target.to(target_device)

            target_pred = model(inputs)
            # Avoid silent broadcasting between e.g. (B, 1) predictions and
            # (B, 1, 1) targets, which would compare every prediction with
            # every target.
            if target.shape != target_pred.shape and target.numel() == target_pred.numel():
                target = target.reshape(target_pred.shape)
            loss = loss_fn(target_pred, target)
            test_loss += loss.item()

    test_loss /= num_batches
    return test_loss
4.正式训练
# Hyper-parameters
epochs = 50
learn_rate = 1e-4

# Loss, optimizer and a cosine learning-rate schedule spanning all epochs.
loss_fn = nn.MSELoss()
opt = torch.optim.SGD(model.parameters(), lr=learn_rate)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs, last_epoch=-1)

# Per-epoch loss history, used for plotting afterwards.
train_loss = []
test_loss = []

template = ('Epoch:{:2d}, Train_loss:{:.5f}, Test_loss:{:.5f}')
for epoch in range(epochs):
    # One pass over the training data in training mode ...
    model.train()
    epoch_train_loss = train(train_dl, model, loss_fn, opt, lr_scheduler)

    # ... followed by an evaluation pass in eval mode.
    model.eval()
    epoch_test_loss = t(test_dl, model, loss_fn)

    train_loss.append(epoch_train_loss)
    test_loss.append(epoch_test_loss)
    print(template.format(epoch + 1, epoch_train_loss, epoch_test_loss))

print('Done')
learning rate = 0.00010 Epoch: 1, Train_loss:0.21849, Test_loss:0.75929 learning rate = 0.00010 Epoch: 2, Train_loss:0.21115, Test_loss:0.74356 learning rate = 0.00010 Epoch: 3, Train_loss:0.20415, Test_loss:0.72837 learning rate = 0.00010 Epoch: 4, Train_loss:0.19749, Test_loss:0.71375 learning rate = 0.00010 Epoch: 5, Train_loss:0.19115, Test_loss:0.69969 learning rate = 0.00010 Epoch: 6, Train_loss:0.18514, Test_loss:0.68619 learning rate = 0.00010 Epoch: 7, Train_loss:0.17945, Test_loss:0.67325 learning rate = 0.00009 Epoch: 8, Train_loss:0.17406, Test_loss:0.66087 learning rate = 0.00009 Epoch: 9, Train_loss:0.16897, Test_loss:0.64903 learning rate = 0.00009 Epoch:10, Train_loss:0.16417, Test_loss:0.63775 learning rate = 0.00009 Epoch:11, Train_loss:0.15965, Test_loss:0.62699 learning rate = 0.00009 Epoch:12, Train_loss:0.15540, Test_loss:0.61676 learning rate = 0.00008 Epoch:13, Train_loss:0.15141, Test_loss:0.60704 learning rate = 0.00008 Epoch:14, Train_loss:0.14766, Test_loss:0.59782 learning rate = 0.00008 Epoch:15, Train_loss:0.14415, Test_loss:0.58910 learning rate = 0.00008 Epoch:16, Train_loss:0.14087, Test_loss:0.58084 learning rate = 0.00007 Epoch:17, Train_loss:0.13780, Test_loss:0.57305 learning rate = 0.00007 Epoch:18, Train_loss:0.13493, Test_loss:0.56571 learning rate = 0.00007 Epoch:19, Train_loss:0.13227, Test_loss:0.55881 learning rate = 0.00007 Epoch:20, Train_loss:0.12978, Test_loss:0.55233 learning rate = 0.00006 Epoch:21, Train_loss:0.12748, Test_loss:0.54625 learning rate = 0.00006 Epoch:22, Train_loss:0.12534, Test_loss:0.54057 learning rate = 0.00006 Epoch:23, Train_loss:0.12336, Test_loss:0.53526 learning rate = 0.00005 Epoch:24, Train_loss:0.12153, Test_loss:0.53033 learning rate = 0.00005 Epoch:25, Train_loss:0.11984, Test_loss:0.52574 learning rate = 0.00005 Epoch:26, Train_loss:0.11829, Test_loss:0.52150 learning rate = 0.00004 Epoch:27, Train_loss:0.11686, Test_loss:0.51757 learning rate = 0.00004 Epoch:28, Train_loss:0.11556, 
Test_loss:0.51397 learning rate = 0.00004 Epoch:29, Train_loss:0.11436, Test_loss:0.51066 learning rate = 0.00003 Epoch:30, Train_loss:0.11328, Test_loss:0.50763 learning rate = 0.00003 Epoch:31, Train_loss:0.11230, Test_loss:0.50488 learning rate = 0.00003 Epoch:32, Train_loss:0.11141, Test_loss:0.50239 learning rate = 0.00003 Epoch:33, Train_loss:0.11061, Test_loss:0.50015 learning rate = 0.00002 Epoch:34, Train_loss:0.10990, Test_loss:0.49815 learning rate = 0.00002 Epoch:35, Train_loss:0.10926, Test_loss:0.49636 learning rate = 0.00002 Epoch:36, Train_loss:0.10870, Test_loss:0.49479 learning rate = 0.00002 Epoch:37, Train_loss:0.10821, Test_loss:0.49342 learning rate = 0.00001 Epoch:38, Train_loss:0.10779, Test_loss:0.49223 learning rate = 0.00001 Epoch:39, Train_loss:0.10742, Test_loss:0.49121 learning rate = 0.00001 Epoch:40, Train_loss:0.10711, Test_loss:0.49035 learning rate = 0.00001 Epoch:41, Train_loss:0.10685, Test_loss:0.48964 learning rate = 0.00001 Epoch:42, Train_loss:0.10664, Test_loss:0.48906 learning rate = 0.00000 Epoch:43, Train_loss:0.10647, Test_loss:0.48860 learning rate = 0.00000 Epoch:44, Train_loss:0.10634, Test_loss:0.48825 learning rate = 0.00000 Epoch:45, Train_loss:0.10624, Test_loss:0.48799 learning rate = 0.00000 Epoch:46, Train_loss:0.10616, Test_loss:0.48781 learning rate = 0.00000 Epoch:47, Train_loss:0.10612, Test_loss:0.48769 learning rate = 0.00000 Epoch:48, Train_loss:0.10609, Test_loss:0.48763 learning rate = 0.00000 Epoch:49, Train_loss:0.10607, Test_loss:0.48760 learning rate = 0.00000 Epoch:50, Train_loss:0.10606, Test_loss:0.48760 Done
五 Loss损失评估和模型预测评估
from datetime import datetime

# Timestamp shown as the x-axis label of the loss figure.
current_time = datetime.now()

plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used so Chinese labels render
    'axes.unicode_minus': False,    # render the minus sign correctly
    'figure.dpi': 100,              # figure resolution
})

epochs_range = range(epochs)

plt.figure(figsize=(12, 3))
for curve, curve_label in ((train_loss, 'Training Loss'), (test_loss, 'Test Loss')):
    plt.plot(epochs_range, curve, label=curve_label)
plt.legend(loc='upper right')
plt.xlabel(current_time)
plt.title('Training and Validation Loss')
plt.show()
调用模型进行预测
# Run the trained model on the test windows and undo the scaling with `sc`.
model.eval()
# Feed the inputs on the same device as the model's weights, and bring the
# result back to the CPU before converting to NumPy (required for CUDA tensors).
model_device = next(model.parameters()).device
with torch.no_grad():  # inference only: no autograd graph is needed
    scaled_pred = model(X_test.to(model_device)).cpu().numpy()

pred_y_lstm = sc.inverse_transform(scaled_pred.reshape(-1,1))
# Hand scikit-learn a NumPy array rather than a torch tensor.
y_test_1 = sc.inverse_transform(y_test.cpu().numpy().reshape(-1,1))

# Flatten the (n, 1) arrays into plain lists of scalars.
y_test_one = [o[0] for o in y_test_1]
pred_y_lstm_one = [i[0] for i in pred_y_lstm]

plt.figure(figsize=(8,6))
plt.plot(y_test_one[:2000],color='red',label='real_temp')
plt.plot(pred_y_lstm_one[:2000],color='blue',label='prediction')
plt.title('Title')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()
预测评估
# Evaluate the predictions: RMSE and R2.
from sklearn import metrics

# scikit-learn metrics take (y_true, y_pred). The order does not matter for
# the squared error, but R2 is normalised by the variance of y_true, so
# swapping the arguments yields a different (wrong) score.
RMSE_lstm = metrics.mean_squared_error(y_test_one,pred_y_lstm_one)**0.5
R2_lstm = metrics.r2_score(y_test_one,pred_y_lstm_one)
print('均方差误差:%.5f'%RMSE_lstm)
print('R2:%.5f'%R2_lstm)
均方差误差:0.00001 R2:0.83518