【实验记录】动手实现一个简单的神经网络实验(一)
最近上了“神经网络与深度学习”这门课,有一个自己动手实现调整神经网络模型的实验感觉还挺有记录意义,可以帮我巩固之前学习到的理论知识,所以就打算记录一下。
实验大概是使用LeNet(卷积神经网络)对MNIST数据集做图像分类任务,然后自己调整模型和参数,感受一下各方面给模型带来的影响。
本来老师是给了代码让我们只要调整模型就好,但我还是想自己动手写一下。
老师给的完整代码:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Load the preprocessed MNIST splits. Each .pt file is unpacked below as an
# (images, labels) pair. Forward slashes are used so the relative paths
# resolve on both Windows and POSIX systems (backslashes only work on Windows).
loaded_dict = torch.load('data/MNIST/processed/training.pt')
loaded_dict_test = torch.load('data/MNIST/processed/test.pt')
images, label = loaded_dict  # separate the images from the labels
images_test, label_test = loaded_dict_test

# Insert a channel axis at dim 1 (the network's first layer is a Conv2d with
# one input channel) and convert the pixels to float32.
# NOTE(review): assumes the stored images are (N, 28, 28) -- confirm.
images_test = images_test.unsqueeze(1)
X_test = images_test.to(torch.float32)
y_test = label_test
# LeNet-style backbone: two conv/sigmoid/average-pool stages followed by three
# fully connected layers that end in 10 raw class scores.
# The shape notes assume a 1x28x28 input, which is what the 16 * 5 * 5
# flattened size implies.
net = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),  # -> 6x28x28
    nn.Sigmoid(),
    nn.AvgPool2d(2, 2),                                                  # -> 6x14x14
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),            # -> 16x10x10
    nn.Sigmoid(),
    nn.AvgPool2d(2, 2),                                                  # -> 16x5x5
    nn.Flatten(),                                                        # -> 400
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(in_features=120, out_features=84),
    nn.Sigmoid(),
    nn.Linear(in_features=84, out_features=10),
)
# Insert a channel axis at dim 1 and convert the training pixels to float32,
# mirroring what is done for the test images.
images = images.unsqueeze(1)
X_train = images.to(torch.float32)
y_train = label

# Keep only a prefix of each split: the first 10000 training samples and the
# first 1000 test samples (presumably to keep each run short).
X_train = X_train[:10000]
y_train = y_train[:10000]
X_test = X_test[:1000]
y_test = y_test[:1000]
class LeNet(nn.Module):
    """Wrap a backbone network and turn its raw scores into log-probabilities.

    Args:
        net: Module that maps a batch of inputs to per-class scores, with the
            classes laid out along dim 1.
    """

    def __init__(self, net):
        super().__init__()
        # Assigning the backbone as an attribute registers it as a submodule,
        # so its weights are reachable through self.parameters().
        self.net = net

    def forward(self, X):
        """Return the log-softmax (over dim 1) of the backbone output for X."""
        # Call the wrapped self.net rather than a module-level `net` global:
        # the model must run the network it was constructed with.
        return F.log_softmax(self.net(X), dim=1)
def train(model, device, train_, optimizer, epoch):
    """Run one training epoch over ``train_``.

    Args:
        model: Network whose output is treated as log-probabilities (the loss
            is ``F.nll_loss``). The model itself is not moved by this function.
        device: Device each mini-batch is moved to, e.g. ``'cpu'``.
        train_: DataLoader yielding ``(X, y)`` mini-batches.
        optimizer: Optimizer that updates the model's parameters.
        epoch: Epoch index; only used in the progress message.
    """
    model.train()
    for i, (X, y) in enumerate(train_):
        # Tensor.to() returns a tensor instead of moving in place, so the
        # result has to be bound back to the names used below.
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        predict_y = model(X)
        loss = F.nll_loss(predict_y, y)
        loss.backward()
        optimizer.step()
        done = i + 1  # number of batches finished so far
        if done % 100 == 0:
            # Report progress from the finished-batch count so the numbers
            # agree with the every-100-batches trigger.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, done * len(X), len(train_.dataset),
                100. * done / len(train_), loss.item()))
def test(model, device, test_):
    """Evaluate ``model`` on ``test_`` and print the average loss and accuracy.

    Args:
        model: Network whose output is treated as log-probabilities (the loss
            is ``F.nll_loss``). The model itself is not moved by this function.
        device: Device each mini-batch is moved to, e.g. ``'cpu'``.
        test_: DataLoader yielding ``(X, y)`` mini-batches.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for X, y in test_:
            # Tensor.to() returns a tensor instead of moving in place, so the
            # result has to be bound back to the names used below.
            X, y = X.to(device), y.to(device)
            predict_y = model(X)
            # Sum (not mean) per batch so the division below yields a true
            # per-sample average.
            test_loss += F.nll_loss(predict_y, y, reduction='sum').item()
            pred = predict_y.argmax(dim=1)
            correct += pred.eq(y).sum().item()
    # Use the loader passed in, not a module-level `test_loader` global.
    n_samples = len(test_.dataset)
    test_loss /= n_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        100. * correct / n_samples))


# The name matches pytest's default test-function pattern; opt out of
# collection so importing this helper into a test module does not make pytest
# try to run it as a test case.
test.__test__ = False
# Mini-batch size shared by both loaders.
batch_size = 10

# Pair every image tensor with its label, then serve the pairs in mini-batches.
# Only the training loader reshuffles the samples.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

# Wrap the backbone in LeNet and optimize its parameters with Adam, using the
# optimizer's default hyperparameters.
Net = LeNet(net)
optimizer = optim.Adam(Net.parameters())

# Five epochs on the CPU, evaluating on the test loader after each one.
for epoch in range(5):
    train(Net, 'cpu', train_loader, optimizer, epoch)
    test(Net, 'cpu', test_loader)
我自己也写了一个,还是参考了很多老师写的qwq
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Load the preprocessed MNIST splits. Each .pt file is unpacked below as an
# (images, labels) pair. Forward slashes are used so the relative paths
# resolve on both Windows and POSIX systems (backslashes only work on Windows).
loaded_dict = torch.load('data/MNIST/processed/training.pt')
loaded_dict_test = torch.load('data/MNIST/processed/test.pt')
images, label = loaded_dict  # separate the images from the labels
images_test, label_test = loaded_dict_test

# Insert a channel axis at dim 1 (the network's first layer is a Conv2d with
# one input channel) and convert the pixels to float32.
# NOTE(review): assumes the stored images are (N, 28, 28) -- confirm.
images_test = images_test.unsqueeze(1)
X_test = images_test.to(torch.float32)
y_test = label_test
# LeNet-style backbone: two conv/sigmoid/average-pool stages followed by three
# fully connected layers that end in 10 raw class scores.
# The shape notes assume a 1x28x28 input, which is what the 16 * 5 * 5
# flattened size implies.
net = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),  # -> 6x28x28
    nn.Sigmoid(),
    nn.AvgPool2d(2, 2),                                                  # -> 6x14x14
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),            # -> 16x10x10
    nn.Sigmoid(),
    nn.AvgPool2d(2, 2),                                                  # -> 16x5x5
    nn.Flatten(),                                                        # -> 400
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(in_features=120, out_features=84),
    nn.Sigmoid(),
    nn.Linear(in_features=84, out_features=10),
)
# Insert a channel axis at dim 1 and convert the training pixels to float32,
# mirroring what is done for the test images.
images = images.unsqueeze(1)
X_train = images.to(torch.float32)
y_train = label

# Keep only a prefix of each split: the first 10000 training samples and the
# first 1000 test samples (presumably to keep each run short).
X_train = X_train[:10000]
y_train = y_train[:10000]
X_test = X_test[:1000]
y_test = y_test[:1000]
class LeNet(nn.Module):
    """Wrap a backbone network and turn its raw scores into log-probabilities.

    Args:
        net: Module that maps a batch of inputs to per-class scores, with the
            classes laid out along dim 1.
    """

    def __init__(self, net):
        super().__init__()
        # Assigning the backbone as an attribute registers it as a submodule,
        # so its weights are reachable through self.parameters().
        self.net = net

    def forward(self, X):
        """Return the log-softmax (over dim 1) of the backbone output for X."""
        # Call the wrapped self.net rather than a module-level `net` global:
        # the model must run the network it was constructed with.
        return F.log_softmax(self.net(X), dim=1)
def train(model, device, train_, optimizer, epoch):
    """Run one training epoch over ``train_``.

    Args:
        model: Network whose output is treated as log-probabilities (the loss
            is ``F.nll_loss``). The model itself is not moved by this function.
        device: Device each mini-batch is moved to, e.g. ``'cpu'``.
        train_: DataLoader yielding ``(X, y)`` mini-batches.
        optimizer: Optimizer that updates the model's parameters.
        epoch: Epoch index; only used in the progress message.
    """
    model.train()
    for i, (X, y) in enumerate(train_):
        # Tensor.to() returns a tensor instead of moving in place, so the
        # result has to be bound back to the names used below.
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        predict_y = model(X)
        loss = F.nll_loss(predict_y, y)
        loss.backward()
        optimizer.step()
        done = i + 1  # number of batches finished so far
        if done % 100 == 0:
            # Report progress from the finished-batch count so the numbers
            # agree with the every-100-batches trigger.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, done * len(X), len(train_.dataset),
                100. * done / len(train_), loss.item()))
def test(model, device, test_):
    """Evaluate ``model`` on ``test_`` and print the average loss and accuracy.

    Args:
        model: Network whose output is treated as log-probabilities (the loss
            is ``F.nll_loss``). The model itself is not moved by this function.
        device: Device each mini-batch is moved to, e.g. ``'cpu'``.
        test_: DataLoader yielding ``(X, y)`` mini-batches.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for X, y in test_:
            # Tensor.to() returns a tensor instead of moving in place, so the
            # result has to be bound back to the names used below.
            X, y = X.to(device), y.to(device)
            predict_y = model(X)
            # Sum (not mean) per batch so the division below yields a true
            # per-sample average.
            test_loss += F.nll_loss(predict_y, y, reduction='sum').item()
            pred = predict_y.argmax(dim=1)
            correct += pred.eq(y).sum().item()
    # Use the loader passed in, not a module-level `test_loader` global.
    n_samples = len(test_.dataset)
    test_loss /= n_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        100. * correct / n_samples))


# The name matches pytest's default test-function pattern; opt out of
# collection so importing this helper into a test module does not make pytest
# try to run it as a test case.
test.__test__ = False
# Mini-batch size shared by both loaders.
batch_size = 10

# Pair every image tensor with its label, then serve the pairs in mini-batches.
# Only the training loader reshuffles the samples.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

# Wrap the backbone in LeNet and optimize its parameters with Adam, using the
# optimizer's default hyperparameters.
Net = LeNet(net)
optimizer = optim.Adam(Net.parameters())

# Five epochs on the CPU, evaluating on the test loader after each one.
for epoch in range(5):
    train(Net, 'cpu', train_loader, optimizer, epoch)
    test(Net, 'cpu', test_loader)
ps:我写的这个版本在运行前需要先把数据集下载到指定位置,老师的版本可以自动下载。
总结了一下写train、test函数的步骤:
train函数:
输入:模型model, 设备device, 训练集迭代器train_loader, 优化器optimizer, 轮次epoch(仅打印结果时用)
流程:
- 将模型改为训练模式
- 循环遍历迭代器
- 将当前批量的数据(X、y)转移到设备device上
- 清空优化器梯度
- 代入模型计算模型预测结果
- 计算损失函数
- 进行梯度反向传播
- 使用优化器更新模型参数
- 打印此轮结果
test函数:
输入:模型model, 设备device, 测试集迭代器test_loader
流程:
- 将模型改为评估模式
- 初始化总损失值、总正确个数
- 循环遍历迭代器
- 将当前批量的数据(X、y)转移到设备device上
- 代入模型计算模型预测结果
- 计算损失函数
- 将损失值加在总损失中
- 将预测正确的个数加在总正确个数中
- 总损失除以总个数计算平均损失
- 打印此轮结果
加载数据:
# Build the MNIST loaders through torchvision. Both splits get the same
# preprocessing: convert the image to a tensor, then normalize with
# mean 0.1307 and std 0.3081.
# Only the training split is created with download=True.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
# Alternative that works from tensors already in memory: pair each image with
# its label, then serve the pairs in mini-batches. Only the training loader
# reshuffles the samples.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)
DataLoader用于把数据集包装成可以按批量迭代的迭代器:传入dataset(所有数据)后,每次迭代返回一个大小为batch_size的样本批量(最后一个批量可能不足batch_size)。shuffle参数用来选择是否在每个epoch随机打乱样本顺序。