使用 AlexNet 实现图片分类 | PyTorch 深度学习实战
前一篇文章,CNN 卷积神经网络处理图片任务 | PyTorch 深度学习实战
本系列文章 GitHub Repo: https://github.com/hailiang-wang/pytorch-get-started
本篇文章内容来自于 强化学习必修课:引领人工智能新时代【梗直哥瞿炜】
使用 AlexNet 实现图片分类
- 经典卷积网络
- AlexNet 特点
- 实验代码
- 实验结果
- Links
经典卷积网络
以下是卷积神经网络发展的里程碑:
- AlexNet 在各项比赛中,比其它算法好,证明了深度神经网络算法的优越性和前景
- VGGNet 则使用比 AlexNet 更深更宽的网络,取得了比 AlexNet 还好的成绩
- GoogLeNet 效果则比 VGGNet 更好
- ResNet 引入残差模块,解决了深度网络训练中的退化问题,超越之前的模型
- DenseNet 模型采用密集连接的结构,使模型具有更好的鲁棒性
AlexNet 特点
AlexNet 结构
更多详细介绍,阅读作者 Paper 论文, ImageNet Classification with Deep Convolutional Neural Networks
视频资源:9年后重读深度学习奠基作之一:AlexNet【上】【论文精读】
实验代码
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms
from torchvision import datasets
import torch.nn as nn
import torch
import numpy as np
# configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_rootdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, "data")
################################
# 定义 dataset loader
################################
def get_train_valid_loader(data_dir,
batch_size,
augment,
random_seed,
valid_size=0.1,
shuffle=True):
# 正则化图片的参数,mean 和 std 的值来自于 imagenet 的数据统计
normalize = transforms.Normalize(
mean=[0.4914, 0.4822, 0.4465],
std=[0.2023, 0.1994, 0.2010],
)
# define transforms
valid_transform = transforms.Compose([
transforms.Resize((227, 227)),
transforms.ToTensor(),
normalize,
])
if augment:
# 数据增强
train_transform = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
normalize,
])
else:
train_transform = transforms.Compose([
transforms.Resize((227, 227)),
transforms.ToTensor(),
normalize,
])
# load the dataset
train_dataset = datasets.CIFAR10(
root=data_dir, train=True,
download=True, transform=train_transform,
)
valid_dataset = datasets.CIFAR10(
root=data_dir, train=True,
download=True, transform=valid_transform,
)
num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))
if shuffle:
np.random.seed(random_seed)
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=batch_size, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(
valid_dataset, batch_size=batch_size, sampler=valid_sampler)
return (train_loader, valid_loader)
def get_test_loader(data_dir,
batch_size,
shuffle=True):
normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
)
# define transform
transform = transforms.Compose([
transforms.Resize((227, 227)),
transforms.ToTensor(),
normalize,
])
dataset = datasets.CIFAR10(
root=data_dir, train=False,
download=True, transform=transform,
)
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=batch_size, shuffle=shuffle
)
return data_loader
# CIFAR10 dataset
CIFAR10_data = os.path.join(data_rootdir, "CIFAR10")
train_loader, valid_loader = get_train_valid_loader(data_dir= CIFAR10_data, batch_size = 64,
augment = False, random_seed = 1)
test_loader = get_test_loader(data_dir= CIFAR10_data,
batch_size = 64)
################################
# 定义 Model
################################
class AlexNet(nn.Module):
def __init__(self, num_classes=10):
super(AlexNet, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
nn.BatchNorm2d(96),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 3, stride = 2))
self.layer2 = nn.Sequential(
nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 3, stride = 2))
self.layer3 = nn.Sequential(
nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(384),
nn.ReLU())
self.layer4 = nn.Sequential(
nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(384),
nn.ReLU())
self.layer5 = nn.Sequential(
nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 3, stride = 2))
self.fc = nn.Sequential(
nn.Dropout(0.5),
nn.Linear(9216, 4096),
nn.ReLU())
self.fc1 = nn.Sequential(
nn.Dropout(0.5),
nn.Linear(4096, 4096),
nn.ReLU())
self.fc2= nn.Sequential(
nn.Linear(4096, num_classes))
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = out.reshape(out.size(0), -1)
out = self.fc(out)
out = self.fc1(out)
out = self.fc2(out)
return out
######################################
# Setting Hyperparameters
######################################
num_classes = 10
num_epochs = 20
batch_size = 64
learning_rate = 0.005
model = AlexNet(num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)
# Train the model
total_step = len(train_loader)
######################################
# Training
######################################
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Move tensors to the configured device
images = images.to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Validation
with torch.no_grad():
correct = 0
total = 0
for images, labels in valid_loader:
images = images.to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
del images, labels, outputs
print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))
# Now, we see how our model performs on unseen data
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
images = images.to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
del images, labels, outputs
print('Accuracy of the network on the {} test images: {} %'.format(10000, 100 * correct / total))
实验结果
以上代码在 NVIDIA GeForce RTX 2050 WDDM 显存上训练和测试,大约花了半个小时时间。
最终,测试集上的准确率达到了 82.24% 。
Links
- Writing AlexNet from Scratch in PyTorch
- ImageNet Classification with Deep Convolutional
Neural Networks - Conv2d API in PyTorch
- 9年后重读深度学习奠基作之一:AlexNet【上】【论文精读】