当前位置: 首页 > article >正文

深度学习-卷积神经网络CNN

案例-图像分类

网络结构: 卷积+BN+激活+池化

数据集介绍

CIFAR-10数据集包含5万张训练图像和1万张测试图像, 共10个类别, 每个类别有6000张图像, 图像大小为32×32×3。下图列举了这10个类别, 每一类随机展示了10张图片:

特征图计算

在卷积层和池化层结束后, 需要把特征图展平成一行n列的数据, 再送入全连接层。因此要逐层计算特征图尺寸的变化: 第一个全连接层的输入特征数等于最后一层卷积经池化后的特征图各维度(通道数×高×宽)的乘积。例如下面的网络最终得到256×2×2的特征图, 所以第一个全连接层的输入特征数为1024。

具体流程-# Acc: 0.728

# 导包
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, Compose  # Compose chains several transforms into one pipeline (it does not augment data by itself)
import time
import matplotlib.pyplot as plt

# Default mini-batch size. train() reads this module-level name when it builds
# its DataLoader, and the __main__ section may rebind it before training.
batch_size = 16


# 创建数据集
def create_dataset(root='data', download=True):
    """Build the CIFAR-10 training and test datasets.

    Args:
        root: Directory that holds (or will receive) the CIFAR-10 files.
        download: Forwarded to ``CIFAR10``. With True the files are fetched
            into ``root`` when they are missing, so a first run on a fresh
            machine works; pass False to forbid network access.

    Returns:
        A ``(train, test)`` tuple of ``CIFAR10`` datasets whose images are
        converted to tensors by ``ToTensor``.
    """
    # Side effect: seeds torch's global RNG, which also affects whatever the
    # caller creates afterwards (e.g. model weight initialisation).
    torch.manual_seed(21)
    transform = Compose([ToTensor()])
    # Local names deliberately avoid shadowing the module-level train()/test().
    train_set = CIFAR10(root=root, train=True, transform=transform, download=download)
    test_set = CIFAR10(root=root, train=False, transform=transform, download=download)
    return train_set, test_set


# 创建模型
class ImgCls(nn.Module):
    """CNN classifier: five conv + BatchNorm + activation + max-pool stages, then an MLP head.

    The layer attribute names become the ``state_dict`` keys, so they must stay
    as they are for saved checkpoints to load.

    Feature-map side length for a 3x32x32 input, derived from the kernel sizes
    and strides below (no padding is used):
        32 -conv1-> 30 -pool1-> 15 -conv2-> 13 -pool2-> 12 -conv3-> 10
        -pool3-> 9 -conv4-> 8 -pool4-> 4 -conv5-> 3 -pool5-> 2
    which leaves 256 * 2 * 2 = 1024 features for ``linear1``.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 16 channels
        self.conv1 = nn.Conv2d(3, 16, stride=1, kernel_size=3)
        self.batch_norm_layer1 = nn.BatchNorm2d(num_features=16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 16 -> 32 channels
        self.conv2 = nn.Conv2d(16, 32, stride=1, kernel_size=3)
        self.batch_norm_layer2 = nn.BatchNorm2d(num_features=32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Stage 3: 32 -> 64 channels
        self.conv3 = nn.Conv2d(32, 64, stride=1, kernel_size=3)
        self.batch_norm_layer3 = nn.BatchNorm2d(num_features=64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Stage 4: 64 -> 128 channels
        self.conv4 = nn.Conv2d(64, 128, stride=1, kernel_size=2)
        self.batch_norm_layer4 = nn.BatchNorm2d(num_features=128)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 5: 128 -> 256 channels
        self.conv5 = nn.Conv2d(128, 256, stride=1, kernel_size=2)
        self.batch_norm_layer5 = nn.BatchNorm2d(num_features=256)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Fully connected head: 1024 flattened features -> 10 class scores
        self.linear1 = nn.Linear(1024, 2048)
        self.linear2 = nn.Linear(2048, 1024)
        self.linear3 = nn.Linear(1024, 512)
        self.linear4 = nn.Linear(512, 256)
        self.linear5 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 10)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10).

        Assumes ``x`` is (batch, 3, 32, 32): ``linear1`` needs exactly 1024
        flattened features per sample.

        NOTE(review): ``torch.rrelu`` is called with its default arguments;
        presumably that means a fixed (non-randomised) negative slope even
        while training -- confirm this is intended.
        """
        conv_stages = (
            (self.conv1, self.batch_norm_layer1, self.pool1),
            (self.conv2, self.batch_norm_layer2, self.pool2),
            (self.conv3, self.batch_norm_layer3, self.pool3),
            (self.conv4, self.batch_norm_layer4, self.pool4),
            (self.conv5, self.batch_norm_layer5, self.pool5),
        )
        # Every stage is conv -> batch norm -> activation -> pooling.
        for conv, norm, pool in conv_stages:
            x = pool(torch.rrelu(norm(conv(x))))

        # Flatten each sample's feature maps into one vector for the linear layers.
        x = x.reshape(x.size(0), -1)

        for hidden in (self.linear1, self.linear2, self.linear3, self.linear4, self.linear5):
            x = torch.rrelu(hidden(x))
        # No softmax here: the raw scores are returned.
        return self.out(x)


# 训练
def train(model, train_dataset, epochs):
    """Train ``model`` with Adam and cross-entropy, then save its weights.

    Args:
        model: Network to optimise in place; batches are moved to the device
            its parameters already live on.
        train_dataset: Dataset yielding ``(input, label)`` pairs.
        epochs: Number of full passes over ``train_dataset``.

    Side effects:
        Seeds torch's global RNG with 21, switches the model to training mode,
        prints one summary line per epoch and writes the final ``state_dict``
        to ``model/img_cls_model.pth``.

    The mini-batch size is read from the module-level ``batch_size``.
    """
    import os  # local import so the block does not depend on the file's import list

    torch.manual_seed(21)
    # Follow the model's own device instead of relying on a global ``device``
    # that only exists when the file runs as a script.
    model_device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=1e-4)
    # One loader is enough: with shuffle=True it reshuffles on every iteration.
    dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    # Make sure BatchNorm layers use batch statistics while optimising.
    model.train()
    for epoch in range(epochs):
        loss_total = 0.0
        num_batches = 0
        start_time = time.time()
        for x, y in dataloader:
            output = model(x.to(model_device))
            loss_value = criterion(output, y.to(model_device))
            opt.zero_grad()
            loss_value.backward()
            opt.step()
            loss_total += loss_value.item()
            num_batches += 1
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        mean_loss = loss_total / max(num_batches, 1)
        print(f'epoch:{epoch + 1:4d}, loss:{mean_loss:6.4f}, time:{time.time() - start_time:.2f}s')
    # Create the target directory first; otherwise a missing ``model/`` folder
    # would throw away the whole training run at the very last step.
    os.makedirs('model', exist_ok=True)
    torch.save(model.state_dict(), 'model/img_cls_model.pth')


# 测试
def test(valid_dataset, model, batch_size):
    """Print and return the classification accuracy of ``model``.

    Args:
        valid_dataset: Dataset yielding ``(input, label)`` pairs.
        model: Network to evaluate. Its mode is left untouched, so the caller
            decides whether ``model.eval()`` has been applied.
        batch_size: Number of samples per evaluation batch.

    Returns:
        Fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``valid_dataset`` is empty (accuracy is undefined).
    """
    if len(valid_dataset) == 0:
        raise ValueError('valid_dataset is empty; accuracy is undefined')

    dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Follow the model's own device instead of relying on a global ``device``;
    # parameter-free models fall back to the CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    total_correct = 0
    # Gradients are not needed for evaluation, so skip building the graph.
    with torch.no_grad():
        for x, y in dataloader:
            output = model(x.to(model_device))
            y_pred = torch.argmax(output, dim=-1)
            total_correct += (y_pred == y.to(model_device)).sum().item()

    accuracy = total_correct / len(valid_dataset)
    print(f'Acc: {accuracy}')
    return accuracy


# The name matches pytest's default ``test*`` function pattern; opt out so this
# helper is never collected as a test case when it is imported into a test module.
test.__test__ = False


if __name__ == '__main__':
    # Script entry point: load CIFAR-10, print the network summary, train, then evaluate.
    # ``batch_size`` and ``device`` stay module-level names; train() reads
    # ``batch_size`` as a global.
    batch_size = 16
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Load the datasets
    train_data, test_data = create_dataset()

    # Optional dataset inspection (uncomment to run):
    # print(f'数据集类别: {train_data.class_to_idx}')
    # print(f'训练集: {train_data.data.shape}')
    # print(f'验证集: {test_data.data.shape}')
    # print(f'类别数量: {len(np.unique(train_data.targets))}')
    # Optional: show the first training image
    # plt.figure(figsize=(8, 8))
    # plt.imshow(train_data.data[0])
    # plt.title(train_data.classes[train_data.targets[0]])
    # plt.show()

    # Instantiate the model on the selected device
    model = ImgCls().to(device)

    # Print the layer-by-layer summary on the device that was actually selected
    summary(model, (3, 32, 32), device=device.type, batch_size=batch_size)

    # Train the model
    train(model, train_data, epochs=60)
    # Reload the saved weights. weights_only=True restricts unpickling to
    # tensors/plain containers, and map_location keeps the load working when
    # the checkpoint was written on a different device.
    model.load_state_dict(
        torch.load('model/img_cls_model.pth', map_location=device, weights_only=True)
    )
    model.eval()
    # Evaluate on the test split
    test(test_data, model, batch_size=16)   # Acc: 0.728

调整网络结构

第一次调整: 训练50轮, Acc: 0.71

第二次调整: 训练30轮, Acc:0.7351

第三次调整: batch_size=8, epoch=50 => Acc: 0.7644

# 导包
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, Compose  # Compose chains several transforms into one pipeline (it does not augment data by itself)
import time
import matplotlib.pyplot as plt

# Default mini-batch size. train() reads this module-level name when it builds
# its DataLoader, and the __main__ section may rebind it before training.
batch_size = 16


# 创建数据集
def create_dataset(root='data', download=True):
    """Build the CIFAR-10 training and test datasets.

    Args:
        root: Directory that holds (or will receive) the CIFAR-10 files.
        download: Forwarded to ``CIFAR10``. With True the files are fetched
            into ``root`` when they are missing, so a first run on a fresh
            machine works; pass False to forbid network access.

    Returns:
        A ``(train, test)`` tuple of ``CIFAR10`` datasets whose images are
        converted to tensors by ``ToTensor``.
    """
    # Side effect: seeds torch's global RNG, which also affects whatever the
    # caller creates afterwards (e.g. model weight initialisation).
    torch.manual_seed(21)
    transform = Compose([ToTensor()])
    # Local names deliberately avoid shadowing the module-level train()/test().
    train_set = CIFAR10(root=root, train=True, transform=transform, download=download)
    test_set = CIFAR10(root=root, train=False, transform=transform, download=download)
    return train_set, test_set


# 创建模型
class ImgCls(nn.Module):
    """CNN classifier: five conv + BatchNorm + activation + max-pool stages, then an MLP head.

    The layer attribute names become the ``state_dict`` keys, so they must stay
    as they are for saved checkpoints to load.

    Feature-map side length for a 3x32x32 input, derived from the kernel sizes,
    paddings and strides below (conv1-conv4 use padding=1, conv5 none):
        32 -conv1-> 32 -pool1-> 16 -conv2-> 16 -pool2-> 8 -conv3-> 8
        -pool3-> 7 -conv4-> 7 -pool4-> 6 -conv5-> 4 -pool5-> 2
    which leaves 256 * 2 * 2 = 1024 features for ``linear1``.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 16 channels
        self.conv1 = nn.Conv2d(3, 16, stride=1, kernel_size=3, padding=1)
        self.batch_norm_layer1 = nn.BatchNorm2d(num_features=16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 16 -> 32 channels
        self.conv2 = nn.Conv2d(16, 32, stride=1, kernel_size=3, padding=1)
        self.batch_norm_layer2 = nn.BatchNorm2d(num_features=32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: 32 -> 64 channels
        self.conv3 = nn.Conv2d(32, 64, stride=1, kernel_size=3, padding=1)
        self.batch_norm_layer3 = nn.BatchNorm2d(num_features=64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Stage 4: 64 -> 128 channels
        self.conv4 = nn.Conv2d(64, 128, stride=1, kernel_size=3, padding=1)
        self.batch_norm_layer4 = nn.BatchNorm2d(num_features=128)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Stage 5: 128 -> 256 channels (the only convolution without padding)
        self.conv5 = nn.Conv2d(128, 256, stride=1, kernel_size=3)
        self.batch_norm_layer5 = nn.BatchNorm2d(num_features=256)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected head: 1024 flattened features -> 10 class scores
        self.linear1 = nn.Linear(1024, 2048)
        self.linear2 = nn.Linear(2048, 1024)
        self.linear3 = nn.Linear(1024, 512)
        self.linear4 = nn.Linear(512, 256)
        self.linear5 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 10)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10).

        Assumes ``x`` is (batch, 3, 32, 32): ``linear1`` needs exactly 1024
        flattened features per sample.

        NOTE(review): stages 1-4 and linear1-4 use ``torch.relu`` while stage 5
        and ``linear5`` use ``torch.rrelu`` -- kept exactly as written; confirm
        the mix is intentional.
        """
        conv_stages = (
            (self.conv1, self.batch_norm_layer1, torch.relu, self.pool1),
            (self.conv2, self.batch_norm_layer2, torch.relu, self.pool2),
            (self.conv3, self.batch_norm_layer3, torch.relu, self.pool3),
            (self.conv4, self.batch_norm_layer4, torch.relu, self.pool4),
            (self.conv5, self.batch_norm_layer5, torch.rrelu, self.pool5),
        )
        # Every stage is conv -> batch norm -> activation -> pooling.
        for conv, norm, activation, pool in conv_stages:
            x = pool(activation(norm(conv(x))))

        # Flatten each sample's feature maps into one vector for the linear layers.
        x = x.reshape(x.size(0), -1)

        hidden_layers = (
            (self.linear1, torch.relu),
            (self.linear2, torch.relu),
            (self.linear3, torch.relu),
            (self.linear4, torch.relu),
            (self.linear5, torch.rrelu),
        )
        for hidden, activation in hidden_layers:
            x = activation(hidden(x))
        # No softmax here: the raw scores are returned.
        return self.out(x)


# 训练
def train(model, train_dataset, epochs):
    """Train ``model`` with Adam and cross-entropy, then save its weights.

    Args:
        model: Network to optimise in place; batches are moved to the device
            its parameters already live on.
        train_dataset: Dataset yielding ``(input, label)`` pairs.
        epochs: Number of full passes over ``train_dataset``.

    Side effects:
        Seeds torch's global RNG with 21, switches the model to training mode,
        prints one summary line per epoch and writes the final ``state_dict``
        to ``model/img_cls_model1.pth``.

    The mini-batch size is read from the module-level ``batch_size``.
    """
    import os  # local import so the block does not depend on the file's import list

    torch.manual_seed(21)
    # Follow the model's own device instead of relying on a global ``device``
    # that only exists when the file runs as a script.
    model_device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=1e-4)
    # One loader is enough: with shuffle=True it reshuffles on every iteration.
    dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    # Make sure BatchNorm layers use batch statistics while optimising.
    model.train()
    for epoch in range(epochs):
        loss_total = 0.0
        num_batches = 0
        start_time = time.time()
        for x, y in dataloader:
            output = model(x.to(model_device))
            loss_value = criterion(output, y.to(model_device))
            opt.zero_grad()
            loss_value.backward()
            opt.step()
            loss_total += loss_value.item()
            num_batches += 1
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        mean_loss = loss_total / max(num_batches, 1)
        print(f'epoch:{epoch + 1:4d}, loss:{mean_loss:6.4f}, time:{time.time() - start_time:.2f}s')
    # Create the target directory first; otherwise a missing ``model/`` folder
    # would throw away the whole training run at the very last step.
    os.makedirs('model', exist_ok=True)
    torch.save(model.state_dict(), 'model/img_cls_model1.pth')


# 测试
def test(valid_dataset, model, batch_size):
    """Print and return the classification accuracy of ``model``.

    Args:
        valid_dataset: Dataset yielding ``(input, label)`` pairs.
        model: Network to evaluate. Its mode is left untouched, so the caller
            decides whether ``model.eval()`` has been applied.
        batch_size: Number of samples per evaluation batch.

    Returns:
        Fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``valid_dataset`` is empty (accuracy is undefined).
    """
    if len(valid_dataset) == 0:
        raise ValueError('valid_dataset is empty; accuracy is undefined')

    dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Follow the model's own device instead of relying on a global ``device``;
    # parameter-free models fall back to the CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    total_correct = 0
    # Gradients are not needed for evaluation, so skip building the graph.
    with torch.no_grad():
        for x, y in dataloader:
            output = model(x.to(model_device))
            y_pred = torch.argmax(output, dim=-1)
            total_correct += (y_pred == y.to(model_device)).sum().item()

    accuracy = total_correct / len(valid_dataset)
    print(f'Acc: {accuracy}')
    return accuracy


# The name matches pytest's default ``test*`` function pattern; opt out so this
# helper is never collected as a test case when it is imported into a test module.
test.__test__ = False


if __name__ == '__main__':
    # Script entry point: load CIFAR-10, print the network summary, train, then evaluate.
    # ``batch_size`` and ``device`` stay module-level names; train() reads
    # ``batch_size`` as a global.
    batch_size = 8
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Load the datasets
    train_data, test_data = create_dataset()

    # Optional dataset inspection (uncomment to run):
    # print(f'数据集类别: {train_data.class_to_idx}')
    # print(f'训练集: {train_data.data.shape}')
    # print(f'验证集: {test_data.data.shape}')
    # print(f'类别数量: {len(np.unique(train_data.targets))}')
    # Optional: show the first training image
    # plt.figure(figsize=(8, 8))
    # plt.imshow(train_data.data[0])
    # plt.title(train_data.classes[train_data.targets[0]])
    # plt.show()

    # Instantiate the model on the selected device
    model = ImgCls().to(device)

    # Print the layer-by-layer summary on the device that was actually selected
    summary(model, (3, 32, 32), device=device.type, batch_size=batch_size)

    # Train the model
    train(model, train_data, epochs=50)
    # Reload the saved weights; map_location keeps the load working when the
    # checkpoint was written on a different device.
    model.load_state_dict(
        torch.load('model/img_cls_model1.pth', map_location=device, weights_only=True)
    )
    model.eval()
    # Evaluate on the test split
    test(test_data, model, batch_size=16)   # Acc: 0.7644


http://www.kler.cn/a/394779.html

相关文章:

  • 「人眼视觉不再是视频消费的唯一形式」丨智能编解码和 AI 视频生成专场回顾@RTE2024
  • Flink中自定义Source和Sink的使用
  • 【MYSQL】数据库日志 (了解即可)
  • sql专题 之 where和join on
  • 使用CNN进行验证码识别:深度学习与图像预处理教程
  • 算法——长度最小的子数组(leetcode209)
  • 【赵渝强老师】MySQL InnoDB的数据文件与重做日志文件
  • stream学习
  • 决策树基本 CART Python手写实现
  • Elasticsearch(ES)简介
  • 零钱兑换(DP)
  • 基于SSM框架(Spring, Spring MVC, MyBatis)的矿场仓储管理系统的基础示例
  • 【GPTs】Gif-PT:DALL·E制作创意动图与精灵动画
  • 单片机 定时器实验 实验四
  • git配置远程仓库的认证信息
  • 吴恩达Prompt Engineering(2/9): Guidelines for Prompting
  • Perfetto中如何使用SQL语句
  • Excel根据条件动态索引单元格范围
  • PVE纵览-选择适合你的Proxmox VE存储方案:LVM、LVM-Thin、目录与ZFS对
  • docker镜像安装oracle11g
  • 互联网行业面对大数据时代新挑战如何实现数据高速传输
  • 解决 VSCode 中 C/C++ 编码乱码问题的两种方法
  • 【机器学习】K近邻算法
  • C++——视频问题总结
  • 猎板PCB罗杰斯板材的应用案例
  • 【填鸭表单】TDuckX-v2.0发布!