12.12深度学习_CNN_项目实战
基于CNN的AnimalTriClassifier
关于项目实现的文档说明书,三个要素:数据、模型、训练
1、项目简介
关于项目的基本介绍。
本项目实现的是对动物面部图像的分类,划分的类别有猫、狗、野生动物三种,属于小颗粒度分类
- 大颗粒度分类:以物种作为分类,比如飞机、青蛙、狗、猫、马、鹿等。
- 实体颗粒度分类:具体到具体的人,比如指纹识别、人脸识别等具体的个体,具体的实体
1.1 项目名称
基于CNN的AnimalTriClassifier
1.2 项目简介
本项目旨在使用卷积神经网络(CNN)进行图像分类任务。我们将使用 LeNet5(衍生) 模型来训练一个可以区分猫、狗和野生动物的分类器。项目中包括了数据预处理、模型训练、测试、验证以及单张图片推理等功能。
2、数据
公开的数据集
2.1 公开数据集
Animal Faces
2.3 数据增强
提升模型的泛化能力和鲁棒性。
# Data preprocessing and augmentation pipeline.
transform = transforms.Compose([
    # Disabled alternatives, kept for reference:
    # transforms.RandomVerticalFlip(),
    # transforms.RandomRotation(degrees=(0, 180)),
    # transforms.RandomHorizontalFlip(),  # random horizontal flip
    # transforms.RandomRotation(10),
    # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Resize((64, 64)),
    transforms.RandomRotation(degrees=(0, 180)),
    # Colour inversion is defined relative to the image value range
    # ([0, 1] for float tensors), so it has to run before Normalize.
    transforms.RandomInvert(),
    transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
    # Normalisation goes last so every augmentation sees [0, 1] pixel values.
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
3. 神经网络
手写LeNet5
import torch
import torch.nn as nn
import torch.nn.functional as F
class LeNet5(nn.Module):
    """LeNet-5 style classifier: three conv stages followed by three linear layers.

    Every conv stage is ``Conv2d(5x5, stride 1, padding 2) -> ReLU ->
    AvgPool2d(2)``, so the spatial size is halved per stage. ``fc1`` is sized
    for ``64 * 8 * 8`` features, i.e. for 3-channel 64x64 inputs.

    Args:
        num_classes: number of output logits.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        # Channel progression 3 -> 16 -> 32 -> 64; spatial 64 -> 32 -> 16 -> 8.
        self.layer1 = self._conv_stage(3, 16)
        self.layer2 = self._conv_stage(16, 32)
        self.layer3 = self._conv_stage(32, 64)
        # Fully connected head.
        self.fc1 = nn.Linear(64 * 8 * 8, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one size-preserving 5x5 conv + ReLU + 2x2 average-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Return raw class logits (no softmax) for a batch of images."""
        features = self.layer3(self.layer2(self.layer1(x)))
        flat = features.reshape(features.size(0), -1)  # flatten per sample
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
# Create a model instance with three output classes
model = LeNet5(num_classes=3)
4. 模型训练
def train():
    """Fine-tune LeNet5 on the images under ``<data_path>/train``.

    The image folder is split 70/30 into a training and a held-out subset.
    Training resumes from ``pth_path`` when that checkpoint exists and starts
    from freshly initialised weights otherwise. Per-epoch loss and accuracy
    are printed and logged to TensorBoard through the module-level ``writer``.
    After the last epoch the held-out accuracy is printed, a timestamped copy
    of the weights is written to ``prepare_path`` and ``last_model.pth`` in
    ``weight_path`` is replaced.

    Raises:
        FileNotFoundError: if the training image folder does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Augmentation pipeline. Normalize is applied last so that RandomInvert
    # (defined relative to the [0, 1] value range of float tensors) and the
    # geometric transforms operate on un-normalised pixels.
    transform = transforms.Compose([
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.RandomRotation(degrees=(0, 180)),
        transforms.RandomInvert(),
        transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # Fail early with a clear message when the dataset is missing.
    train_path = os.path.join(data_path, 'train')
    if not os.path.exists(train_path):
        raise FileNotFoundError(f"数据集路径不存在: {train_path}")

    full_dataset = ImageFolder(root=train_path, transform=transform)
    print("分类列表:", full_dataset.classes)
    print("分类和索引的对应关系:", full_dataset.class_to_idx)

    # 70/30 split into training and held-out subsets.
    # NOTE(review): both subsets share the augmenting transform above, so the
    # held-out accuracy is measured on augmented images.
    train_ratio = 0.7
    train_size = int(train_ratio * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

    net = LeNet5(num_classes=len(full_dataset.classes)).to(device)
    if os.path.exists(pth_path):
        # map_location lets weights saved on a GPU load on a CPU-only machine.
        net.load_state_dict(torch.load(pth_path, map_location=device))
    net.train()

    # Record the network graph in TensorBoard.
    writer.add_graph(net, torch.randn(1, 3, 64, 64).to(device))

    epochs = 10
    batch_size = 64
    # Summed loss is divided by the dataset size below to report a per-sample mean.
    criterion = nn.CrossEntropyLoss(reduction="sum")
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        start_time = time.time()
        accuracy = 0
        total_loss = 0
        for i, (x, y) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            yhat = net(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
            accuracy += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total_loss += loss.item()
            # Log every input batch as an image grid.
            img_grid = vutils.make_grid(x, normalize=True, nrow=8)
            writer.add_image(f"r_m_{epoch}_{i}", img_grid, epoch * len(train_dataset) + i)
        print(
            f"Epoch {epoch+1}/{epochs} - Time: {time.time() - start_time:.2f}s, Accuracy: {accuracy / len(train_dataset):.4f}, Loss: {total_loss / len(train_dataset):.4f}")
        writer.add_scalar("Loss/train", total_loss / len(train_dataset), epoch)
        writer.add_scalar("Accuracy/train", accuracy / len(train_dataset), epoch)
    writer.flush()

    # NOTE(review): the source listing lost its indentation; evaluation and
    # saving are performed once, after the final epoch.
    test_accuracy = test(net, test_loader, device)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Timestamped backup name, formatted as YYYYmmdd-HHMMSS.
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    pth_filename = f"model_{timestamp}.pth"
    pth_filepath = os.path.join(prepare_path, pth_filename)

    # torch.save does not create missing directories.
    os.makedirs(prepare_path, exist_ok=True)
    os.makedirs(weight_path, exist_ok=True)

    torch.save(net.state_dict(), pth_filepath)
    print(f"Model saved as: {pth_filename} in prepare folder")

    last_model_path = os.path.join(weight_path, "last_model.pth")
    torch.save(net.state_dict(), last_model_path)
    print("Updated last_model.pth")
4.1 训练参数
轮次:epochs = 10
批次:batch_size=64
4.2 损失函数
交叉熵损失
4.3 优化器
optim.Adam()
4.4 训练过程可视化
使用TensorBoard
5. 模型验证
验证我们的模型的鲁棒性和泛化能力
def inference():
    """Print the accuracy of the saved weights on the image folder at ``vali_path``.

    Loads ``pth_path`` into a 3-class LeNet5, runs it over every image in
    batches of 8 and prints the fraction of correct predictions.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        # Same statistics as the training pipeline; without this the network
        # sees inputs on a different scale than it was trained on.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    full_dataset = ImageFolder(root=vali_path, transform=transform)
    print("分类列表:", full_dataset.classes)
    print("分类和索引的对应关系:", full_dataset.class_to_idx)

    net = LeNet5(num_classes=3).to(device)
    net.load_state_dict(torch.load(pth_path, map_location=device))
    net.eval()

    val_loader = DataLoader(full_dataset, batch_size=8, shuffle=False)

    acc = 0
    total = 0
    with torch.no_grad():
        for x, y in tqdm(val_loader, desc="Validation"):
            x, y = x.to(device), y.to(device)
            yhat = net(x)
            acc += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total += y.size(0)
    val_accuracy = acc / total
    print(f"Validation Accuracy: {val_accuracy:.4f}")
5.1 验证过程数据化
生成csv:
5.2 指标报表
(此处原有指标报表截图;图片外链转存失败,未能显示。)
5.3 混淆矩阵
可视化
6. 模型优化
6.1 增加网络深度
让网络变得更好
# Channel attention module
class ChannelAttentionModule(nn.Module):
    """Channel attention: re-weights each channel with a gate in (0, 1).

    Global max- and average-pooled channel descriptors pass through one shared
    two-layer MLP; the two results are summed and squashed with a sigmoid.

    Args:
        c: number of input channels.
        r: reduction ratio of the MLP bottleneck. The bottleneck width is
            ``c // r`` but never less than 1, so ``c < r`` still yields a
            usable layer instead of a zero-width one.
    """

    def __init__(self, c, r=16):
        super().__init__()
        hidden = max(c // r, 1)
        self.maxpool = nn.AdaptiveMaxPool2d(1)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.sharedMLP = nn.Sequential(
            nn.Linear(c, hidden),
            nn.ReLU(),
            nn.Linear(hidden, c),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled per channel by the attention gate."""
        batch = x.size(0)
        max_out = self.sharedMLP(self.maxpool(x).view(batch, -1))
        avg_out = self.sharedMLP(self.avgpool(x).view(batch, -1))
        # Restore the two trailing singleton dims so the gate broadcasts over H and W.
        gate = self.sigmoid((max_out + avg_out).unsqueeze(2).unsqueeze(3))
        return x * gate
# Spatial attention module
class SpatialAttentionModule(nn.Module):
    """Spatial attention: re-weights each spatial location with a gate in (0, 1).

    The channel-wise max and mean maps are concatenated, convolved down to a
    single map and squashed with a sigmoid.

    Args:
        kernel_size: odd side length of the convolution kernel (default 7).
            Padding is ``kernel_size // 2`` so the spatial size is preserved.

    Raises:
        ValueError: if ``kernel_size`` is not a positive odd integer.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError(f"kernel_size must be a positive odd integer, got {kernel_size}")
        self.conv = nn.Conv2d(2, 1, kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled per location by the attention gate."""
        maxpool_out, _ = torch.max(x, dim=1, keepdim=True)
        avgpool_out = torch.mean(x, dim=1, keepdim=True)
        pooled = torch.cat([maxpool_out, avgpool_out], dim=1)
        return x * self.sigmoid(self.conv(pooled))
# Combined (channel + spatial) attention module
class CBAM(nn.Module):
    """Apply channel attention followed by spatial attention.

    Args:
        c: number of input channels, forwarded to the channel attention module.
        r: reduction ratio, forwarded to the channel attention module.
    """

    def __init__(self, c, r=16):
        super().__init__()
        self.cam = ChannelAttentionModule(c, r)
        self.sam = SpatialAttentionModule()

    def forward(self, x):
        """Return ``x`` after the channel gate and then the spatial gate."""
        return self.sam(self.cam(x))
6.2 继续训练
备份和保存last
# Save a backup copy of the model weights into the prepare folder
torch.save(net.state_dict(), pth_filepath)
print(f"Model saved as: {pth_filename} in prepare folder")
# Overwrite last_model.pth so the next run can resume from these weights
last_model_path = os.path.join(weight_path, "last_model.pth")
torch.save(net.state_dict(), last_model_path)
print(f"Updated last_model.pth")
6.3 预训练和迁移学习
让网络变得更好
# Model preparation: build LeNet5 and continue training from the last saved weights
net = LeNet5(num_classes=len(full_dataset.classes)).to(device)
# map_location lets weights saved on a GPU load on a CPU-only machine
state_dict = torch.load(pth_path, map_location=device)
net.load_state_dict(state_dict)
net.train()
7. 模型应用
推理工作
7.1 图片处理
opencv的操作
def imgread(img_path):
    """Load an image file and return it as a normalised ``(1, 3, 64, 64)`` tensor.

    Args:
        img_path: path of the image file to read with OpenCV.

    Returns:
        A float tensor with a leading batch dimension, ready to feed to the model.

    Raises:
        ValueError: if OpenCV cannot read the file.
    """
    imgdata = cv2.imread(img_path)
    if imgdata is None:
        raise ValueError(f"Failed to load image at path: {img_path}")
    # OpenCV decodes to BGR; the model pipeline works on RGB.
    imgdata = cv2.cvtColor(imgdata, cv2.COLOR_BGR2RGB)
    transformdata = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        # Use the same mean/std as the training pipeline; the previous values
        # were a different set of statistics than the model was trained with.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # Add the batch dimension expected by the network.
    return transformdata(imgdata).unsqueeze(0)
7.2 模型推理
def inference_one(img_path=r"D:\Desktop\cat1.png"):
    """Classify a single image with the saved weights and print the result.

    Args:
        img_path: image file to classify. Defaults to the path that used to be
            hard-coded, so existing no-argument calls behave as before.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = LeNet5(num_classes=3).to(device)
    net.load_state_dict(torch.load(pth_path, map_location=device))
    net.eval()

    imgdata = imgread(img_path).to(device)

    with torch.no_grad():
        out = net(imgdata)
        # The network returns logits; softmax turns them into probabilities.
        probabilities = torch.softmax(out, dim=1)
        predicted_class_idx = torch.argmax(probabilities, dim=1).item()

    # NOTE(review): label order is assumed to match the class indices used
    # during training - confirm against ImageFolder.class_to_idx.
    classlabels = ['猫', '狗', '野生']
    print(f"Predicted class: {classlabels[predicted_class_idx]}")
    print(f"Probabilities: {probabilities[0].tolist()}")
7.3 类别显示
Predicted class: 猫
Probabilities: [0.9819951057434082, 0.01752881519496441, 0.0004761434975080192]
8. 模型移植
使用ONNX
8.1 导出ONNX
8.2 使用ONNX推理
def inference_one(img_path=r"D:\Desktop\cat1.png"):
    """Classify a single image, print the result and export the model to ONNX.

    The ONNX file is written to ``LeNet5.onnx`` under ``current_path``, using
    the preprocessed image as the example input for tracing.

    Args:
        img_path: image file to classify. Defaults to the path that used to be
            hard-coded, so existing no-argument calls behave as before.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = LeNet5(num_classes=3).to(device)
    net.load_state_dict(torch.load(pth_path, map_location=device))
    net.eval()

    imgdata = imgread(img_path).to(device)

    with torch.no_grad():
        out = net(imgdata)
        # The network returns logits; softmax turns them into probabilities.
        probabilities = torch.softmax(out, dim=1)
        predicted_class_idx = torch.argmax(probabilities, dim=1).item()

    # NOTE(review): label order is assumed to match the class indices used
    # during training - confirm against ImageFolder.class_to_idx.
    classlabels = ['猫', '狗', '野生']
    print(f"Predicted class: {classlabels[predicted_class_idx]}")
    print(f"Probabilities: {probabilities[0].tolist()}")

    # Export the network (which outputs logits, not probabilities) to ONNX.
    onnx_path = os.path.join(current_path, "LeNet5.onnx")
    torch.onnx.export(
        net,                        # model to export
        imgdata,                    # example input tensor
        onnx_path,                  # destination file
        export_params=True,         # store the trained weights in the file
        opset_version=11,           # ONNX operator set version
        do_constant_folding=True,   # apply constant-folding optimisation
        input_names=["input"],      # name of the graph input
        output_names=["output"],    # name of the graph output
    )
    print(f"ONNX 模型已导出到: {onnx_path}")


if __name__ == "__main__":
    inference_one()
    # print(onnx.__version__)
推理:
# Load the exported ONNX model
onnx_path = os.path.join(current_path, "LeNet5.onnx")
session = ort.InferenceSession(onnx_path)
# Look up the graph's input and output names
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
# Read and preprocess the image, then convert it to a NumPy array
img_path = os.path.join(r"D:\Desktop\cat1.png")
imgdata = imgread(img_path).numpy()
# Run inference
result = session.run([output_name], {input_name: imgdata})
logits = result[0]
# The exported network ends at its last linear layer, so the output is raw
# logits; apply a numerically stable softmax to obtain probabilities.
shifted = logits - logits.max(axis=1, keepdims=True)
exp_scores = np.exp(shifted)
probabilities = exp_scores / exp_scores.sum(axis=1, keepdims=True)
predicted_class_idx = np.argmax(probabilities, axis=1).item()
classlabels = ['猫', '狗', '野生']
print(f"Predicted class: {classlabels[predicted_class_idx]}")
print(f"Probabilities: {probabilities[0].tolist()}")
9. 项目总结
9.1 遇到的问题及解决办法
问题
-
导出onnx时命名文件为"onnx.py"会与onnx库冲突,报错为:
-
File "d:\Desktop\计算机视觉\06day\onnx.py", line 7, in <module> import onnx File "d:\Desktop\计算机视觉\06day\onnx.py", line 8, in <module> print(onnx.__version__) AttributeError: partially initialized module 'onnx' has no attribute '__version__' (most likely due to a circular import)
解决办法:
- 将脚本文件重命名为 onnx1.py。原因:脚本文件名与导入的 onnx 模块同名,导致了循环导入问题
9.2 收获
在本轮项目中,我切身体会了深度学习的整个流程,从数据集的准备、模型的设计与训练,到模型的验证与优化,每一步都充满了挑战与收获。以下是我在项目中的主要收获:
1. 对深度学习流程的深入理解
通过本次项目,我对深度学习的整个流程有了更加清晰的认识。从数据预处理、模型设计、训练调参,到模型验证与优化,每一步都需要细致的思考和调试。尤其是在数据增强和模型优化阶段,我深刻体会到了数据和模型对最终结果的影响。
2. 数据增强的重要性
数据增强是提升模型泛化能力的重要手段。通过本次项目,我学会了如何使用 PyTorch 提供的各种数据增强方法(如随机旋转、随机反转、颜色抖动等),并通过 TensorBoard 可视化了增强后的数据分布。数据增强不仅能够增加数据的多样性,还能有效防止模型过拟合。
3. 模型优化的技巧
在模型优化阶段,我尝试了多种方法来提升模型的性能。例如,通过增加网络深度(引入 CBAM 模块),提升了模型的表达能力;通过迁移学习,利用预训练模型的权重加速训练过程。这些优化技巧让我对模型的设计与优化有了更深的理解。不过在后续的实验中我并没有使用 CBAM,因为在手写的 LeNet5 模型上它并不能很好地发挥作用。
代码合集:
import os
import time
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.utils as vutils # 用于生成图像网格
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm # 导入 tqdm
from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import ImageFolder
from LeNet5 import LeNet5
# Paths, all resolved relative to this script's directory
current_path = os.path.dirname(__file__) # directory containing this script
data_path = os.path.join(current_path, "afhq") # dataset root
# NOTE(review): vali_path is the same dataset root whose "train" subfolder is
# used for training. ImageFolder treats each immediate subdirectory as a
# class, so confirm this should not point at a dedicated validation subfolder.
vali_path = os.path.join(current_path, 'afhq')
weight_path = os.path.join(current_path, "runs", "weights") # weights directory
prepare_path = os.path.join(current_path, "runs", "prepare") # backup directory
pth_path = os.path.join(weight_path, "last_model.pth") # most recently saved weights
excel_path = os.path.join(current_path, "metrics", "metrics.xlsx")
# Unique TensorBoard log directory, named after the current timestamp
log_dir = os.path.join(current_path, "tboard", time.strftime("%Y%m%d-%H%M%S")) # timestamp keeps runs separate
writer = SummaryWriter(log_dir=log_dir)
# Disable scientific notation when printing tensors and arrays
torch.set_printoptions(sci_mode=False)
np.set_printoptions(suppress=True)
def train():
    """Fine-tune LeNet5 on the images under ``<data_path>/train``.

    The image folder is split 70/30 into a training and a held-out subset.
    Training resumes from ``pth_path`` when that checkpoint exists and starts
    from freshly initialised weights otherwise. Per-epoch loss and accuracy
    are printed and logged to TensorBoard through the module-level ``writer``.
    After the last epoch the held-out accuracy is printed, a timestamped copy
    of the weights is written to ``prepare_path`` and ``last_model.pth`` in
    ``weight_path`` is replaced.

    Raises:
        FileNotFoundError: if the training image folder does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Augmentation pipeline. Normalize is applied last so that RandomInvert
    # (defined relative to the [0, 1] value range of float tensors) and the
    # geometric transforms operate on un-normalised pixels.
    transform = transforms.Compose([
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.RandomRotation(degrees=(0, 180)),
        transforms.RandomInvert(),
        transforms.RandomAffine(degrees=(30, 70), translate=(0.1, 0.3), scale=(0.5, 0.75)),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # Fail early with a clear message when the dataset is missing.
    train_path = os.path.join(data_path, 'train')
    if not os.path.exists(train_path):
        raise FileNotFoundError(f"数据集路径不存在: {train_path}")

    full_dataset = ImageFolder(root=train_path, transform=transform)
    print("分类列表:", full_dataset.classes)
    print("分类和索引的对应关系:", full_dataset.class_to_idx)

    # 70/30 split into training and held-out subsets.
    # NOTE(review): both subsets share the augmenting transform above, so the
    # held-out accuracy is measured on augmented images.
    train_ratio = 0.7
    train_size = int(train_ratio * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

    net = LeNet5(num_classes=len(full_dataset.classes)).to(device)
    if os.path.exists(pth_path):
        # map_location lets weights saved on a GPU load on a CPU-only machine.
        net.load_state_dict(torch.load(pth_path, map_location=device))
    net.train()

    # Record the network graph in TensorBoard.
    writer.add_graph(net, torch.randn(1, 3, 64, 64).to(device))

    epochs = 10
    batch_size = 64
    # Summed loss is divided by the dataset size below to report a per-sample mean.
    criterion = nn.CrossEntropyLoss(reduction="sum")
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        start_time = time.time()
        accuracy = 0
        total_loss = 0
        for i, (x, y) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            yhat = net(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
            accuracy += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total_loss += loss.item()
            # Log every input batch as an image grid.
            img_grid = vutils.make_grid(x, normalize=True, nrow=8)
            writer.add_image(f"r_m_{epoch}_{i}", img_grid, epoch * len(train_dataset) + i)
        print(
            f"Epoch {epoch+1}/{epochs} - Time: {time.time() - start_time:.2f}s, Accuracy: {accuracy / len(train_dataset):.4f}, Loss: {total_loss / len(train_dataset):.4f}")
        writer.add_scalar("Loss/train", total_loss / len(train_dataset), epoch)
        writer.add_scalar("Accuracy/train", accuracy / len(train_dataset), epoch)
    writer.flush()

    # NOTE(review): the source listing lost its indentation; evaluation and
    # saving are performed once, after the final epoch.
    test_accuracy = test(net, test_loader, device)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Timestamped backup name, formatted as YYYYmmdd-HHMMSS.
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    pth_filename = f"model_{timestamp}.pth"
    pth_filepath = os.path.join(prepare_path, pth_filename)

    # torch.save does not create missing directories.
    os.makedirs(prepare_path, exist_ok=True)
    os.makedirs(weight_path, exist_ok=True)

    torch.save(net.state_dict(), pth_filepath)
    print(f"Model saved as: {pth_filename} in prepare folder")

    last_model_path = os.path.join(weight_path, "last_model.pth")
    torch.save(net.state_dict(), last_model_path)
    print("Updated last_model.pth")
def test(model, test_loader, device):
    """Return the classification accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode for the duration of the call and
    put back into whatever mode it was in before, so calling this in the
    middle of training does not silently leave the model in eval mode.

    Args:
        model: module mapping an input batch to per-class scores.
        test_loader: iterable of ``(inputs, labels)`` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        Fraction of correctly classified samples, or ``0.0`` when the loader
        yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for x, y in test_loader:
                x, y = x.to(device), y.to(device)
                _, predicted = torch.max(model(x), 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()
    finally:
        model.train(was_training)
    return correct / total if total else 0.0


# The name starts with "test", so pytest would otherwise try to collect this
# helper as a test case; opt out explicitly.
test.__test__ = False
def imgread(img_path):
    """Load an image file and return it as a normalised ``(1, 3, 64, 64)`` tensor.

    Args:
        img_path: path of the image file to read with OpenCV.

    Returns:
        A float tensor with a leading batch dimension, ready to feed to the model.

    Raises:
        ValueError: if OpenCV cannot read the file.
    """
    imgdata = cv2.imread(img_path)
    if imgdata is None:
        raise ValueError(f"Failed to load image at path: {img_path}")
    # OpenCV decodes to BGR; the model pipeline works on RGB.
    imgdata = cv2.cvtColor(imgdata, cv2.COLOR_BGR2RGB)
    transformdata = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        # Use the same mean/std as train(); the previous values were a
        # different set of statistics than the model was trained with.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # Add the batch dimension expected by the network.
    return transformdata(imgdata).unsqueeze(0)
def inference():
    """Print the accuracy of the saved weights on the image folder at ``vali_path``.

    Loads ``pth_path`` into a 3-class LeNet5, runs it over every image in
    batches of 8 and prints the fraction of correct predictions.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        # Same statistics as train(); without this the network sees inputs
        # on a different scale than it was trained on.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    full_dataset = ImageFolder(root=vali_path, transform=transform)
    print("分类列表:", full_dataset.classes)
    print("分类和索引的对应关系:", full_dataset.class_to_idx)

    net = LeNet5(num_classes=3).to(device)
    net.load_state_dict(torch.load(pth_path, map_location=device))
    net.eval()

    val_loader = DataLoader(full_dataset, batch_size=8, shuffle=False)

    acc = 0
    total = 0
    with torch.no_grad():
        for x, y in tqdm(val_loader, desc="Validation"):
            x, y = x.to(device), y.to(device)
            yhat = net(x)
            acc += torch.sum(torch.argmax(yhat, dim=1) == y).item()
            total += y.size(0)
    val_accuracy = acc / total
    print(f"Validation Accuracy: {val_accuracy:.4f}")
def inference_one(img_path=r"D:\Desktop\cat1.png"):
    """Classify a single image with the saved weights and print the result.

    Args:
        img_path: image file to classify. Defaults to the path that used to be
            hard-coded, so existing no-argument calls behave as before.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = LeNet5(num_classes=3).to(device)
    net.load_state_dict(torch.load(pth_path, map_location=device))
    net.eval()

    imgdata = imgread(img_path).to(device)

    with torch.no_grad():
        out = net(imgdata)
        # The network returns logits; softmax turns them into probabilities.
        probabilities = torch.softmax(out, dim=1)
        predicted_class_idx = torch.argmax(probabilities, dim=1).item()

    # NOTE(review): label order is assumed to match the class indices used
    # during training - confirm against ImageFolder.class_to_idx.
    classlabels = ['猫', '狗', '野生']
    print(f"Predicted class: {classlabels[predicted_class_idx]}")
    print(f"Probabilities: {probabilities[0].tolist()}")


if __name__ == "__main__":
    # Uncomment the stage to run.
    # train()
    # inference()
    inference_one()
    # Remaining steps noted by the author:
    # - test the model
    # - save the model
    # - visualise the training process
# test.py
import os
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from LeNet5_ import LeNet5
import time
from sklearn.metrics import *
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm # 导入 tqdm
from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import ImageFolder
from LeNet5 import LeNet5
# Paths, all resolved relative to this script's directory
current_path = os.path.dirname(__file__) # directory containing this script
# NOTE(review): val_path is the dataset root; ImageFolder treats each
# immediate subdirectory as a class - confirm the folder layout matches the
# three class folders the model expects.
val_path = os.path.join(current_path, 'afhq')
csv_path = os.path.join(current_path, 'metrics')
weight_path = os.path.join(current_path, "runs", "weights") # weights directory
pth_path = os.path.join(weight_path, "last_model.pth") # most recently saved weights
# Unique TensorBoard log directory, named after the current timestamp
log_dir = os.path.join(current_path, "tboard", time.strftime("%Y%m%d-%H%M%S")) # timestamp keeps runs separate
writer = SummaryWriter(log_dir=log_dir)
# Disable scientific notation when printing tensors and arrays
torch.set_printoptions(sci_mode=False)
np.set_printoptions(suppress=True)
# Accuracy check over the validation folder
def test():
    """Load the saved weights, classify every image under ``val_path`` and print the accuracy."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    # Uses the module-level val_path.
    full_dataset = ImageFolder(root=val_path, transform=preprocess)

    # Build the network and restore its weights on the selected device.
    net = LeNet5(num_classes=3).to(device)
    net.load_state_dict(torch.load(pth_path, map_location=device))
    net.eval()

    val_loader = DataLoader(full_dataset, batch_size=8, shuffle=False)

    hits = 0
    seen = 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validation"):
            images = images.to(device)
            labels = labels.to(device)
            predictions = net(images).argmax(dim=1)
            hits += (predictions == labels).sum().item()
            seen += labels.size(0)

    val_accuracy = hits / seen
    print(f"Validation Accuracy: {val_accuracy:.4f}")
# Path of the table that records per-sample validation data
excel_path = os.path.join(current_path, r"./metrics", "validation_metrics.csv")
# Dump per-sample validation results to CSV (input for the report and the confusion matrix)
def test_csv():
    """Run the saved model over ``val_path`` and write per-sample results to CSV.

    Each row of ``validation_results1.csv`` (under ``csv_path``) holds the
    three class probabilities followed by the predicted and the true class
    index. The overall accuracy is printed as well.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        # Keep preprocessing identical to test(), which normalises its inputs.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    model = LeNet5(num_classes=3)
    # map_location lets weights saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(pth_path, map_location=device))
    model.to(device)
    model.eval()

    val_data = ImageFolder(root=val_path, transform=transform)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

    acc = 0
    total_csv_data = []  # one array per batch, stacked after the loop
    with torch.no_grad():
        for x, y in tqdm(val_loader, desc="Validation"):
            x, y = x.to(device), y.to(device)
            logits = model(x)
            predicted = torch.argmax(logits, dim=1)
            acc += torch.sum(predicted == y).item()
            # The model outputs logits; store softmax probabilities so the
            # class columns really hold a probability distribution.
            probabilities = torch.softmax(logits, dim=1)
            batch_csv_data = np.concatenate(
                [
                    probabilities.cpu().numpy(),
                    predicted.unsqueeze(dim=1).cpu().numpy(),
                    y.unsqueeze(dim=1).cpu().numpy(),
                ],
                axis=1,
            )
            total_csv_data.append(batch_csv_data)

    total_csv_data = np.vstack(total_csv_data)
    print("验证数据集的准确率:%.4f" % (acc / len(val_data)))

    columns = ["猫", "狗", "野生", "y_pred", "y_true"]
    df = pd.DataFrame(total_csv_data, columns=columns)
    # to_csv does not create missing directories.
    os.makedirs(csv_path, exist_ok=True)
    df.to_csv(os.path.join(csv_path, "validation_results1.csv"), index=False)
# Metrics report
def test_report():
    """Print and save accuracy, per-class precision/recall/F1 and the confusion matrix.

    Reads ``validation_results1.csv`` from ``csv_path`` and writes
    ``model_report.csv`` to the same directory. All metrics are computed over
    the fixed label set 0..2, so a class that never occurs in the CSV still
    gets a row (scored 0) instead of breaking the report layout.
    """
    csv_data = pd.read_csv(os.path.join(csv_path, "validation_results1.csv"))
    y_true = csv_data["y_true"].values.astype(int)
    y_pred = csv_data["y_pred"].values.astype(int)

    class_names = ["猫", "狗", "野生"]
    labels = list(range(len(class_names)))

    matrix = confusion_matrix(y_true, y_pred, labels=labels)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
    recall = recall_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
    f1 = f1_score(y_true, y_pred, labels=labels, average=None, zero_division=0)

    separator = "=" * 50
    print(separator)
    print("模型性能报表")
    print(separator)
    print(f"准确率 (Accuracy): {accuracy:.4f}")
    print("\n分类报告:")
    print(classification_report(
        y_true, y_pred, labels=labels, target_names=class_names, zero_division=0))
    print("\n混淆矩阵:")
    print(matrix)
    print(separator)

    # One row per metric: accuracy, then precision / recall / F1 for each class.
    report_data = {
        "指标": ["准确率 (Accuracy)"]
        + [f"{name} Precision" for name in class_names]
        + [f"{name} Recall" for name in class_names]
        + [f"{name} F1 Score" for name in class_names],
        "值": [accuracy, *precision, *recall, *f1],
    }
    report_file = os.path.join(csv_path, "model_report.csv")
    pd.DataFrame(report_data).to_csv(report_file, index=False)
    print(f"报表已保存为: {report_file}")
# Visualise the validation results with matplotlib
def test_visual():
    """Plot the confusion matrix of the results stored in ``validation_results1.csv``.

    The matrix is computed over the fixed label set 0..2 so that it is always
    3x3 and lines up with the three tick labels, even when a class is absent
    from the CSV.
    """
    csv_data = pd.read_csv(os.path.join(csv_path, "validation_results1.csv"))
    # Labels are stored as floats in the CSV; convert back to integer indices.
    y_true = csv_data["y_true"].values.astype(int)
    y_pred = csv_data["y_pred"].values.astype(int)

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with that font

    class_names = ["猫", "狗", "野生"]
    labels = list(range(len(class_names)))

    matrix = confusion_matrix(y_true, y_pred, labels=labels)
    print(matrix)

    plt.matshow(matrix, cmap=plt.cm.Greens)
    plt.colorbar()

    # Write the count into every cell (x is the column, y is the row).
    n_rows, n_cols = matrix.shape
    for i in range(n_rows):
        for j in range(n_cols):
            plt.annotate(
                matrix[i, j],
                xy=(j, i),
                horizontalalignment="center",
                verticalalignment="center",
            )

    plt.xlabel("Predicted labels")
    plt.ylabel("True labels")
    plt.xticks(range(len(class_names)), class_names, rotation=45)
    plt.yticks(range(len(class_names)), class_names)
    plt.title("训练结果混淆矩阵视图")
    plt.show()


if __name__ == '__main__':
    # Uncomment the stage to run.
    #test()
    #test_csv()
    #test_report()
    test_visual()