当前位置：首页 > article >正文

记录一次mmpretrain训练数据并转onnx推理

article 2025/1/10 17:30:44

1.前言

2.代码

3.数据形态【分类用】

4.配置文件

5.训练

6.测试-分析-混淆矩阵等等，测试图片效果等

7.导出onnx

8.onnx推理

9.docker环境简单补充

1.前言

好久没有做图像分类了，于是想用商汤的mmclassification快速搞一波，结果发现这个项目已经没有了，现在已集成到mmpretrain里。

2.代码

截至写这篇文章时，我下载的是GitHub上的mmpretrain，main分支，版本是1.2：https://github.com/open-mmlab/mmpretrain 安装环境：

（1）跟着文档来就好

1.2 依赖环境 — MMPretrain 1.2.0 文档：https://mmpretrain.readthedocs.io/zh-cn/latest/get_started.html 主要是这两步：先 cd mmpretrain，再执行 pip install -U openmim && mim install -e .

open-mmlab喜欢用mim来装东西，又快，又对。包括mmcv、mmdeploy、mmdet等。

（2）自己搞一个docker，我文章最后做补充文档~

3.数据形态【分类用】

数据目录结构如下：data下分为训练集（train）和验证集（val），每个集合下按类名建子文件夹，各类的图片放在对应的类名文件夹里，就这样就行了。例如：data/train/类名A/xxx.jpg、data/val/类名A/yyy.jpg。

4.配置文件

代码里有个configs文件夹，把其中resnet目录下的resnet50_8xb32_in1k.py复制一份作为自己的配置文件；它里边还引用了如下一些配置文件：

依次把所有内容抄过来，合并成一个自己的配置文件。我放在config_me下边，叫my_resnet50_8xb32_in1k.py，最终内容如下边代码（注意：本文代码里的目录名有的写作config_me、有的写作configs_me，请统一成你实际使用的目录名）：

这里有两点需要注意：第一，去模型库下载预训练权重【读readme找模型库，下载与配置文件对应的预训练pth】；第二，dataset_type = 'CustomDataset'，这里用自定义数据集就行了，数据形态按上边那样组织即可，不用去改dataset下的imagenet、coco等的标签。


# ---------------------------------------------------------------------------
# User-tunable settings for a ResNet-50 classifier on a custom folder dataset.
# Everything below this section is derived from these values.
# ---------------------------------------------------------------------------
CLASS_NUMS = 8  # number of classes to predict (8 in this example)
BATCH_SIZE = 20
TRAIN_NUM_WORKERS = 8
VAL_NUM_WORKERS = 4
TR_DATA_ROOT = "/xx/data/train"  # training set root (one sub-folder per class)
VAL_DATA_ROOT = "/xx/data/val"  # validation set root (same layout)
MAX_EPOCH = 600
MultiStepLR_list = [100, 200, 300]  # epochs at which the LR is multiplied by gamma
VAL_INTERVAL = 20  # validate every N epochs (train_cfg below uses by_epoch=True)
SAVE_INTERVAL = 50  # save a checkpoint every N epochs (CheckpointHook counts epochs by default)
LOG_INTERVAL = 100  # print a log line every N iterations
# Pre-trained weights downloaded from the model zoo; they must match this config.
PRE_CHECKPOINT = "/configs_me/resnet50_8xb32_in1k_20210831-ea4938fc.pth"

frozen_stagesss = 2  # -1 freezes nothing; here the first 2 backbone stages are frozen

# Top-k values reported by both the head and the evaluator. Top-5 only makes
# sense with at least 5 classes, so fall back to top-1 for small problems
# (e.g. binary classification) instead of editing two places by hand.
TOPK = (1, 5) if CLASS_NUMS >= 5 else (1, )

# model settings
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(3, ),
        frozen_stages=frozen_stagesss,     # number of frozen backbone stages
        style='pytorch'),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='LinearClsHead',
        num_classes=CLASS_NUMS,
        in_channels=2048,  # must be 2048 with the checkpoint in load_from; 512 raises an error
        # in_channels=512,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
        topk=TOPK,
    ))


# dataset settings
# 'CustomDataset' reads the class-per-folder layout directly, so no ImageNet /
# COCO label files need to be edited.
dataset_type = 'CustomDataset'
data_preprocessor = dict(
    num_classes=CLASS_NUMS,
    # RGB format normalization parameters
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    # convert image from BGR to RGB
    to_rgb=True,
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', scale=224),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='ResizeEdge', scale=256, edge='short'),  # resize the short edge to 256 px
    dict(type='CenterCrop', crop_size=224),
    dict(type='PackInputs'),
]

train_dataloader = dict(
    batch_size=BATCH_SIZE,
    num_workers=TRAIN_NUM_WORKERS,
    dataset=dict(
        type=dataset_type,
        data_root=TR_DATA_ROOT,
        # ann_file='meta/train.txt',
        # split='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),  # default sampler, shuffled
    # persistent_workers=True,  # keep workers alive to shorten per-epoch start-up
)

val_dataloader = dict(
    batch_size=BATCH_SIZE,
    num_workers=VAL_NUM_WORKERS,
    dataset=dict(
        type=dataset_type,
        data_root=VAL_DATA_ROOT,
        # ann_file='meta/test.txt',
        # split='test',
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
    # persistent_workers=True,
)

# Uses the same TOPK as the head so the two can never disagree.
val_evaluator = dict(type='Accuracy', topk=TOPK)

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator

# optimizer
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))

# learning policy
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=MultiStepLR_list, gamma=0.5)

# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=MAX_EPOCH, val_interval=VAL_INTERVAL)
val_cfg = dict()
test_cfg = dict()

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
# auto_scale_lr = dict(base_batch_size=256)
# The default strategy assumes a total batch size of 256. If a different total
# batch size is used (e.g. 512) with automatic LR scaling enabled, the learning
# rate is scaled accordingly (2x in that example).

# defaults to use registries in mmpretrain
default_scope = 'mmpretrain'

# configure default hooks
default_hooks = dict(
    # record the time of every iteration.
    timer=dict(type='IterTimerHook'),
    # print log every LOG_INTERVAL iterations.
    logger=dict(type='LoggerHook', interval=LOG_INTERVAL),
    # enable the parameter scheduler.
    param_scheduler=dict(type='ParamSchedulerHook'),
    # save a checkpoint every SAVE_INTERVAL epochs.
    checkpoint=dict(type='CheckpointHook', interval=SAVE_INTERVAL),
    # set sampler seed in distributed environment.
    sampler_seed=dict(type='DistSamplerSeedHook'),
    # validation results visualization, set True to enable it.
    visualization=dict(type='VisualizationHook', enable=False),
)

# configure environment
env_cfg = dict(
    # whether to enable cudnn benchmark
    cudnn_benchmark=False,
    # set multi process parameters
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    # set distributed parameters
    dist_cfg=dict(backend='nccl'),
)

# set visualizer
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(type='UniversalVisualizer', vis_backends=vis_backends)

# set log level
log_level = 'INFO'

# load from which checkpoint
load_from = PRE_CHECKPOINT

# whether to resume training from the loaded checkpoint
resume = False

# Defaults to use random seed and disable `deterministic`
randomness = dict(seed=None, deterministic=False)

5.训练

把tools文件夹下边的train.py，复制一份【PS.后边都指的复制到项目根目录下】，只改动如下代码，然后python train.py就可以训练了。【注意训练结果权重在你的work-dir指定目录下】

# `--config` is declared as an option with a default value, so the copied
# train.py can be started without passing the config path on the command line.
parser.add_argument('--config', default="config_me/my_resnet50_8xb32_in1k.py", help='train config file path')
# Directory that receives the logs and trained models (per the help text).
parser.add_argument('--work-dir', default="my_train_result", help='the dir to save logs and models')

还可以加几句打印类别顺序：【还没试过，待定】
# Print the class order stored in the test-loop dataset's metainfo.
# NOTE(review): the article marks this snippet as untested; it presumably goes
# after `runner` is built in train.py — confirm before relying on it.
classes = runner.test_loop.dataloader.dataset.metainfo.get('classes')
print("=== 本次训练类别顺序： ======================")
print(classes)
print('=========================================')

6.测试-分析-混淆矩阵等等，测试图片效果等

同理，把tools下的test.py复制，改动如下，可以评估验证集：

    # Options with defaults so the copied test.py runs without CLI arguments.
    parser.add_argument('--config', default="config_me/my_resnet50_8xb32_in1k.py", help='test config file path')
    # Trained weights to evaluate.
    parser.add_argument('--checkpoint', default="my_train_result/epoch_200.pth", help='checkpoint file')
    parser.add_argument('--work-dir', default="test_result", help='the directory to save the file containing evaluation metrics')
    # parser.add_argument('--out', default="test_result/res_epoch_20.pkl", help='the file to output results.')  # optional: dump the results to a .pkl file

同理， analyze_results.py复制一份出来，改动如下，可以分析模型对测试集的效果：

    # Options with defaults so the copied analyze_results.py runs without CLI
    # arguments. (The original line read `default=default="..."`, which is a
    # syntax error.)
    parser.add_argument('--config', default="config_me/my_resnet50_8xb32_in1k.py", help='test config file path')
    # Result file to analyze (json/pkl, per the help text).
    parser.add_argument('--result', default="test_result/res_epoch_20.pkl", help='test result json/pkl file')
    parser.add_argument('--out-dir', default="test_result/analyze", help='dir to store output files')

同理， confusion_matrix.py复制一份出来，改动如下，可以计算验证集的混淆矩阵：

    # Options with defaults so the copied confusion_matrix.py runs without CLI
    # arguments.
    parser.add_argument('--config',  default="config_me/my_resnet50_8xb32_in1k.py", help='test config file path')
    # Either a checkpoint (.pth) or a predictions pickle (.pkl), per the help text.
    parser.add_argument(
        '--ckpt_or_result',   default="my_train_result/epoch_200.pth",
        type=str,
        help='The checkpoint file (.pth) or '
        'dumpped predictions pickle file (.pkl).')

运行的时候，加上 --show 和 --include-values 等参数，可以显示带数字的混淆矩阵。
同理，把demo下边的image_demo.py复制一份，改动如下，可以测试图片推理：

    # Options with defaults so the copied image_demo.py runs without CLI
    # arguments: the image to classify, the config, and the trained weights.
    parser.add_argument('--img', default="data/val/3.jpg", help='Image file')
    parser.add_argument('--model', default="configs_me/my_resnet50_8xb32_in1k.py", help='Model name or config file path')
    parser.add_argument('--checkpoint', default="xxx/epoch_400.pth", help='Checkpoint file path.')
    # Flag (off by default): show the prediction in a window.
    parser.add_argument(
        '--show',
        action='store_true',
        help='Whether to show the prediction result in a window.')
    # Directory that receives the visualization image.
    parser.add_argument(
        '--show-dir',
        default="test_111111111111111",
        type=str,
        help='The directory to save the visualization image.')

7.导出onnx

这里用到mmdeploy：把mmdeploy用git clone下载到本项目文件夹下，再cd到mmdeploy里，同样运行mim install -e .来安装。或者参考：Get Started — mmdeploy 1.3.1 文档

目前我这里是：1.3.1版本

导出onnx脚本：export_onnx.py

# === Export the trained model to ONNX with mmdeploy ====================================
from mmdeploy.apis import torch2onnx
from mmdeploy.backend.sdk.export_info import export2SDK

# The string values below are placeholders; replace them with real paths.
# `img` is the path of any test image, `work_dir` the directory that receives
# the exported files, `save_file` the ONNX file name.
img = '随便一张测试图路径 xxx/xx。jpg'
work_dir = '另存onnx的目录'
save_file = 'epoch_500.onnx'
deploy_cfg = 'mmdeploy/configs/mmpretrain/classification_onnxruntime_static.py'
model_cfg = 'configs_me/my_resnet50_8xb32_in1k.py' # config file used for training
model_checkpoint = 'train_res_1024/epoch_500.pth'  # trained .pth checkpoint
device = 'cpu'

# 1. convert model to onnx
torch2onnx(img, work_dir, save_file, deploy_cfg, model_cfg, model_checkpoint, device)

# 2. extract pipeline info for sdk use (dump-info)
export2SDK(deploy_cfg, model_cfg, work_dir, pth=model_checkpoint, device=device)

8.onnx推理

（1）mmdeploy推理方式

import os

# === Run the exported ONNX model through mmdeploy ===============================
from mmdeploy.apis import inference_model

# Class names in training order. The order can be printed during training,
# testing, or when computing the confusion matrix. The list must contain one
# entry per class the model predicts, otherwise `classes[label]` below raises
# IndexError.
classes = ["class1", "class2", "class3"]

model_cfg = 'configs_me/my_resnet50_8xb32_in1k.py'
deploy_cfg = 'mmdeploy/configs/mmpretrain/classification_onnxruntime_static.py'
# Image to classify. (An earlier assignment of this name was dead code: it was
# overwritten before being read, so only this value was ever used.)
data_paths = 'xxx/1.jpg'
backend_files = ['xxx/rscd_c8_2w_epoch_500.onnx']  # the ONNX file exported above
device = 'cpu'

# To process a whole folder, loop over os.listdir(...) and run the code below
# once per image path.
img_path = data_paths
result = inference_model(model_cfg, deploy_cfg, backend_files, img_path, device)
scores = result[0].pred_score.cpu().numpy().tolist()
labels = result[0].pred_label.cpu().numpy().tolist()

# First predicted label and the score stored at that label's index.
label = labels[0]
score = scores[label]
print("图片名：", img_path, "预测类别：", classes[label], "预测分数：", round(score, 4))

（2）onnx-runtime推理方式，脱离框架【very nice ！！！！！！！！！】

其中的数据预处理要与config文件里的test_pipeline保持一致，比如resize、裁剪、归一化等都要对上。


import os
import onnxruntime
import cv2
import numpy as np

def resize_edge(image, scale=256, edge='short'):
    """Resize ``image`` so the selected edge is ``scale`` px, keeping aspect ratio.

    Args:
        image: Array whose first two dimensions are (height, width); it is
            passed to ``cv2.resize``.
        scale: Target length in pixels of the selected edge.
        edge: ``'short'`` selects the shorter edge; any other value selects
            the longer edge. For a square image the width is the target edge.

    Returns:
        The image returned by ``cv2.resize`` (default interpolation).
    """
    h, w = image.shape[:2]
    if edge == 'short':
        fit_height = h < w
    else:
        fit_height = h > w

    # Pin the target edge to `scale` exactly and derive the other edge from the
    # aspect ratio. Multiplying both edges by a precomputed float ratio and
    # truncating can land one pixel short of `scale` (e.g. for a 49 px edge,
    # 49 * (256 / 49) evaluates just below 256).
    target = int(scale)
    if fit_height:
        new_h = target
        new_w = int(scale * w / h)
    else:
        new_w = target
        new_h = int(scale * h / w)

    # cv2.resize expects the size as (width, height).
    return cv2.resize(image, (new_w, new_h))

def center_crop(image, crop_size=224):
    """Return the ``crop_size`` x ``crop_size`` window around the image centre.

    The window's top-left corner is clamped at 0, so along an axis shorter
    than ``crop_size`` the slice simply returns what is available.

    Args:
        image: Array whose first two dimensions are (height, width).
        crop_size: Side length of the square window in pixels.

    Returns:
        The sliced sub-array of ``image``.
    """
    height, width = image.shape[:2]
    half = crop_size // 2
    # Top-left corner of the window, never negative.
    top = max(height // 2 - half, 0)
    left = max(width // 2 - half, 0)
    rows = slice(top, top + crop_size)
    cols = slice(left, left + crop_size)
    return image[rows, cols]

def pack_inputs(image):
    """Turn an HxWxC image into a channel-first float32 array and normalise it.

    The channel axis is reversed first (BGR -> RGB for an image loaded with
    OpenCV), then the first three channels are normalised with the per-channel
    mean/std used in the training config.

    Args:
        image: Array indexed as (height, width, channel).

    Returns:
        A float32 array indexed as (channel, height, width).
    """
    flipped = image[:, :, ::-1]
    chw = flipped.transpose(2, 0, 1).astype(np.float32)
    channel_means = (123.675, 116.28, 103.53)
    channel_stds = (58.395, 57.12, 57.375)
    for idx, (mean, std) in enumerate(zip(channel_means, channel_stds)):
        chw[idx, :] = (chw[idx, :] - mean) / std
    return chw

def img_preprocess(image_path):
    """Load an image and apply the same steps as the config's test pipeline.

    Mirrors the ResNet-50 config used for training:
    test_pipeline = [
        dict(type='LoadImageFromFile'),
        dict(type='ResizeEdge', scale=256, edge='short'),  # short edge -> 256 px
        dict(type='CenterCrop', crop_size=224),
        dict(type='PackInputs'),
    ]

    Args:
        image_path: Path of the image file to read.

    Returns:
        The array produced by ``pack_inputs`` for the resized, cropped image.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read the image.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with a clear message rather than with an AttributeError later on.
    if image is None:
        raise FileNotFoundError(
            f"Image is missing or cannot be decoded: {image_path!r}")
    resized_image = resize_edge(image, scale=256, edge='short')
    cropped_image = center_crop(resized_image, crop_size=224)
    final_image = pack_inputs(cropped_image)
    return final_image

def img_infer(onnx_model, img_path):
    """Run one image through an ONNX model with onnxruntime on the CPU.

    Args:
        onnx_model: Path of the ``.onnx`` file, or an existing
            ``onnxruntime.InferenceSession`` to reuse across calls (avoids
            reloading the model for every image).
        img_path: Path of the image to run.

    Returns:
        The list returned by ``InferenceSession.run(None, ...)``, i.e. all
        model outputs.
    """
    # Add the leading batch dimension.
    batch = np.expand_dims(img_preprocess(img_path), axis=0)

    if isinstance(onnx_model, onnxruntime.InferenceSession):
        session = onnx_model
    else:
        session = onnxruntime.InferenceSession(
            onnx_model, providers=['CPUExecutionProvider'])

    # The same batch is fed to every input the model declares.
    input_feed = {node.name: batch for node in session.get_inputs()}
    return session.run(None, input_feed)


if __name__ == '__main__':
    # Exported model and the class names in training order (the order can be
    # printed when testing or when computing the confusion matrix).
    onnx_model = "onnx_model/epoch_500.onnx"
    classes = ["class1", "class2", "class3"]
    classes_explain = ["第一类", "第二类", "第三类"]

    # Single-image inference.
    img_path = "data/1 (1).png"
    res = img_infer(onnx_model, img_path)

    # Index and value of the largest entry across the model outputs.
    best_index = np.argmax(res)
    best_score = round(np.max(res), 4)
    print("图片名：", img_path, "预测类别：", classes_explain[best_index], "预测分数：", best_score)