Windows11+PyCharm利用MMSegmentation训练自己的数据集保姆级教程
系统版本:Windows 11
依赖环境:Anaconda3
运行软件:PyCharm
一.环境配置
1.通过Anaconda Prompt(anaconda)打开终端
2.创建一个虚拟环境
conda create --name mmseg python=3.9
3.激活虚拟环境
conda activate mmseg
4.安装pytorch和cuda
torch版本要求是1.12或者1.13,这里选择安装1.12,安装命令从pytorch官网找,地址
pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
5.安装mmcv
安装命令生成地址:地址
pip install mmcv==2.0.0rc4 -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12/index.html
6.下载源码
这里可以去github上下载1.1.1版本代码,也可以下载我准备好的代码:地址
7.用pycharm打开mmsegmentation-1.1.1
选择好配置的环境之后,打开终端,运行如下命令:
pip install -v -e .
至此环境配置完成。
二.准备自己的数据集
数据集的准备请查看:数据集制作教程
上面提供的源码中包含可训练的数据集,可以直接下载!
三.开始训练
在pycharm中打开上面下载的源码。
1.在mmseg/datasets文件夹下新建mysegDataset.py
from mmseg.registry import DATASETS
from .basesegdataset import BaseSegDataset
@DATASETS.register_module()
class mysegDataset(BaseSegDataset):
    """Custom semantic segmentation dataset (watermelon demo).

    Declares the class names and the RGB palette used for visualisation,
    and fixes the file suffixes of images and annotation masks.
    """

    # Class names and their display colours; list index == label ID in the mask.
    METAINFO = {
        'classes': ['background', 'red', 'green', 'white', 'seed-black', 'seed-white'],
        'palette': [[127, 127, 127], [200, 0, 0], [0, 200, 0],
                    [144, 238, 144], [30, 30, 30], [251, 189, 8]]
    }

    def __init__(self,
                 img_suffix='.jpg',        # image file suffix (BaseSegDataset default)
                 seg_map_suffix='.png',    # annotation-mask file suffix
                 reduce_zero_label=False,  # keep label 0 ('background') as a real class
                 **kwargs) -> None:
        super().__init__(
            img_suffix=img_suffix,
            seg_map_suffix=seg_map_suffix,
            reduce_zero_label=reduce_zero_label,
            **kwargs)
2.注册数据集
在`mmseg/datasets/__init__.py`中注册刚刚定义的`mysegDataset`数据集类,如下图所示,在最后添加即可
3.pipeline配置文件
在configs/_base_/datasets文件夹下新建mysegDataset_pipeline.py,并添加如下代码。
# 数据集路径
dataset_type = 'mysegDataset' # 数据集类名
data_root = 'Watermelon87_Semantic_Seg_Mask/' # 数据集路径(相对于mmsegmentation主目录)
# 输入模型的图像裁剪尺寸,一般是 128 的倍数,越小显存开销越少
crop_size = (512, 512)
# 训练预处理
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 1024),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
# 测试预处理
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
# TTA后处理
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
# 训练 Dataloader
train_dataloader = dict(
batch_size=2,
num_workers=0,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='img_dir/train', seg_map_path='ann_dir/train'),
pipeline=train_pipeline))
# 验证 Dataloader
val_dataloader = dict(
batch_size=1,
num_workers=0,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='img_dir/val', seg_map_path='ann_dir/val'),
pipeline=test_pipeline))
# 测试 Dataloader
test_dataloader = val_dataloader
# 验证 Evaluator
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU', 'mDice', 'mFscore'])
# 测试 Evaluator
test_evaluator = val_evaluator
4.配置生成
在主目录下新建configset.py,并添加如下代码。
该代码主要用于配置训练参数,右键运行即可生成配置文件。
from pathlib import Path

from mmengine import Config

# Base model config: UNet with an FCN head, Cityscapes schedule.
cfg = Config.fromfile('./configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py')
# Dataset / pipeline config created in step 3.
dataset_cfg = Config.fromfile('./configs/_base_/datasets/mysegDataset_pipeline.py')
cfg.merge_from_dict(dataset_cfg)

# Number of semantic classes in the dataset.
NUM_CLASS = 6

cfg.crop_size = (256, 256)
cfg.model.data_preprocessor.size = cfg.crop_size
cfg.model.data_preprocessor.test_cfg = dict(size_divisor=128)

# Single-GPU training: replace SyncBN with plain BN.
cfg.norm_cfg = dict(type='BN', requires_grad=True)
cfg.model.backbone.norm_cfg = cfg.norm_cfg
cfg.model.decode_head.norm_cfg = cfg.norm_cfg
cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg

# Set both output heads to the dataset's class count.
cfg.model.decode_head.num_classes = NUM_CLASS
cfg.model.auxiliary_head.num_classes = NUM_CLASS

# Training batch size.
cfg.train_dataloader.batch_size = 2

# Output directory for checkpoints and logs.
cfg.work_dir = './work_dirs/mysegDataset-UNet'

# Schedule, logging and checkpointing.
cfg.train_cfg.max_iters = 10000          # total training iterations
cfg.train_cfg.val_interval = 500         # evaluate every N iterations
cfg.default_hooks.logger.interval = 100  # log every N iterations
cfg.default_hooks.checkpoint.interval = 2500     # save a checkpoint every N iterations
cfg.default_hooks.checkpoint.max_keep_ckpts = 1  # keep at most this many checkpoints
cfg.default_hooks.checkpoint.save_best = 'mIoU'  # also keep the best-mIoU checkpoint

# Fixed random seed for reproducibility.
cfg['randomness'] = dict(seed=0)

# Config.dump() does not create the target directory — make sure it exists
# first, otherwise the dump fails with FileNotFoundError on a fresh checkout.
Path('myconfigs').mkdir(parents=True, exist_ok=True)
cfg.dump('myconfigs/mysegDataset_UNet.py')
5.修改num_workers=0
使用Windows系统训练,将上一步生成的配置文件中所有的num_workers修改成0。
# ---------------------------------------------------------------------------
# Auto-generated config dump (myconfigs/mysegDataset_UNet.py) produced by
# configset.py.  Shown verbatim so the reader can verify that every
# num_workers is 0 (required on Windows) before starting training.
# Edit configset.py, not this dump, to change settings.
# ---------------------------------------------------------------------------

# Model input crop size (set by configset.py).
crop_size = (
    256,
    256,
)
# Image normalisation / padding applied before the network.
data_preprocessor = dict(
    bgr_to_rgb=True,
    mean=[
        123.675,
        116.28,
        103.53,
    ],
    pad_val=0,
    seg_pad_val=255,
    size=(
        512,
        1024,
    ),
    std=[
        58.395,
        57.12,
        57.375,
    ],
    type='SegDataPreProcessor')
data_root = 'Watermelon87_Semantic_Seg_Mask/'
dataset_type = 'mysegDataset'
# Checkpointing, logging and visualisation hooks.
default_hooks = dict(
    checkpoint=dict(
        by_epoch=False,
        interval=2500,
        max_keep_ckpts=1,
        save_best='mIoU',
        type='CheckpointHook'),
    logger=dict(interval=100, log_metric_by_epoch=False, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='SegVisualizationHook'))
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
# Multi-scale ratios used by the TTA pipeline below.
img_ratios = [
    0.5,
    0.75,
    1.0,
    1.25,
    1.5,
    1.75,
]
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=False)
# UNet encoder-decoder with an FCN decode head and an auxiliary FCN head.
model = dict(
    auxiliary_head=dict(
        align_corners=False,
        channels=64,
        concat_input=False,
        dropout_ratio=0.1,
        in_channels=128,
        in_index=3,
        loss_decode=dict(
            loss_weight=0.4, type='CrossEntropyLoss', use_sigmoid=False),
        norm_cfg=dict(requires_grad=True, type='BN'),
        num_classes=6,
        num_convs=1,
        type='FCNHead'),
    backbone=dict(
        act_cfg=dict(type='ReLU'),
        base_channels=64,
        conv_cfg=None,
        dec_dilations=(
            1,
            1,
            1,
            1,
        ),
        dec_num_convs=(
            2,
            2,
            2,
            2,
        ),
        downsamples=(
            True,
            True,
            True,
            True,
        ),
        enc_dilations=(
            1,
            1,
            1,
            1,
            1,
        ),
        enc_num_convs=(
            2,
            2,
            2,
            2,
            2,
        ),
        in_channels=3,
        norm_cfg=dict(requires_grad=True, type='BN'),
        norm_eval=False,
        num_stages=5,
        strides=(
            1,
            1,
            1,
            1,
            1,
        ),
        type='UNet',
        upsample_cfg=dict(type='InterpConv'),
        with_cp=False),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[
            123.675,
            116.28,
            103.53,
        ],
        pad_val=0,
        seg_pad_val=255,
        size=(
            256,
            256,
        ),
        std=[
            58.395,
            57.12,
            57.375,
        ],
        test_cfg=dict(size_divisor=128),
        type='SegDataPreProcessor'),
    decode_head=dict(
        align_corners=False,
        channels=64,
        concat_input=False,
        dropout_ratio=0.1,
        in_channels=64,
        in_index=4,
        loss_decode=dict(
            loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
        norm_cfg=dict(requires_grad=True, type='BN'),
        num_classes=6,
        num_convs=1,
        type='FCNHead'),
    pretrained=None,
    test_cfg=dict(crop_size=256, mode='whole', stride=170),
    train_cfg=dict(),
    type='EncoderDecoder')
norm_cfg = dict(requires_grad=True, type='BN')
# SGD optimiser with polynomial LR decay.
optim_wrapper = dict(
    clip_grad=None,
    optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005),
    type='OptimWrapper')
optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
param_scheduler = [
    dict(
        begin=0,
        by_epoch=False,
        end=160000,
        eta_min=0.0001,
        power=0.9,
        type='PolyLR'),
]
randomness = dict(seed=0)
resume = False
test_cfg = dict(type='TestLoop')
# Test dataloader (same data split as validation).
test_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
        data_root='Watermelon87_Semantic_Seg_Mask/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(
                2048,
                1024,
            ), type='Resize'),
            dict(type='LoadAnnotations'),
            dict(type='PackSegInputs'),
        ],
        type='mysegDataset'),
    num_workers=0,
    persistent_workers=False,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    iou_metrics=[
        'mIoU',
        'mDice',
        'mFscore',
    ], type='IoUMetric')
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(
        2048,
        1024,
    ), type='Resize'),
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs'),
]
train_cfg = dict(max_iters=10000, type='IterBasedTrainLoop', val_interval=500)
# Training dataloader; num_workers must stay 0 on Windows.
train_dataloader = dict(
    batch_size=2,
    dataset=dict(
        data_prefix=dict(
            img_path='img_dir/train', seg_map_path='ann_dir/train'),
        data_root='Watermelon87_Semantic_Seg_Mask/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(
                keep_ratio=True,
                ratio_range=(
                    0.5,
                    2.0,
                ),
                scale=(
                    2048,
                    1024,
                ),
                type='RandomResize'),
            dict(
                cat_max_ratio=0.75, crop_size=(
                    512,
                    512,
                ), type='RandomCrop'),
            dict(prob=0.5, type='RandomFlip'),
            dict(type='PhotoMetricDistortion'),
            dict(type='PackSegInputs'),
        ],
        type='mysegDataset'),
    num_workers=0,
    persistent_workers=False,
    sampler=dict(shuffle=True, type='InfiniteSampler'))
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        keep_ratio=True,
        ratio_range=(
            0.5,
            2.0,
        ),
        scale=(
            2048,
            1024,
        ),
        type='RandomResize'),
    dict(cat_max_ratio=0.75, crop_size=(
        512,
        512,
    ), type='RandomCrop'),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs'),
]
tta_model = dict(type='SegTTAModel')
# Test-time augmentation: 6 scales x 2 flips per image.
tta_pipeline = [
    dict(file_client_args=dict(backend='disk'), type='LoadImageFromFile'),
    dict(
        transforms=[
            [
                dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
            ],
            [
                dict(direction='horizontal', prob=0.0, type='RandomFlip'),
                dict(direction='horizontal', prob=1.0, type='RandomFlip'),
            ],
            [
                dict(type='LoadAnnotations'),
            ],
            [
                dict(type='PackSegInputs'),
            ],
        ],
        type='TestTimeAug'),
]
val_cfg = dict(type='ValLoop')
# Validation dataloader.
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
        data_root='Watermelon87_Semantic_Seg_Mask/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(
                2048,
                1024,
            ), type='Resize'),
            dict(type='LoadAnnotations'),
            dict(type='PackSegInputs'),
        ],
        type='mysegDataset'),
    num_workers=0,
    persistent_workers=False,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    iou_metrics=[
        'mIoU',
        'mDice',
        'mFscore',
    ], type='IoUMetric')
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='SegLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
    ])
work_dir = './work_dirs/mysegDataset-UNet'
6.训练模型
在终端中运行以下命令:
python tools/train.py myconfigs/mysegDataset_UNet.py