mmdetection框架下使用yolov3训练Seaships数据集
之前复现的yolov3算法采用的是传统的coco数据集,这里我需要在新的数据集上跑,也就是船舶检测方向的SeaShips数据集,这里给出教程。
Seaships论文链接:https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8438999
一、数据集下载
可以去官网下载或者直接点击链接下载:
Seaships官网:https://github.com/jiaming-wang/SeaShips
下载链接:http://www.lmars.whu.edu.cn/prof_web/shaozhenfeng/datasets/SeaShips(7000).zip
Seaships原数据集有3万+张图像,但给出的数据集一共只有7000张,应该是经过筛选后的高质量图像。
这是论文给出的数据集中各类型的船只图像数量。
这是论文《An enhanced CNN-enabled learning method for promoting ship detection in maritime surveillance system》采用Seaships数据集进行实验的图像数量,一共是7000张。
下载完后的数据集文件夹结构应该是这样的:
一共是三个文件夹,JPEGImages里面保存的是7000张图像文件
ImageSets保存的是四个txt文件
里面分别是四种集的图像编号,如test.txt文件内容如下(部分):
Annotations里面存放的是7000张图像的标注文件:
二、数据集格式转换
YOLO系列算法采用的是coco数据集,coco数据集的标注文件格式如下:
我们可以使用下面的代码直接将Seaships数据集转换成coco数据集文件夹架构的文件:
import os
import cv2
import json
import shutil
import xml.etree.ElementTree as ET
from tqdm import tqdm
# The six object categories of the SeaShips dataset.
SEASHIPS_CLASSES = (
    'ship', 'ore carrier', 'bulk cargo carrier', 'general cargo ship',
    'container ship', 'fishing boat'
)
# Map each class name to a 1-based COCO category_id.
label_ids = {name: i + 1 for i, name in enumerate(SEASHIPS_CLASSES)}

def parse_xml(xml_path):
    """Parse one VOC-style XML annotation file.

    Args:
        xml_path: Path to the .xml annotation file.

    Returns:
        List of dicts with keys 'name', 'label_id', 'difficult' and
        'bbox' ([xmin, ymin, xmax, ymax], ints). Objects with an unknown
        class or without a <bndbox> are skipped with a warning.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()
    objects = []
    for obj in root.findall('object'):
        # Class name must be one of the six SeaShips categories.
        name = obj.find('name').text
        if name not in label_ids:
            print(f"警告: 未知类别 '{name}',跳过该对象。")
            continue
        # <difficult> is optional; default to 0 (not difficult) when the
        # tag is absent or empty.
        difficult_tag = obj.find('difficult')
        difficult = int(difficult_tag.text) if difficult_tag is not None and difficult_tag.text else 0
        bnd_box = obj.find('bndbox')
        if bnd_box is None:
            print(f"警告: 在文件 {xml_path} 中未找到 <bndbox> 标签,跳过该对象。")
            continue
        # int(float(...)) tolerates coordinates written as e.g. '10.0',
        # which a bare int() would reject with ValueError.
        bbox = [
            int(float(bnd_box.find('xmin').text)),
            int(float(bnd_box.find('ymin').text)),
            int(float(bnd_box.find('xmax').text)),
            int(float(bnd_box.find('ymax').text)),
        ]
        objects.append({
            'name': name,
            'label_id': label_ids[name],
            'difficult': difficult,
            'bbox': bbox
        })
    return objects
def load_split_files(split_dir):
    """Load the dataset split files (train.txt, val.txt, test.txt).

    Args:
        split_dir: Directory containing the split txt files.

    Returns:
        Dict mapping split name ('train'/'val'/'test') to a list of image
        id stems. A missing split file yields an empty list; blank lines
        (e.g. a trailing newline) are ignored.
    """
    split_files = {}
    for split_name in ['train', 'val', 'test']:
        split_path = os.path.join(split_dir, f'{split_name}.txt')
        if os.path.exists(split_path):
            with open(split_path, 'r') as f:
                # Strip whitespace and drop empty lines so a trailing
                # newline does not produce a bogus '' entry.
                split_files[split_name] = [
                    line.strip() for line in f if line.strip()
                ]
        else:
            print(f"警告: 未找到 {split_name}.txt 文件,跳过该划分。")
            split_files[split_name] = []
    return split_files
def convert_to_coco(image_dir, xml_dir, split_dir, output_dir):
    """Convert the SeaShips dataset to COCO format.

    For each split listed in the train/val/test txt files this writes a
    COCO-style ``instances_<split>.json`` under ``output_dir/annotations``
    and copies the split's images into ``output_dir/<split>/``.

    Args:
        image_dir: Directory containing the .jpg images.
        xml_dir: Directory containing the VOC-style .xml annotations.
        split_dir: Directory containing train.txt / val.txt / test.txt.
        output_dir: Destination root for the COCO-format dataset.
    """
    # Create the output directory layout.
    os.makedirs(os.path.join(output_dir, 'annotations'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'train'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'val'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'test'), exist_ok=True)
    # Load the split definitions.
    split_files = load_split_files(split_dir)

    def create_coco_structure():
        # Fresh COCO skeleton; one instance is built per split.
        return {
            "info": {
                "description": "Seaships Dataset",
                "version": "1.0",
                "year": 2023,
                "contributor": "Your Name",
                "date_created": "2023-10-01"
            },
            "licenses": [],
            "images": [],
            "annotations": [],
            "categories": [
                {"id": i + 1, "name": name, "supercategory": "none"}
                for i, name in enumerate(SEASHIPS_CLASSES)
            ]
        }

    # Process each split independently.
    for split_name, file_names in split_files.items():
        coco_data = create_coco_structure()
        annotation_id = 1
        for file_name in tqdm(file_names, desc=f"处理 {split_name} 数据集"):
            xml_file = os.path.join(xml_dir, f'{file_name}.xml')
            image_name = f'{file_name}.jpg'
            image_path = os.path.join(image_dir, image_name)
            # Skip entries whose image or annotation file is missing.
            if not os.path.exists(image_path):
                print(f"警告: 图像文件 {image_name} 不存在,跳过该标注文件。")
                continue
            if not os.path.exists(xml_file):
                print(f"警告: 标注文件 {xml_file} 不存在,跳过该图像文件。")
                continue
            # Read the image only to obtain its dimensions. cv2.imread
            # returns None (instead of raising) for corrupt/unreadable
            # files, which would otherwise crash on .shape below.
            image = cv2.imread(image_path)
            if image is None:
                print(f"警告: 无法读取图像文件 {image_name},跳过该图像文件。")
                continue
            height, width, _ = image.shape
            # Register the image; ids are 1-based per split.
            image_id = len(coco_data['images']) + 1
            coco_data['images'].append({
                "id": image_id,
                "file_name": image_name,
                "width": width,
                "height": height
            })
            # Convert every annotated object of this image.
            for obj in parse_xml(xml_file):
                xmin, ymin, xmax, ymax = obj['bbox']
                # COCO bboxes are [x, y, width, height].
                bbox = [xmin, ymin, xmax - xmin, ymax - ymin]
                area = (xmax - xmin) * (ymax - ymin)
                coco_data['annotations'].append({
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": obj['label_id'],
                    "bbox": bbox,
                    "area": area,
                    "iscrowd": 0,
                    "difficult": obj['difficult']
                })
                annotation_id += 1
            # Copy the image into the split's folder.
            shutil.copy(image_path, os.path.join(output_dir, split_name, image_name))
        # Write the COCO annotation file for this split.
        with open(os.path.join(output_dir, 'annotations', f'instances_{split_name}.json'), 'w') as f:
            json.dump(coco_data, f, indent=4)
    print(f"转换完成,结果已保存到 {output_dir}")
# 设置路径
image_dir = "your path to images" # 图像文件目录
xml_dir = "your path to annotations" # XML 标注文件目录
split_dir = "your path to txt directory" # 划分文件目录(包含 train.txt, val.txt, test.txt)
output_dir = "your path to output directory" # 输出的 COCO 格式文件夹
# 执行转换
convert_to_coco(image_dir, xml_dir, split_dir, output_dir)
将代码保存为seaships_to_coco.py文件。
运行以下代码进行转换:
python seaships_to_coco.py
运行完成以后生成Seaships_coco文件夹,下面包含和coco数据集相同格式的文件:
这样我们就得到了coco格式的Seaships数据集了。
三、修改配置文件
3.1 修改coco.py
将classes修改为Seaships数据集的类:
Seaships类如下六种:
'ship', 'ore carrier', 'bulk cargo carrier', 'general cargo ship', 'container ship', 'fishing boat'
3.2 修改class_names.py
同样将coco_class修改为seaships的类别:
3.3 修改需要运行的配置的文件
比如我跑的这个py文件,需要把里面所有的路径都修改成自己coco格式的seaships数据集。
把所有coco的路径都改成自己seaships数据集的路径,包括测试集、训练集等。
完整代码如下:
# ---- Runtime basics -------------------------------------------------------
# Automatic LR scaling is disabled; 64 is the reference total batch size.
auto_scale_lr = dict(base_batch_size=64, enable=False)
backend_args = None
# Inputs arrive as BGR uint8; convert to RGB, scale to [0, 1], pad to /32.
data_preprocessor = dict(
    type='DetDataPreprocessor',
    bgr_to_rgb=True,
    mean=[0, 0, 0],
    std=[255.0, 255.0, 255.0],
    pad_size_divisor=32)
# Root of the COCO-formatted SeaShips dataset.
data_root = 'data/SeaShips_coco/'
dataset_type = 'CocoDataset'
# ---- Hooks and execution environment --------------------------------------
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),  # log every 50 iterations
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=7),  # save every 7 epochs
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
input_size = (320, 320)
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
# ---- YOLOv3 model ---------------------------------------------------------
# num_classes is set to 6 to match the six SeaShips categories
# ('ship', 'ore carrier', 'bulk cargo carrier', 'general cargo ship',
#  'container ship', 'fishing boat'). The stock COCO config uses 80, which
# would not match the modified class list this tutorial installs.
model = dict(
    type='YOLOV3',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        bgr_to_rgb=True,
        mean=[0, 0, 0],
        std=[255.0, 255.0, 255.0],
        pad_size_divisor=32),
    backbone=dict(
        type='Darknet',
        depth=53,
        out_indices=(3, 4, 5),  # three feature levels for the FPN-style neck
        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://darknet53')),
    neck=dict(
        type='YOLOV3Neck',
        num_scales=3,
        in_channels=[1024, 512, 256],
        out_channels=[512, 256, 128]),
    bbox_head=dict(
        type='YOLOV3Head',
        num_classes=6,  # six SeaShips classes (COCO default is 80)
        in_channels=[512, 256, 128],
        out_channels=[1024, 512, 256],
        anchor_generator=dict(
            type='YOLOAnchorGenerator',
            # Standard YOLOv3 anchors, largest first (stride 32 -> 8).
            base_sizes=[
                [(116, 90), (156, 198), (373, 326)],
                [(30, 61), (62, 45), (59, 119)],
                [(10, 13), (16, 30), (33, 23)],
            ],
            strides=[32, 16, 8]),
        bbox_coder=dict(type='YOLOBBoxCoder'),
        featmap_strides=[32, 16, 8],
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_conf=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_xy=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=2.0,
            reduction='sum'),
        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),
    train_cfg=dict(
        assigner=dict(
            type='GridAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0)),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        conf_thr=0.005,
        nms=dict(type='nms', iou_threshold=0.45),
        max_per_img=100))
# ---- Optimization schedule ------------------------------------------------
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005),
    clip_grad=dict(max_norm=35, norm_type=2))  # clip gradients by L2 norm
# Linear warmup over the first 2000 iterations, then step decay (x0.1)
# at epochs 218 and 246.
param_scheduler = [
    dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=2000),
    dict(type='MultiStepLR', by_epoch=True, milestones=[218, 246], gamma=0.1),
]
resume = False
test_cfg = dict(type='TestLoop')
# ---- Test data ------------------------------------------------------------
test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CocoDataset',
        data_root='data/SeaShips_coco/',
        ann_file='annotations/instances_test.json',
        data_prefix=dict(img='test/'),
        test_mode=True,
        backend_args=None,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='Resize', scale=(320, 320), keep_ratio=True),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor')),
        ]))
test_evaluator = dict(
    type='CocoMetric',
    ann_file='data/SeaShips_coco/annotations/instances_test.json',
    metric='bbox',
    backend_args=None)
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(type='Resize', scale=(320, 320), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor')),
]
# ---- Training data --------------------------------------------------------
# 273 epochs total, validating every 7 epochs.
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=273, val_interval=7)
train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type='CocoDataset',
        data_root='data/SeaShips_coco/',
        ann_file='annotations/instances_train.json',
        data_prefix=dict(img='train/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        backend_args=None,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='Expand', mean=[0, 0, 0], to_rgb=True,
                 ratio_range=(1, 2)),
            dict(
                type='MinIoURandomCrop',
                min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
                min_crop_size=0.3),
            dict(type='Resize', scale=(320, 320), keep_ratio=True),
            dict(type='RandomFlip', prob=0.5),
            dict(type='PhotoMetricDistortion'),
            dict(type='PackDetInputs'),
        ]))
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Expand', mean=[0, 0, 0], to_rgb=True, ratio_range=(1, 2)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', scale=(320, 320), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackDetInputs'),
]
# ---- Validation data ------------------------------------------------------
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='CocoDataset',
        data_root='data/SeaShips_coco/',
        ann_file='annotations/instances_val.json',
        data_prefix=dict(img='val/'),
        test_mode=True,
        backend_args=None,
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=None),
            dict(type='Resize', scale=(320, 320), keep_ratio=True),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='PackDetInputs',
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor')),
        ]))
val_evaluator = dict(
    type='CocoMetric',
    ann_file='data/SeaShips_coco/annotations/instances_val.json',
    metric='bbox',
    backend_args=None)
# ---- Visualization / output -----------------------------------------------
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    name='visualizer',
    vis_backends=[dict(type='LocalVisBackend')])
# Where checkpoints and logs are written.
work_dir = '/home/21021110287/wxz/mmdetection/work_dirs/yolo_seaships'
路径里面包含seaships的就是我自己修改过后的,大家在用的时候记得改成自己的路径即可。将该文件保存为 yolov3_seaships.py。
运行以下代码开始训练算法(验证集上跑):
python <the path to train.py> <the path to yolov3_seaships.py> --work-dir <the path to your output directory>
第一个路径是train.py文件的路径 第二个是刚刚保存的运行配置文件的路径,最后一个路径是自定义的输出日志保存结果的路径,如果不设置则会自动生成work_dir文件夹保存结果,命令如下:
python <the path to train.py> <the path to yolov3_seaships.py>
如果需要在测试集上跑的话还需要添加检查点文件路径:
python <the path to your test.py> <the path to yolov3_seaships.py> <the path to your pth file> --work-dir <the path to the output directory>
四、运行结果
运行上述命令后 我们的算法就开始跑起来了:
最终运行结果的日志文件如下:
五、Faster-RCNN
如果还想在faster-rcnn或者ssd上运行,直接选择configs文件夹下不同的配置文件修改运行命令即可
faster-rcnn可能会出现service not available的错误,则需要把运行配置文件中加载预训练模型的代码注释掉,否则没有预训练模型无法运行:
如果不想注释掉就可以按照下面的方法去下载预训练模型(即权重文件):
在python环境下输入下面命令下载模型即可:
之后找到模型文件(.pth文件),复制路径,添加到加载预训练模型的那行代码中“checkpoint=”的后面即可重新运行,这样会发现运行速度远超未加载权重的时候。