当前位置：首页 > article >正文

mmdetection实战，训练自己的数据集

article 2024/10/7 14:09:11

1 库安装

        pip install timm==1.0.7 thop efficientnet_pytorch==0.7.1 einops grad-cam==1.4.8 dill==0.3.6 albumentations==1.4.11 pytorch_wavelets==1.3.0 tidecv PyWavelets -i https://pypi.tuna.tsinghua.edu.cn/simple
        pip install -U openmim -i https://pypi.tuna.tsinghua.edu.cn/simple
        mim install mmengine -i https://pypi.tuna.tsinghua.edu.cn/simple
        mim install "mmcv==2.1.0" -i https://pypi.tuna.tsinghua.edu.cn/simple

pip install YOLO
pip install ultralytics

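*需要注意 mmcv 与 mmdetection 的版本兼容性：mmdetection 在导入时会校验 mmcv 的版本范围（见 mmdet/__init__.py），例如 v3.0.0 要求 mmcv>=2.0.0rc4 且 <2.1.0，版本不匹配会直接报错。安装前请先确认所用 mmdetection 版本对应的 mmcv 范围。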

2 源码下载

源码地址：https://github.com/open-mmlab/mmdetection/tree/v3.0.0 （GitHub - open-mmlab/mmdetection，v3.0.0 标签）

3 数据集准备

我的数据集是 YOLO 格式，需要先转换为 COCO 格式。

使用时需要修改类别和路径。

import json
import os
import shutil

import cv2

# info ，license，categories 结构初始化；
# 在train.json,val.json,test.json里面信息是一致的；

# info，license暂时用不到
# Dataset-level metadata copied into every generated annotation file.
# NOTE: the creation date has to be a quoted string. The unquoted expression
# 2024 - 6 - 9 is integer subtraction and would store the number 2009.
info = {
    "year": 2024,
    "version": '1.0',
    "date_created": '2024-06-09',
}

# Placeholder license record; name and url hold the literal string "null".
licenses = dict(id=1, name="null", url="null")

# Class table of the custom dataset. The ids must line up with the class
# indices used in the YOLO label files.
categories = [
    {"id": class_id, "name": class_name, "supercategory": 'lines'}
    for class_id, class_name in enumerate(('L', 'R', 'I', 'M', 'A'))
]

# One COCO skeleton per split. The info, licenses and categories objects are
# shared by all three; every split starts with its own empty images and
# annotations lists.
train_data = dict(info=info, licenses=licenses, categories=categories, images=[], annotations=[])
test_data = dict(info=info, licenses=licenses, categories=categories, images=[], annotations=[])
valid_data = dict(info=info, licenses=licenses, categories=categories, images=[], annotations=[])


# image_path: directory with the YOLO images of one split, e.g. images/train.
# label_path: directory with the matching YOLO label files, e.g. labels/train.
def yolo_covert_coco_format(image_path, label_path):
    """Convert one YOLO split into COCO ``images`` and ``annotations`` lists.

    Args:
        image_path: Directory containing the images of the split.
        label_path: Directory containing one ``<image stem>.txt`` file per
            image. Every non-empty line is read as
            ``class x_center y_center width height`` where the four box
            values are fractions of the image size.

    Returns:
        ``(images, annotations)``: two lists of COCO-style dicts. Boxes are
        stored as ``[xmin, ymin, width, height]`` in pixels.

    Images that cannot be decoded are skipped with a message. Images without
    a label file are kept and simply contribute no annotations.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
    images = []
    annotations = []
    # A running counter keeps annotation ids unique no matter how many boxes
    # a single image carries.
    next_ann_id = 0
    for index, img_file in enumerate(os.listdir(image_path)):
        if not img_file.lower().endswith(image_suffixes):
            continue
        img_full_path = os.path.join(image_path, img_file)
        img = cv2.imread(img_full_path)
        if img is None:  # cv2.imread signals failure by returning None
            print(f'skip unreadable image: {img_full_path}')
            continue
        height, width = img.shape[:2]
        images.append({
            'id': index,
            'file_name': img_file,
            'width': width,
            'height': height,
        })

        # Swap only the extension, never a '.jpg' that occurs inside the stem.
        label_file = os.path.join(label_path, os.path.splitext(img_file)[0] + '.txt')
        if not os.path.exists(label_file):
            continue
        with open(label_file, 'r') as f:
            for line in f:
                fields = line.split()  # tolerates tabs and repeated spaces
                if not fields:
                    continue
                class_id = int(fields[0])
                xc, yc, w, h = (float(value) for value in fields[1:5])
                xmin = (xc - w / 2) * width
                ymin = (yc - h / 2) * height
                xmax = (xc + w / 2) * width
                ymax = (yc + h / 2) * height
                # Keep the size as floats so it stays consistent with the
                # float corner coordinates stored next to it.
                bbox_w = w * width
                bbox_h = h * height
                annotations.append({
                    'category_id': class_id,
                    'bbox': [xmin, ymin, bbox_w, bbox_h],
                    'area': bbox_w * bbox_h,
                    'image_id': index,
                    'id': next_ann_id,
                    # Rectangle outline, clockwise from the top-left corner.
                    'segmentation': [[xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]],
                    'iscrowd': 0,
                })
                next_ann_id += 1
    return images, annotations


# key is the split name: 'train', 'test' or 'val'. It selects the source
# sub-directories and the name of the generated file, for example
# annotations/instances_train.json.
def gen_json_file(yolov8_data_path, coco_format_path, key):
    """Convert one split of a YOLO dataset and write its COCO JSON file.

    Args:
        yolov8_data_path: Dataset root containing ``images/<key>`` and
            ``labels/<key>``.
        coco_format_path: Output root; the file is written to
            ``annotations/instances_<key>.json`` below it.
        key: Split name, one of ``'train'``, ``'test'`` or ``'val'``.

    Returns:
        None. An unknown ``key`` is reported and nothing is converted or
        written.
    """
    # Validate first so an unsupported key neither runs the conversion nor
    # ends with a misleading success message.
    if key not in ('train', 'test', 'val'):
        print(f"key is {key}; expected 'train', 'test' or 'val' - nothing generated")
        return
    split_data = {'train': train_data, 'test': test_data, 'val': valid_data}[key]

    json_path = os.path.join(coco_format_path, f'annotations/instances_{key}.json')
    os.makedirs(os.path.dirname(json_path), exist_ok=True)

    data_path = os.path.join(yolov8_data_path, f'images/{key}')
    label_path = os.path.join(yolov8_data_path, f'labels/{key}')
    split_data['images'], split_data['annotations'] = yolo_covert_coco_format(data_path, label_path)

    with open(json_path, 'w') as f:
        json.dump(split_data, f, indent=2)
    print(f'generate {key} json success!')
    return


if __name__ == '__main__':
    # Fill in both paths before running: the YOLO dataset root and the
    # directory that receives the COCO output.
    yolov8_data_path = r''
    coco_format_path = r''
    for split in ('train', 'val', 'test'):
        gen_json_file(yolov8_data_path, coco_format_path, key=split)

coco标签可视化工具，可以用于检查数据集是否有问题


import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import os
# Visual sanity check: draw the boxes and class names of one randomly chosen
# image from a COCO-format dataset and save the annotated figure.

# Root directory of the COCO-format dataset (fill in before running).
dataDir = r''
# Split name; it is used both in the annotation file name and as the image
# sub-directory, so it must match the names actually present on disk.
dataType = 'train2017'
annFile = f'{dataDir}/annotations/instances_{dataType}.json'

# Load the annotation file through the COCO API.
coco = COCO(annFile)

# Pick one image record at random.
imgIds = coco.getImgIds()
img = coco.loadImgs(imgIds[np.random.randint(0, len(imgIds))])[0]

# Read the image file and show it without axes.
I = io.imread(f'{dataDir}/{dataType}/{img["file_name"]}')
plt.imshow(I)
plt.axis('off')

# Fetch every annotation that belongs to the chosen image.
annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
anns = coco.loadAnns(annIds)

# Draw each box together with its class name.
for ann in anns:
    bbox = ann['bbox']
    category_id = ann['category_id']
    category_name = coco.loadCats(category_id)[0]['name']  # look up the class name

    # Box outline; bbox is used as (x, y) of the corner plus width and height.
    rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3], linewidth=2, edgecolor='r', facecolor='none')
    plt.gca().add_patch(rect)

    # Class name placed 10 pixels above the box corner.
    plt.text(bbox[0], bbox[1] - 10, category_name, color='yellow', fontsize=12, weight='bold', backgroundcolor='black')

# NOTE: no output directory is created here; the figure is saved relative to
# the current working directory, with "_annotated.jpg" appended to file_name.

output_path = f'./{img["file_name"]}_annotated.jpg'
plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
plt.show()

4 代码修改

命令行进入代码文件夹，然后输入指令

pip install -v -e.

根据你想要跑什么模型，以faster rcnn为例。

你需要修改 faster-rcnn_r50_fpn.py、class_names.py（mmdet/evaluation/functional/class_names.py）、coco.py、coco_detection.py 和 schedule_1x.py 中的数据集参数和训练参数，例如类别名、类别数、数据集路径、epochs 和 batch size 等。

这些文件的路径

mmdetection-3.0.0/configs/_base_/schedules/schedule_1x.py

mmdetection-3.0.0/configs/_base_/datasets/coco_detection.py

mmdetection-3.0.0/configs/_base_/models/faster-rcnn_r50_fpn.py

mmdetection-3.0.0/mmdet/datasets/coco.py

5 运行

python tools/train.py configs/faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py

或

python tools/train.py work_dirs/faster-rcnn_r50_fpn_1x_coco/faster-rcnn_r50_fpn_1x_coco.py

后续若需要修改参数可以直接在faster-rcnn_r50_fpn_1x_coco.py中修改。

6 测试

python tools/test.py work_dirs/faster-rcnn_r50_fpn_1x_coco/faster-rcnn_r50_fpn_1x_coco.py work_dirs/faster-rcnn_r50_fpn_1x_coco/epoch_20.pth --out res.pkl

7 评价指标转换

代码来自 https://github.com/z1069614715 ，可以把 mmdet 的评价指标转换为 YOLO 风格的评价指标（Precision、Recall、mAP50、mAP50-95）。

import os, torch, cv2, math, tqdm, time, shutil, argparse, json, pickle
import numpy as np
from prettytable import PrettyTable


def clip_boxes(boxes, shape):
    """Clamp xyxy boxes in place to an image of size ``shape`` (height, width).

    Args:
        boxes: ``torch.Tensor`` or numpy array whose last axis starts with
            x1, y1, x2, y2. It is modified in place.
        shape: Image shape; index 0 is the height and index 1 the width.

    Returns:
        None.
    """
    height, width = shape[0], shape[1]
    if not isinstance(boxes, torch.Tensor):
        # numpy arrays: clip both x columns, then both y columns
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, width)
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, height)
        return
    # torch tensors: clamp each coordinate column on its own
    for column, upper in enumerate((width, height, width, height)):
        boxes[..., column].clamp_(0, upper)


def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """Map xyxy boxes from ``img1_shape`` back to ``img0_shape``, in place.

    Args:
        img1_shape: (height, width) of the image the boxes currently refer to.
        boxes: Boxes whose last axis starts with x1, y1, x2, y2.
        img0_shape: (height, width) of the target image.
        ratio_pad: Optional ``(ratio, pad)`` pair. When given, ``ratio[0]`` is
            used as the gain and ``pad`` as the (x, y) padding instead of
            deriving both from the two shapes.

    Returns:
        The same ``boxes`` object after un-padding, rescaling and clipping.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:
        height_ratio = img1_shape[0] / img0_shape[0]
        width_ratio = img1_shape[1] / img0_shape[1]
        gain = min(height_ratio, width_ratio)  # gain = old / new
        pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = pad_x, pad_y

    boxes[..., [0, 2]] -= pad[0]  # remove horizontal padding
    boxes[..., [1, 3]] -= pad[1]  # remove vertical padding
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes


def box_iou(box1, box2, eps=1e-7):
    """
    Pairwise intersection-over-union of two sets of (x1, y1, x2, y2) boxes.
    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py

    Args:
        box1 (torch.Tensor): Tensor of shape (N, 4) with N boxes.
        box2 (torch.Tensor): Tensor of shape (M, 4) with M boxes.
        eps (float, optional): Small constant added to the union so the
            division never hits zero. Defaults to 1e-7.

    Returns:
        (torch.Tensor): NxM tensor with the IoU of every box1/box2 pair.
    """
    # .float() is needed to get accurate IoU values
    first = box1.float().unsqueeze(1)  # (N, 1, 4)
    second = box2.float().unsqueeze(0)  # (1, M, 4)
    first_tl, first_br = first.chunk(2, 2)
    second_tl, second_br = second.chunk(2, 2)

    # overlap width/height, clamped so disjoint boxes contribute zero area
    overlap_wh = (torch.min(first_br, second_br) - torch.max(first_tl, second_tl)).clamp_(0)
    inter = overlap_wh.prod(2)

    area_first = (first_br - first_tl).prod(2)
    area_second = (second_br - second_tl).prod(2)
    return inter / (area_first + area_second - inter + eps)


def process_batch(detections, labels, iouv):
    """
    Build the matrix that marks which detections count as correct.

    Arguments:
        detections (array[N, 6]), x1, y1, x2, y2, conf, class
        labels (array[M, 5]), class, x1, y1, x2, y2
        iouv: 1-D tensor of IoU thresholds
    Returns:
        correct (array[N, len(iouv)]), boolean tensor on ``iouv.device``; one
        column per IoU threshold
    """
    correct = np.zeros((detections.shape[0], iouv.shape[0]), dtype=bool)
    pair_iou = box_iou(labels[:, 1:], detections[:, :4])
    same_class = labels[:, 0:1] == detections[:, 5]
    for level, threshold in enumerate(iouv):
        # candidate pairs: IoU reaches the threshold and the classes agree
        label_idx, det_idx = torch.where((pair_iou >= threshold) & same_class)
        n_candidates = label_idx.shape[0]
        if not n_candidates:
            continue
        # rows of [label, detection, iou]
        pairs = torch.cat(
            (torch.stack((label_idx, det_idx), 1), pair_iou[label_idx, det_idx][:, None]), 1
        ).cpu().numpy()
        if n_candidates > 1:
            # sort by IoU (descending), then keep one row per detection and
            # afterwards one row per label
            pairs = pairs[pairs[:, 2].argsort()[::-1]]
            _, keep_per_detection = np.unique(pairs[:, 1], return_index=True)
            pairs = pairs[keep_per_detection]
            _, keep_per_label = np.unique(pairs[:, 0], return_index=True)
            pairs = pairs[keep_per_label]
        correct[pairs[:, 1].astype(int), level] = True
    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)


def smooth(y, f=0.05):
    """Box-filter ``y`` with a window spanning roughly fraction ``f`` of its length.

    The input is padded on both sides with its first/last value so the result
    has the same length as ``y``.
    """
    window = round(len(y) * f * 2) // 2 + 1  # number of filter taps, always odd
    edge = np.ones(window // 2)  # padding template
    padded = np.concatenate((edge * y[0], y, edge * y[-1]), 0)
    kernel = np.ones(window) / window
    return np.convolve(padded, kernel, mode='valid')


def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=''):
    """ Compute per-class precision, recall, F1 and average precision.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:  True positives (nparray, nx1 or nx10).
        conf:  Objectness value from 0-1 (nparray).
        pred_cls:  Predicted object classes (nparray).
        target_cls:  True object classes (nparray).
        plot:  When true, the precision curve at the first IoU level is interpolated per class
        save_dir, names, prefix:  Accepted but not read by this implementation
        eps:  Small constant that guards the divisions
    # Returns
        tp, fp, p, r, f1, ap, classes; p, r and f1 are taken at the confidence
        index where the smoothed mean F1 is highest, and classes are the
        unique target classes as ints.
    """

    # Order everything by descending confidence
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Classes that occur in the targets, with their label counts
    unique_classes, nt = np.unique(target_cls, return_counts=True)
    nc = unique_classes.shape[0]
    n_iou = tp.shape[1]

    # Per-class curves sampled on a fixed 1000-point confidence grid
    px = np.linspace(0, 1, 1000)
    py = []
    ap = np.zeros((nc, n_iou))
    p = np.zeros((nc, 1000))
    r = np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        selected = pred_cls == c
        n_l = nt[ci]  # number of labels
        n_p = selected.sum()  # number of predictions
        if n_p == 0 or n_l == 0:
            continue

        # Cumulative true and false positives
        tpc = tp[selected].cumsum(0)
        fpc = (1 - tp[selected]).cumsum(0)

        recall = tpc / (n_l + eps)
        precision = tpc / (tpc + fpc)

        # np.interp needs increasing x values; confidences are decreasing,
        # so both axes are negated
        neg_conf = -conf[selected]
        r[ci] = np.interp(-px, neg_conf, recall[:, 0], left=0)
        p[ci] = np.interp(-px, neg_conf, precision[:, 0], left=1)

        # AP for every IoU level
        for j in range(n_iou):
            ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
            if plot and j == 0:
                py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # F1 is the harmonic mean of precision and recall
    f1 = 2 * p * r / (p + r + eps)

    best = smooth(f1.mean(0), 0.1).argmax()  # index of the highest mean F1
    p, r, f1 = p[:, best], r[:, best], f1[:, best]
    tp = (r * nt).round()  # true positives
    fp = (tp / (p + eps) - tp).round()  # false positives
    return tp, fp, p, r, f1, ap, unique_classes.astype(int)


def compute_ap(recall, precision, method='interp'):
    """ Compute the average precision, given the recall and precision curves
    # Arguments
        recall:    The recall curve (list)
        precision: The precision curve (list)
        method:    'interp' integrates over 101 evenly spaced recall points
                   (COCO style); any other value sums the area at the points
                   where recall changes ('continuous')
    # Returns
        Average precision, precision curve, recall curve
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope (monotonically decreasing precision)
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
        # use whichever name this NumPy provides.
        integrate = getattr(np, 'trapezoid', None) or np.trapz
        ap = integrate(np.interp(x, mrec, mpre), x)
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec


def parse_opt():
    """Parse the command-line options of the metric script.

    Unrecognised arguments are ignored. ``--pred_coco`` may point to a COCO
    result ``.json`` file or to a pickled prediction file.

    Returns:
        argparse.Namespace with ``label_coco``, ``pred_coco``, ``iou`` and
        ``conf``.
    """
    option_specs = (
        ('--label_coco', str, '/home/hjj/Desktop/dataset/dataset_visdrone/test_coco.json', 'label coco path'),
        ('--pred_coco', str, 'runs/val/exp/predictions.json', 'pred coco path'),
        ('--iou', float, 0.7, 'iou threshold'),
        ('--conf', float, 0.001, 'conf threshold'),
    )
    parser = argparse.ArgumentParser()
    for flag, value_type, default_value, help_text in option_specs:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)
    known_args, _ignored = parser.parse_known_args()
    return known_args


def _xywh_to_xyxy(bbox):
    """Turn a COCO ``[x, y, w, h]`` box into ``(x_min, y_min, x_max, y_max)``."""
    x_min, y_min, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
    return x_min, y_min, x_min + w, y_min + h


def _load_predictions(pred_path, stem_to_image_id):
    """Group predictions by image id as rows ``[x1, y1, x2, y2, score, class]``.

    A path ending in ``json`` is read as a COCO result list; anything else is
    unpickled and read as a list of per-image result dicts.
    """
    pred_id_dict = {}
    if pred_path.endswith('json'):
        with open(pred_path) as f:
            pred = json.load(f)
        for data in tqdm.tqdm(pred, desc='Process pred...'):
            x_min, y_min, x_max, y_max = _xywh_to_xyxy(data['bbox'])
            pred_id_dict.setdefault(data['image_id'], []).append(
                np.array([x_min, y_min, x_max, y_max, float(data['score']), int(data['category_id'])]))
        return pred_id_dict

    # NOTE(security): unpickling can execute arbitrary code. Only load result
    # files that you produced yourself.
    with open(pred_path, 'rb') as f:
        pred = pickle.load(f)
    for data in tqdm.tqdm(pred, desc='Process pred...'):
        stem = os.path.splitext(os.path.basename(data['img_path']))[0]
        # The pickle only carries the image path, so translate the file stem
        # into the id used by the label file. Falling back to the stem keeps
        # label files working whose image ids are the stems themselves.
        image_id = stem_to_image_id.get(stem, stem)
        rows = pred_id_dict.setdefault(image_id, [])
        instances = data['pred_instances']
        for i in range(instances['labels'].size(0)):
            x_min, y_min, x_max, y_max = instances['bboxes'][i].cpu().detach().numpy()
            rows.append(np.array([x_min, y_min, x_max, y_max,
                                  float(instances['scores'][i]), int(instances['labels'][i])]))
    return pred_id_dict


def main():
    """Compute YOLO-style precision, recall and mAP from COCO labels and predictions.

    NOTE: the ``--iou`` and ``--conf`` options are parsed but not applied
    anywhere in this script.
    """
    opt = parse_opt()

    iouv = torch.linspace(0.5, 0.95, 10)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()
    stats = []

    with open(opt.label_coco) as f:
        label = json.load(f)

    # Look class names up by category id instead of by list position, so
    # non-contiguous ids and classes without any label stay aligned.
    class_names = {data['id']: data['name'] for data in label['categories']}

    image_ids = list(dict.fromkeys(data['id'] for data in label['images']))
    stem_to_image_id = {
        os.path.splitext(os.path.basename(data['file_name']))[0]: data['id']
        for data in label['images']
    }

    label_id_dict = {}
    for data in tqdm.tqdm(label['annotations'], desc='Process label...'):
        x_min, y_min, x_max, y_max = _xywh_to_xyxy(data['bbox'])
        label_id_dict.setdefault(data['image_id'], []).append(
            np.array([int(data['category_id']), x_min, y_min, x_max, y_max]))

    pred_id_dict = _load_predictions(opt.pred_coco, stem_to_image_id)

    for image_id in tqdm.tqdm(image_ids, desc="Cal mAP..."):
        # Images without annotations or without predictions get empty arrays
        # instead of raising KeyError.
        gt_rows = label_id_dict.get(image_id)
        gt = np.array(gt_rows) if gt_rows else np.zeros((0, 5))
        pred_rows = pred_id_dict.get(image_id)
        pred = torch.from_numpy(np.array(pred_rows)) if pred_rows else torch.zeros((0, 6), dtype=torch.float64)

        nl, npr = gt.shape[0], pred.shape[0]
        correct = torch.zeros(npr, niou, dtype=torch.bool)
        if npr == 0:
            if nl:
                # float64 matches the dtype of the prediction columns that
                # are concatenated with these placeholders further down
                stats.append((correct, *torch.zeros((2, 0), dtype=torch.float64), torch.from_numpy(gt[:, 0])))
            continue

        if nl:
            correct = process_batch(pred, torch.from_numpy(gt), iouv)
        stats.append((correct, pred[:, 4], pred[:, 5], torch.from_numpy(gt[:, 0])))

    if not stats:
        print('No image has labels or predictions; nothing to evaluate.')
        return

    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]
    tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats)
    print(f'precision:{p}')
    print(f'recall:{r}')
    print(f'mAP@0.5:{ap[:, 0]}')

    table = PrettyTable()
    table.title = "Metrice"
    table.field_names = ["Classes", 'Precision', 'Recall', 'mAP50', 'mAP50-95']
    table.add_row(['all', f'{np.mean(p):.3f}', f'{np.mean(r):.3f}', f'{np.mean(ap[:, 0]):.3f}', f'{np.mean(ap):.3f}'])
    # ap_class lists the classes that actually occur in the labels; row i of
    # p / r / ap belongs to ap_class[i].
    for row, cls_id in enumerate(ap_class):
        cls_name = class_names.get(int(cls_id), str(int(cls_id)))
        table.add_row([cls_name, f'{p[row]:.3f}', f'{r[row]:.3f}', f'{ap[row, 0]:.3f}',
                       f'{ap[row, :].mean():.3f}'])
    print(table)


if __name__ == '__main__':
    main()

需要使用coco格式的标签文件和推理生成的pkl文件。

转换后结果示例：

precision:[0.79133871 0.92334536 0.94697749 0.95896188 0.93151955]
recall:[0.7721585  0.9375     0.96192053 0.81843044 0.91293532]
mAP@0.5:[0.81393383 0.96300486 0.98147308 0.91452196 0.95637408]
+-------------------------------------------------+
|                     Metrice                     |
+---------+-----------+--------+-------+----------+
| Classes | Precision | Recall | mAP50 | mAP50-95 |
+---------+-----------+--------+-------+----------+
|   all   |   0.910   | 0.881  | 0.926 |  0.553   |
|   LGB   |   0.791   | 0.772  | 0.814 |  0.462   |
|   RFB   |   0.923   | 0.938  | 0.963 |  0.545   |
|   IMM   |   0.947   | 0.962  | 0.981 |  0.592   |
|    MW   |   0.959   | 0.818  | 0.915 |  0.567   |
|   AGM   |   0.932   | 0.913  | 0.956 |  0.596   |
+---------+-----------+--------+-------+----------+

若出现评价指标为0的情况，建议检查标签文件是否有问题。

可以尝试重新转换一下标签文件。

yolo2coco

import os
import cv2
import json
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import argparse

# Class names of the dataset; the position of a name in this list is used as
# its COCO category id.
classes = ['LGB', 'RFB', 'IMM', 'MW', 'AGM']

# Command-line options. They are parsed at module level, so ``arg`` exists as
# soon as this file is executed.
parser = argparse.ArgumentParser()
parser.add_argument('--image_path', default=r'', type=str, help="path of images")
parser.add_argument('--label_path', default=r'', type=str, help="path of labels .txt")
parser.add_argument(
    '--save_path',
    default='test.json',
    type=str,
    help="if not split the dataset, give a path to a json file",
)
arg = parser.parse_args()

def yolo2coco(arg):
    """Convert a directory of YOLO labels into one COCO annotation file.

    Args:
        arg: Parsed options providing ``image_path`` (image directory),
            ``label_path`` (directory of YOLO ``.txt`` files) and
            ``save_path`` (output JSON file).

    The image file name without its extension is used as the COCO image id.
    Files without a label file, and files that cannot be decoded as an image,
    are skipped and do not appear in the output.
    """
    print("Loading data from ", arg.image_path, arg.label_path)

    assert os.path.exists(arg.image_path)
    assert os.path.exists(arg.label_path)

    originImagesDir = arg.image_path
    originLabelsDir = arg.label_path
    # file names found in the image directory
    indexes = os.listdir(originImagesDir)

    dataset = {'categories': [], 'annotations': [], 'images': []}
    for i, cls in enumerate(classes, 0):
        dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})

    # running annotation id
    ann_id_cnt = 0
    for index in tqdm(indexes):
        # splitext also copes with names that contain no dot at all
        stem = os.path.splitext(index)[0]
        label_file = os.path.join(originLabelsDir, f'{stem}.txt')
        if not os.path.exists(label_file):
            # no label file: skip the entry entirely
            continue

        image_file = os.path.join(originImagesDir, index)
        im = cv2.imread(image_file)
        if im is None:
            # Skip instead of continuing with the size of a previous image.
            print(f'{image_file} read error.\nerror:cv2.imread returned None')
            continue
        img_h, img_w = im.shape[:2]

        dataset['images'].append({'file_name': index,
                                  'id': stem,
                                  'width': img_w,
                                  'height': img_h})
        with open(label_file, 'r') as fr:
            for line in fr:
                fields = line.split()
                if not fields:  # ignore blank lines
                    continue
                # class indices start at 0 and are used directly as category ids
                cls_id = int(fields[0])
                x, y, w, h = (float(value) for value in fields[1:5])

                # convert normalised centre/size to pixel corner coordinates
                x1 = (x - w / 2) * img_w
                y1 = (y - h / 2) * img_h
                x2 = (x + w / 2) * img_w
                y2 = (y + h / 2) * img_h
                # Separate names for the box size keep the image size intact.
                box_w = max(0, x2 - x1)
                box_h = max(0, y2 - y1)
                dataset['annotations'].append({
                    'area': box_w * box_h,
                    'bbox': [x1, y1, box_w, box_h],
                    'category_id': cls_id,
                    'id': ann_id_cnt,
                    'image_id': stem,
                    'iscrowd': 0,
                    # rectangle outline, clockwise from the top-left corner
                    'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
                })
                ann_id_cnt += 1

    # write the result
    with open(arg.save_path, 'w') as f:
        json.dump(dataset, f)
        print('Save annotation to {}'.format(arg.save_path))

# Script entry point: run the conversion with the options parsed at module level.
if __name__ == "__main__":
    yolo2coco(arg)

查看全文

http://www.kler.cn/news/336073.html