当前位置：首页 > article >正文

深度学习常见数据集处理方法

article 2025/3/1 7:00:22

1、数据集格式转换（json转txt）

import json
import os
 
'''
任务：实例分割，labelme的json文件, 转txt文件
Ultralytics YOLO format
<class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>
'''
 
# Class mapping table: LabelMe label name -> YOLO class index.
label_to_class_id = {
    "tree": 0
    # Add more classes here as needed.
}


def convert_labelme_json_to_yolo(json_file, output_dir, img_width=None, img_height=None):
    """Convert one LabelMe JSON annotation file into a YOLO segmentation txt file.

    Each shape whose label is present in ``label_to_class_id`` becomes one line:
    ``<class-index> <x1> <y1> ... <xn> <yn>`` with coordinates normalized by the
    image size and written with six decimals. Shapes with an unknown label are
    reported with a warning and skipped.

    Args:
        json_file: Path of the LabelMe JSON file.
        output_dir: Existing directory that receives ``<json basename>.txt``.
        img_width: Image width in pixels. When ``None`` the ``imageWidth``
            value stored in the JSON file is used.
        img_height: Image height in pixels. When ``None`` the ``imageHeight``
            value stored in the JSON file is used.
    """
    with open(json_file, 'r') as f:
        labelme_data = json.load(f)

    # Fall back to the size recorded by LabelMe so that images of different
    # sizes can be converted without hard-coding a single resolution.
    if img_width is None:
        img_width = labelme_data['imageWidth']
    if img_height is None:
        img_height = labelme_data['imageHeight']

    # Output file shares the JSON file's base name.
    file_name = os.path.splitext(os.path.basename(json_file))[0]
    txt_file_path = os.path.join(output_dir, f"{file_name}.txt")

    lines = []
    for shape in labelme_data['shapes']:
        label = shape['label']
        class_id = label_to_class_id.get(label)
        if class_id is None:
            print(f"Warning: Label '{label}' not found in class mapping. Skipping.")
            continue

        fields = [f"{class_id}"]
        for x, y in shape['points']:
            # Vertices dragged slightly outside the image would otherwise give
            # coordinates outside the 0-1 range the label format expects.
            norm_x = min(max(x / img_width, 0.0), 1.0)
            norm_y = min(max(y / img_height, 0.0), 1.0)
            fields.append(f"{norm_x:.6f} {norm_y:.6f}")
        lines.append(" ".join(fields) + "\n")

    with open(txt_file_path, 'w') as txt_file:
        txt_file.writelines(lines)
 
if __name__ == "__main__":
    json_dir = "json"  # directory holding the LabelMe JSON files
    output_dir = "labels"  # directory that receives the YOLO txt files
    img_width = 500   # image width, set to match the real pictures
    img_height = 500  # image height, set to match the real pictures

    # Make sure the output folder exists before converting.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Convert every JSON file found directly inside json_dir.
    json_names = [name for name in os.listdir(json_dir) if name.endswith(".json")]
    for name in json_names:
        convert_labelme_json_to_yolo(os.path.join(json_dir, name), output_dir, img_width, img_height)

2、数据集扩充（带json标签）

import time
import random
import cv2
import os
import numpy as np
from skimage.util import random_noise
import base64
import json
import re
from copy import deepcopy
import argparse
 
class DataAugmentForObjectDetection():
    """Randomly augment an image while keeping its LabelMe annotations in sync.

    Five augmentations are available: noise, brightness change, random pixel
    corruption, translation and mirroring. Every augmentation has an ``is_*``
    switch (True enables it) and a probability with which it is applied on
    each pass of ``dataAugment``.
    """

    def __init__(self, change_light_rate=0.5,
                 add_noise_rate=0.2, random_point=0.5, flip_rate=0.5, shift_rate=0.5, rand_point_percent=0.03,
                 is_addNoise=True, is_changeLight=False, is_random_point=True, is_shift_pic_bboxes=True,
                 is_filp_pic_bboxes=True):
        # Probability of applying each augmentation on one pass.
        self.change_light_rate = change_light_rate
        self.add_noise_rate = add_noise_rate
        self.random_point = random_point
        self.flip_rate = flip_rate
        self.shift_rate = shift_rate
        # Fraction of pixels overwritten by _addRandPoint.
        self.rand_point_percent = rand_point_percent

        # Switches selecting which augmentations may be used.
        self.is_addNoise = is_addNoise
        self.is_changeLight = is_changeLight
        self.is_random_point = is_random_point
        self.is_filp_pic_bboxes = is_filp_pic_bboxes
        self.is_shift_pic_bboxes = is_shift_pic_bboxes

    def _addNoise(self, img):
        """Return ``img`` with random noise added, as a uint8 image.

        No explicit seed is passed: seeding with the current second gave every
        image processed within the same second an identical noise pattern.
        """
        # NOTE(review): random_noise is expected to return floats in [0, 1]
        # for an unsigned input image - confirm against the installed
        # scikit-image version.
        noisy = random_noise(img)
        return np.clip(np.rint(noisy * 255), 0, 255).astype(np.uint8)

    def _changeLight(self, img):
        """Darken ``img`` by a random factor drawn from [0.35, 1]."""
        alpha = random.uniform(0.35, 1)
        blank = np.zeros(img.shape, img.dtype)
        # Blending with an all-zero image scales every pixel by alpha.
        return cv2.addWeighted(img, alpha, blank, 1 - alpha, 0)

    def _addRandPoint(self, img):
        """Overwrite a random subset of pixels with 0 or 255 (modifies ``img`` in place)."""
        num = int(self.rand_point_percent * img.shape[0] * img.shape[1])
        for _ in range(num):
            row = random.randint(0, img.shape[0] - 1)
            col = random.randint(0, img.shape[1] - 1)
            img[row, col] = 0 if random.randint(0, 1) == 0 else 255
        return img

    @staticmethod
    def _annotation_bounds(shapes, w, h):
        """Return ``(x_min, y_min, x_max, y_max)`` covering all annotation points.

        With no points the result is ``(w, h, 0, 0)``, i.e. an empty box.
        """
        x_min, y_min, x_max, y_max = float(w), float(h), 0.0, 0.0
        for shape in shapes:
            points = np.asarray(shape['points'], dtype=float)
            if points.size == 0:
                continue
            x_min = min(x_min, float(points[:, 0].min()))
            y_min = min(y_min, float(points[:, 1].min()))
            x_max = max(x_max, float(points[:, 0].max()))
            # The y extent must come from column 1 (it was read from column 0).
            y_max = max(y_max, float(points[:, 1].max()))
        return x_min, y_min, x_max, y_max

    def _shift_pic_bboxes(self, img, json_info):
        """Translate the image by a random offset and move the annotations with it.

        The offset is limited to a third of the free margin between the
        annotations and the image border. ``json_info`` is modified in place.
        Shifted annotations may still need checking in LabelMe.
        """
        h, w = img.shape[:2]
        shapes = json_info['shapes']
        x_min, y_min, x_max, y_max = self._annotation_bounds(shapes, w, h)
        d_to_left = x_min
        d_to_right = w - x_max
        d_to_top = y_min
        d_to_bottom = h - y_max
        x = random.uniform(-(d_to_left - 1) / 3, (d_to_right - 1) / 3)
        y = random.uniform(-(d_to_top - 1) / 3, (d_to_bottom - 1) / 3)

        M = np.float32([[1, 0, x], [0, 1, y]])
        shift_img = cv2.warpAffine(img, M, (w, h))
        for shape in shapes:
            for p in shape['points']:
                p[0] += x
                p[1] += y
        return shift_img, json_info

    def _filp_pic_bboxes(self, img, json_info):
        """Mirror the image vertically, horizontally or both, chosen at random.

        The annotation points in ``json_info`` are mirrored in place to match.
        """
        h, w = img.shape[:2]
        sed = random.random()
        if 0 < sed < 0.33:
            inver = 0  # flip around the x axis (top <-> bottom)
        elif 0.33 < sed < 0.66:
            inver = 1  # flip around the y axis (left <-> right)
        else:
            inver = -1  # flip around both axes
        flip_img = cv2.flip(img, inver)

        mirror_x = inver in (1, -1)
        mirror_y = inver in (0, -1)
        for shape in json_info['shapes']:
            for p in shape['points']:
                if mirror_x:
                    p[0] = w - p[0]
                if mirror_y:
                    p[1] = h - p[1]
        return flip_img, json_info

    def dataAugment(self, img, dic_info):
        """Apply random augmentations until at least one has been applied.

        Args:
            img: Image array as returned by ``cv2.imread``.
            dic_info: LabelMe annotation dict; its points are updated in place
                by the geometric augmentations.

        Returns:
            Tuple ``(augmented image, annotation dict)``. When no augmentation
            can ever fire (all switches off or all rates <= 0) the inputs are
            returned unchanged instead of looping forever.
        """
        candidates = (
            (self.is_changeLight, self.change_light_rate),
            (self.is_addNoise, self.add_noise_rate),
            (self.is_random_point, self.random_point),
            (self.is_shift_pic_bboxes, self.shift_rate),
            (self.is_filp_pic_bboxes, self.flip_rate),
        )
        if not any(enabled and rate > 0 for enabled, rate in candidates):
            return img, dic_info

        change_num = 0
        while change_num < 1:
            # Every rate is the probability of applying the augmentation.
            if self.is_changeLight and random.random() < self.change_light_rate:
                change_num += 1
                img = self._changeLight(img)
            if self.is_addNoise and random.random() < self.add_noise_rate:
                change_num += 1
                img = self._addNoise(img)
            if self.is_random_point and random.random() < self.random_point:
                change_num += 1
                img = self._addRandPoint(img)
            if self.is_shift_pic_bboxes and random.random() < self.shift_rate:
                change_num += 1
                img, dic_info = self._shift_pic_bboxes(img, dic_info)
            if self.is_filp_pic_bboxes and random.random() < self.flip_rate:
                change_num += 1
                img, dic_info = self._filp_pic_bboxes(img, dic_info)
        return img, dic_info
 
 
class ToolHelper():
    """Small I/O helpers for reading and writing images and LabelMe JSON files."""

    def parse_json(self, path):
        """Load the JSON file at ``path`` and return the parsed object."""
        with open(path) as f:
            return json.load(f)

    def img2str(self, img_name):
        """Return the content of the file ``img_name`` as a base64 text string."""
        with open(img_name, "rb") as f:
            # Base64 output is pure ASCII, so decoding yields the same text
            # that was previously cut out of the bytes repr with a regex.
            return base64.b64encode(f.read()).decode('ascii')

    def save_img(self, save_path, img):
        """Write ``img`` to ``save_path``.

        Raises:
            IOError: If ``cv2.imwrite`` reports that nothing was written.
        """
        if not cv2.imwrite(save_path, img):
            raise IOError('Failed to write image: {}'.format(save_path))

    def save_json(self, file_name, save_folder, dic_info):
        """Dump ``dic_info`` as indented JSON to ``save_folder/file_name``."""
        with open(os.path.join(save_folder, file_name), 'w') as f:
            json.dump(dic_info, f, indent=2)
 
 
if __name__ == '__main__':
    # Batch driver: for every .jpg/.png image below the source folder that has
    # a LabelMe JSON next to it, write `need_aug_num` augmented image/JSON
    # pairs into the save folder.
    need_aug_num = 5  # number of augmented copies generated per image
    toolhelper = ToolHelper()
    dataAug = DataAugmentForObjectDetection()
    parser = argparse.ArgumentParser()
    # Folder holding the original images and their JSON annotations.
    parser.add_argument('--source_img_json_path', type=str, default=r'/home/leeqianxi/YOLO/datasets/data/data')
    # Folder that receives the augmented images and JSON files.
    parser.add_argument('--save_img_json_path', type=str, default=r'/home/leeqianxi/YOLO/datasets/data/new_data')
    args = parser.parse_args()
    source_img_json_path = args.source_img_json_path
    save_img_json_path = args.save_img_json_path

    # Create the output folder (including missing parents) when needed.
    os.makedirs(save_img_json_path, exist_ok=True)

    for parent, _, files in os.walk(source_img_json_path):
        files.sort()
        for file in files:
            # Split on the real extension instead of assuming its length.
            _file_prefix, _file_suffix = os.path.splitext(file)
            if _file_suffix.lower() not in ('.jpg', '.png'):
                continue

            pic_path = os.path.join(parent, file)
            json_path = os.path.join(parent, _file_prefix + '.json')
            # One unlabeled or unreadable picture must not abort the batch.
            if not os.path.exists(json_path):
                print('Skip {}: annotation file not found'.format(pic_path))
                continue
            json_dic = toolhelper.parse_json(json_path)
            img = cv2.imread(pic_path)
            if img is None:
                print('Skip {}: image could not be read'.format(pic_path))
                continue

            for cnt in range(need_aug_num):
                # Work on copies so every round starts from the original data.
                auged_img, json_info = dataAug.dataAugment(deepcopy(img), deepcopy(json_dic))
                img_name = '{}_{}{}'.format(_file_prefix, cnt + 1, _file_suffix)
                img_save_path = os.path.join(save_img_json_path, img_name)
                toolhelper.save_img(img_save_path, auged_img)

                # Point the annotation at the new image and embed its bytes.
                json_info['imagePath'] = img_name
                json_info['imageData'] = toolhelper.img2str(img_save_path)
                toolhelper.save_json('{}_{}.json'.format(_file_prefix, cnt + 1),
                                     save_img_json_path, json_info)
                print(img_name)

3、数据集划分（训练集、测试集、验证集）

# 将图片和标注数据按比例切分为 训练集和测试集
import shutil
import random
import os
 
# Source locations of the images and their label files
image_original_path = "/home/leeqianxi/YOLO/ultralytics/pic/"
label_original_path = "/home/leeqianxi/YOLO/ultralytics/labels/"
 
# All output is written below the current working directory
cur_path = os.getcwd()
# Training set directories
train_image_path = os.path.join(cur_path, "datasets/images/train/")
train_label_path = os.path.join(cur_path, "datasets/labels/train/")
 
# Validation set directories
val_image_path = os.path.join(cur_path, "datasets/images/val/")
val_label_path = os.path.join(cur_path, "datasets/labels/val/")
 
# Test set directories
test_image_path = os.path.join(cur_path, "datasets/images/test/")
test_label_path = os.path.join(cur_path, "datasets/labels/test/")
 
# Text files listing the image paths assigned to each split
list_train = os.path.join(cur_path, "datasets/train.txt")
list_val = os.path.join(cur_path, "datasets/val.txt")
list_test = os.path.join(cur_path, "datasets/test.txt")
 
# Split ratios for the training and validation sets.
# NOTE(review): test_percent is not referenced by the visible code; the test
# split appears to be whatever remains after train and val - confirm.
train_percent = 0.8
val_percent = 0.2
test_percent = 0
 
 
def del_file(path):
    """Delete every file located directly inside the directory ``path``.

    Sub-directories are left untouched. The entry paths are built with
    ``os.path.join`` so the function works with any path separator (the
    hard-coded backslash only produced valid paths on Windows).
    """
    for entry in os.listdir(path):
        file_data = os.path.join(path, entry)
        if os.path.isfile(file_data) or os.path.islink(file_data):
            os.remove(file_data)
 
 
def mkdir():
    """Prepare the six split folders: create the missing ones, empty the existing ones."""
    split_dirs = (
        train_image_path, train_label_path,
        val_image_path, val_label_path,
        test_image_path, test_label_path,
    )
    for folder in split_dirs:
        if os.path.exists(folder):
            # Folder left over from an earlier run: remove its old files.
            del_file(folder)
        else:
            os.makedirs(folder)
 
 
def clearfile():
    """Remove the train/val/test list files if they already exist."""
    for listing in (list_train, list_val, list_test):
        if os.path.exists(listing):
            os.remove(listing)
 
 
def _split_indices(total, train_ratio, val_ratio):
    """Randomly partition ``range(total)`` into (train, val, test) index lists."""
    num_train = int(total * train_ratio)
    num_val = int(total * val_ratio)
    order = random.sample(range(total), total)
    # Whatever is not taken by train and val becomes the test split.
    return order[:num_train], order[num_train:num_train + num_val], order[num_train + num_val:]


def main():
    """Randomly split the labelled images into train/val/test folders.

    For every ``.txt`` label in ``label_original_path`` the label and the
    ``.png`` image of the same name are copied into the folders of the split
    they were drawn into, and the copied image path is appended to that
    split's list file.

    Raises:
        FileNotFoundError: If a label has no matching ``.png`` image.
    """
    mkdir()
    clearfile()

    # Only real label files take part; other entries would yield bogus names.
    label_files = sorted(f for f in os.listdir(label_original_path) if f.endswith('.txt'))
    train, val, test = _split_indices(len(label_files), train_percent, val_percent)

    print("训练集数目：{}, 验证集数目：{}, 测试集数目：{}".format(len(train), len(val), len(test)))

    plan = (
        (train, train_image_path, train_label_path, list_train),
        (val, val_image_path, val_label_path, list_val),
        (test, test_image_path, test_label_path, list_test),
    )
    for indices, image_dir, label_dir, list_path in plan:
        # The context manager closes the list file even when a copy fails.
        with open(list_path, 'w') as list_file:
            for idx in sorted(indices):
                name = os.path.splitext(label_files[idx])[0]
                src_image = os.path.join(image_original_path, name + '.png')
                src_label = os.path.join(label_original_path, name + '.txt')
                # Keep the source extension for every split: the test split
                # used to rename the PNG data to '.jpg'.
                dst_image = os.path.join(image_dir, name + '.png')
                dst_label = os.path.join(label_dir, name + '.txt')
                shutil.copyfile(src_image, dst_image)
                shutil.copyfile(src_label, dst_label)
                list_file.write(dst_image + '\n')


if __name__ == "__main__":
    main()

4、图像裁剪为固定大小

from PIL import Image
import os

def crop_image(image_path, output_dir, crop_c, crop_size=(500, 500)):
    """Cut an image into non-overlapping tiles and save them as PNG files.

    Tiles are taken row by row from the top-left corner; the remainder at the
    right and bottom edges that does not fill a whole tile is dropped. Files
    are named ``crop_<n>.png`` with ``n`` starting at ``crop_c + 1``.

    Args:
        image_path: Path of the image to cut.
        output_dir: Folder for the tiles; created when missing.
        crop_c: Number of tiles already written, i.e. the numbering offset.
        crop_size: ``(width, height)`` of one tile in pixels.

    Returns:
        The counter after the last tile, so it can be passed as ``crop_c``
        for the next image to continue the numbering.
    """
    crop_width, crop_height = crop_size
    crop_count = crop_c

    # The context manager releases the image file once all tiles are saved.
    with Image.open(image_path) as img:
        img_width, img_height = img.size

        os.makedirs(output_dir, exist_ok=True)

        # Number of whole tiles that fit in each direction.
        horizontal_crops = img_width // crop_width
        vertical_crops = img_height // crop_height

        for i in range(vertical_crops):
            for j in range(horizontal_crops):
                left = j * crop_width
                upper = i * crop_height
                box = (left, upper, left + crop_width, upper + crop_height)

                output_path = os.path.join(output_dir, f"crop_{crop_count+1}.png")
                img.crop(box).save(output_path)
                crop_count += 1

    # Report the tiles produced by this call, not the running counter.
    print(f"裁剪完成，共裁剪 {crop_count - crop_c} 张图片。")
    return crop_count


if __name__ == "__main__":
    # Input picture, destination folder and starting tile counter.
    image_path = "img.png"
    output_dir = 'cropped_images'
    crop_c = 0
    crop_image(image_path=image_path, output_dir=output_dir, crop_c=crop_c)

查看全文

http://www.kler.cn/a/418865.html