机器视觉 AI 数据集制作
工业中,机器视觉物体分拣时,需要制作,数据集,那么,一般情况下,可以选择几个物体的几张图片,或者视频,将待识别的物体的掩模扣取出来,随机的贴在 传送带背景中,并批量自动的写成 VOC 数据集
使用图像处理的技术手段,将上述的目标的掩模扣取出来,或者使用 ps 的技术扣取掩模均可。
# -*- coding : utf-8 -*-
# @Data : 2019-08-16
# @Author : xm
# @Email :
# @File : image_process.py
# Desctiption: 求取图像中物体的边界矩形
import numpy as np
import cv2
import os
def calculatBoundImage(src_Image):
"""
求取图像中物体的边界矩形框
:param src_Image: 输出的源图像
:return: 返回图像中的物体边界矩形
"""
tmp_image = src_Image.copy()
#print(tmp_image)
if (len(tmp_image.shape) == 3):
tmp_image = cv2.cvtColor(tmp_image, cv2.COLOR_BGR2GRAY)
# 自适应阈值进行二值化
thresh_image = cv2.adaptiveThreshold(tmp_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 71, 10)
thresh_image = cv2.morphologyEx(thresh_image, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_RECT, (25, 25)))
# 寻找最外层轮廓
contours_ls, hierarchy = cv2.findContours(thresh_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)
pnt_cnt_ls = np.array([tmp_contour.shape[0] for tmp_contour in contours_ls])
contour_image = src_Image.copy()
contours_idx = np.argmax(pnt_cnt_ls)
contour_image = cv2.drawContours(contour_image, contours_ls, contours_idx, (0, 0, 255))
longest_contour = contours_ls[contours_idx]
countour_image_gray = np.zeros(src_Image.shape, dtype=np.uint8)
countour_image_gray = cv2.drawContours(countour_image_gray, contours_ls, contours_idx, (1, 1, 1), cv2.FILLED)
obj_image = src_Image * countour_image_gray
bound_box = cv2.boundingRect(longest_contour)
return bound_box, contour_image, obj_image
def rotateImage(src_Image, angle_deg, rotate_center=None):
"""
对目标图像进行旋转
:param src_Image: 输入的源图像
:param angle_deg: 旋转的角度
:param rotate_center: 旋转的中心
:return: 旋转后的图片
"""
(h, w) = src_Image.shape[:2]
if rotate_center is None:
rotate_center = ((w -1) / 2, (h - 1) / 2)
rot_mat = cv2.getRotationMatrix2D(rotate_center, angle_deg, 1.0)
rot_iamge = cv2.warpAffine(src_Image, rot_mat, (w, h))
return rot_iamge
def VideotoImage(video_file, folder_path):
"""
数据的视频保存为提取之后的物体图
:param video_file: 视频文件
:param folder_path: 保存图片的路径
:return: 保存的图片
"""
video_cap = cv2.VideoCapture(video_file)
image_idx = 2000
while True:
ret, frame = video_cap.read()
if (frame is None):
continue
bound_box, contour_image, obj_image = calculatBoundImage(frame)
bound_thres = 4500
if (bound_box[2] > bound_thres or bound_box[3] > bound_thres):
continue
contour_image = cv2.rectangle(contour_image, (bound_box[0], bound_box[1]),(bound_box[0] + bound_box[2],bound_box[1] + bound_box[3]), (225, 0, 0), thickness=2)
#cv2.imshow('frame', contour_image)
image_name = str(image_idx).zfill(6) + '.jpg'
image_idx += 1
if image_idx % 2 == 0:
cv2.imwrite(folder_path + image_name, obj_image)
cv2.waitKey(25)
if 0xFF & cv2.waitKey(5) == 27:
break
video_cap.release()
def BatchImageProcess(image_path, folder_path):
"""
批量图片物体提取,背景为黑色
:param Image_path: 图片的路径
:param folder_path: 图像处理之后的保存路径
:return: 保存的图片
"""
image_file_list = os.listdir(image_path)
# 获取物体图像的文件名
image_idx = 0
for image_name in range(len(image_file_list)):
obj_image_path = image_path + image_file_list[image_idx]
src_Image = cv2.imread(obj_image_path)
bound_box, contour_image, obj_image = calculatBoundImage(src_Image)
bound_thres = 4500
if (bound_box[2] > bound_thres or bound_box[3] > bound_thres):
continue
contour_image = cv2.rectangle(contour_image, (bound_box[0], bound_box[1]), (bound_box[0] + bound_box[2], bound_box[1] + bound_box[3]), (225, 0, 0), thickness=2)
#cv2.imshow('frame', contour_image)
image_name = str(image_idx).zfill(6) + '.jpg'
cv2.imwrite(folder_path + image_name, obj_image)
image_idx += 1
def main():
image_path = "/home/xm/workspace/ImageProcess/tmp/circle/"
folder_path = "/home/xm/workspace/ImageProcess/tmp/"
BatchImageProcess(image_path, folder_path)
# def main():
# src_Image = cv2.imread("./Images/00001.png")
# bound_box, contour_image, obj_image = calculatBoundImage(src_Image)
# print("bound_box", bound_box)
#
# cv2.namedWindow("input image", cv2.WINDOW_AUTOSIZE)
# cv2.imshow("input image", contour_image)
#
#
# # 一般源图像进行旋转再提取轮廓
# rot_image = rotateImage(src_Image, 20, rotate_center=None)
# cv2.imshow("obj image", obj_image)
# cv2.imshow("rot image", rot_image)
# cv2.waitKey(0)
#
# # vide_file = "./Images/blue_1_82.mp4"
# # folder_path = "./results/"
# #
# # VideotoImage(vide_file, folder_path)
if __name__ == "__main__":
main()
# -*- coding : utf-8 -*-
# @Data : 2019-08-17
# @Author : xm
# @Email :
# @File : ImageDataSetGeneration.py
# Desctiption: 生成物体分类的图像数据集
import numpy as np
import cv2
import os
from lxml import etree, objectify
def rotateImage(src_image, rotate_deg):
"""
对图像进行旋转
:param src_image: 输入源图像
:param rotate_dog: 旋转角度
:return: 旋转后的图像
"""
img_h, img_w = src_image.shape[0:2]
rotate_mat = cv2.getRotationMatrix2D((img_w / 2.0, img_h / 2.0), rotate_deg, 1.0)
dst_image = cv2.warpAffine(src_image, rotate_mat, (img_w, img_h))
return dst_image
def calculateBoundImage(src_image):
"""
求图像中物体的边界矩形
:param src_image: 源图像
:return: 图像中物体的边界矩形、轮廓图、目标图像
"""
tmp_image = src_image.copy()
if len(tmp_image.shape) == 3:
tmp_image = cv2.cvtColor(tmp_image, cv2.COLOR_BGR2GRAY)
ret, thresh_images = cv2.threshold(tmp_image, 0, 255,cv2.THRESH_BINARY)
contours_ls, _ = cv2.findContours(thresh_images, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
all_points = np.concatenate(contours_ls, axis=0)
bound_box = cv2.boundingRect(all_points)
return bound_box
def randomMoveObjectInImage(src_image, src_bound_box):
"""
将物体在图像中随机摆放
:param src_image: 背景图 COCO/VOC
:param src_bound_box: 原始边界框
:return: 相机旋转后的边界框
"""
x, y, w, h = src_bound_box
img_h, img_w = src_image.shape[0:2]
img_h -= h
img_w -= w
random_array = np.random.uniform(0.0, 1.0, 2)
bbox_x = np.floor(img_w * random_array[0])
bbox_y = np.floor(img_h * random_array[1])
return np.array([bbox_x, bbox_y, w, h])
def calculateIOU(bound_box_1, bound_box_2):
"""
计算两个 bound_box 之间的 IOU
:param bound_box_1: 边界框 1, shape [x, y, w, h]
:param bound_box_2: 边界框 2,shape [x, y, w, h]
:return: 两个 bound box 之间的 IOU 值
"""
min_xy = np.maximum(bound_box_1[0:2], bound_box_2[0:2])
max_xy = np.minimum(bound_box_1[0:2] + bound_box_2[2:4],
bound_box_2[0:2] + bound_box_2[2:4])
delta_xy = max_xy - min_xy
intersection_area = delta_xy[0] * delta_xy[1]
if (intersection_area < 0):
return
box_area_1 = bound_box_1[2] * bound_box_1[3]
box_area_2 = bound_box_2[2] * bound_box_2[3]
union_area = box_area_1 + box_area_2 - intersection_area
return intersection_area / union_area
def resizeObjectImage(src_image, max_min_box_size):
"""
对物体图像进行随机缩放
:param src_image: 原始图像
:param max_min_box_size: 缩放后图像中的物体的 bound box 的最大边的范围
:return: 缩放后的图像
"""
src_bbox = calculateBoundImage(src_image)
src_bbox_max = np.max(src_bbox[2:4])
cur_bbox_max = np.random.uniform(max_min_box_size[1], max_min_box_size[0], 1)[-1]
cur_ratio = cur_bbox_max / src_bbox_max
src_h, src_w = src_image.shape[0:2]
dst_h, dst_w = np.floor(src_h * cur_ratio), np.floor(src_w * cur_ratio)
dst_image = cv2.resize(src_image, (np.int(dst_w), np.int(dst_h)))
return dst_image
def addObjectToImage(backgroup_image, obj_image, bound_box):
"""
将目标物体添加到背景图中
:param backgroup_image: 背景图
:param obj_image: 目标物体图
:param bound_box: 边界矩形框
:return: 添加了目标物体的背景图
"""
tmp_image = obj_image.copy()
if len(tmp_image.shape) == 3:
tmp_image = cv2.cvtColor(tmp_image, cv2.COLOR_BGR2GRAY)
mask = tmp_image > 5
min_x, min_y, max_x, max_y = bound_box[0], bound_box[1], bound_box[0] + bound_box[2], bound_box[1] + bound_box[3]
backgroup_image[np.int(min_y):np.int(max_y), np.int(min_x):np.int(max_x)][mask] = obj_image[mask]
return backgroup_image
def formImageAndlabel(background_image, obj_ls, max_min_size_ration, iou_thres):
"""
形成训练图像,并生成对应的 label 列表
:param background_image: 输入背景图
:param obj_ls: 目标 list
:param max_min_size_ration: 最大最小旋转角度
:param iou_thres: IOU 阈值
:return: 返训练的图像,对应的 label
"""
max_ratio, min_ratio = max_min_size_ration
image_size = np.min(background_image.shape[0:2])
dst_image = background_image.copy()
max_min_box_size = [np.floor(max_ratio * image_size), np.floor(min_ratio * image_size)]
label_ls = []
for obj_image, obj_name in obj_ls:
# 对图像进行随机缩放
resize_obj_image = resizeObjectImage(obj_image, max_min_box_size)
# 对图像进行随机旋转
rotate_image = rotateImage(resize_obj_image, np.random.uniform(0, 360, 1)[-1])
# 多次迭代, 直到将图像平移到适当位置为止
src_bbox = calculateBoundImage(rotate_image)
sub_obj_image = rotate_image[src_bbox[1]:src_bbox[1] + src_bbox[3], src_bbox[0]:src_bbox[0] + src_bbox[2]]
iter_cnt = 100
if len(label_ls) == 0:
iter_cnt = 1
for iter_idx in range(iter_cnt):
dst_bbox = randomMoveObjectInImage(dst_image, src_bbox)
if len(label_ls) != 0:
is_fit = True
for tmp_box, tmp_obj_name in label_ls:
#print("....", tmp_box)
#print("+++++", dst_bbox)
IOU = calculateIOU(tmp_box, dst_bbox)
if (IOU is not None) and (IOU > iou_thres):
is_fit = False
break
if is_fit == False:
continue
else:
break
dst_image = addObjectToImage(dst_image, sub_obj_image, dst_bbox)
label_ls.append([dst_bbox, obj_name])
return dst_image, label_ls
def formImageLableXML(src_image, image_file_name, label_info, label_path):
"""
生成图片的 label XML
:param src_image: 原始图像
:param image_file_name: 图像的文件名
:param label_infor: 标签信息
:param label_path: 标签的路径
:return: XML
"""
ele = objectify.ElementMaker(annotate=False)
anno_tree = ele.annotation(
ele.folder('VOC2019_xm'),
ele.filename(image_file_name),
ele.source(
ele.database('The VOC2019 Database'),
ele.annotation('PASCAL VOC2019'),
ele.image('flickr'),
ele.flickrid('264265361')
),
ele.owner(
ele.flickrid('xm'),
ele.name('xm')
),
ele.size(
ele.width(str(src_image.shape[0])),
ele.height(str(src_image.shape[1])),
ele.depth(str(src_image.shape[2]))
),
ele.segmented('0')
)
for cur_box, cur_obj_name in label_info:
cur_ele = objectify.ElementMaker(annotate=False)
cur_tree = cur_ele.object(
ele.name(cur_obj_name),
ele.pose('Frontal'),
ele.truncated('0'),
ele.difficult('0'),
ele.bndbox(
ele.xmin(str(cur_box[0])),
ele.ymin(str(cur_box[1])),
ele.xmax(str(cur_box[0] + cur_box[2])),
ele.ymax(str(cur_box[1] + cur_box[3]))
)
)
anno_tree.append(cur_tree)
etree.ElementTree(anno_tree).write(label_path, pretty_print=True)
def main():
obj_name_ls = ['circle', 'square']
# 各种物体对应的图像的路径
base_obj_file_name = '/home/xm/workspace/ImageProcess/DataSet/'
obj_file_name = [base_obj_file_name + cur_obj for cur_obj in obj_name_ls]
print(obj_file_name)
# 每个种类的样本数量
obj_count = 600
# 图像中物体最大的数量
image_max_obj_cnt = 2
# 图像中物体的 bound box 的最大尺寸点,整个图像最小尺寸比例,
max_size_radio = 0.45
min_size_radio = 0.20
# 图像的总数
image_count = len(obj_name_ls) * 600
# 数据集的保存路径
dataset_basic_path = '/home/xm/workspace/ImageProcess/COCO/VOCdevkit/VOC2019/'
image_folder = dataset_basic_path + 'JPEGImages/'
#print(image_folder)
label_folder = dataset_basic_path + 'Annotations/'
#print(label_folder)
image_set_folder = dataset_basic_path + 'ImageSets/Main/'
#print(image_set_folder)
for data_idx in range(image_count):
# 获取 VOC 数据集中图像文件夹中所有文件的名称
voc_folder_dir = '/home/xm/workspace/ImageProcess/VOC'
voc_image_file_list = os.listdir(voc_folder_dir)
#获取物体图像的文件名列表
obj_image_ls_ls = []
for obj_image_dir in obj_name_ls:
cur_image_dir = base_obj_file_name + obj_image_dir
obj_image_ls_ls.append(os.listdir(cur_image_dir))
# 随机取一张 VOC 图做背景
background_image_file = voc_image_file_list[np.random.randint(0, len(voc_image_file_list), 1)[-1]]
background_image_file = voc_folder_dir + '/' + background_image_file
background_image = cv2.imread(background_image_file)
# 随机取若干物体
obj_image_name_ls = []
obj_cnt = np.random.randint(1, image_max_obj_cnt, 1)[-1]
for obj_idx in range(obj_cnt):
cur_obj_idx = np.random.randint(0, len(obj_image_ls_ls), 1)[-1]
cur_obj_image_ls = obj_image_ls_ls[cur_obj_idx]
cur_obj_file = cur_obj_image_ls[np.random.randint(0, len(cur_obj_image_ls), 1)[-1]]
cur_obj_image = cv2.imread(base_obj_file_name + obj_name_ls[cur_obj_idx] + '/' + cur_obj_file)
obj_image_name_ls.append([cur_obj_image, obj_name_ls[cur_obj_idx]])
# 随机生成图像
get_image, label_ls = formImageAndlabel(background_image, obj_image_name_ls, [max_size_radio, min_size_radio], iou_thres=0.05)
#
# # 保存图像与标签
cur_image_name = str(data_idx).zfill(6) + '.jpg'
#print(cur_image_name)
cur_label_name = str(data_idx).zfill(6) + '.xml'
#print(cur_label_name)
cv2.imwrite(image_folder + cur_image_name, get_image)
formImageLableXML(get_image, cur_image_name, label_ls, label_folder + cur_label_name)
for obj_bbox, obj_name in label_ls:
pnt_1 = tuple(map(int, obj_bbox[0:2]))
pnt_2 = tuple(map(int, obj_bbox[0:2]))
cv2.rectangle(get_image, pnt_1, pnt_2, (0, 0, 255))
print(cur_image_name)
cv2.imshow("get image", get_image)
cv2.waitKey(10)
train_set_name = 'train.txt'
train_val_name = 'val.txt'
test_set_name = 'test.txt'
idx_thre = np.floor(0.6 * image_count)
idx_thre_ = np.floor(0.8 * image_count)
train_file = open(image_set_folder + train_set_name, 'w')
for line_idx in range(int(idx_thre)):
line_str = str(line_idx).zfill(6) + '\n'
train_file.write(line_str)
train_file.close()
train_val_file = open(image_set_folder + train_val_name, 'w')
for line_idx in range(int(idx_thre), int(idx_thre_)):
line_str = str(line_idx).zfill(6) + '\n'
train_val_file.write(line_str)
train_val_file.close()
test_file = open(image_set_folder + test_set_name, 'w')
for line_idx in range(int(idx_thre_), image_count):
line_str = str(line_idx).zfill(6) + '\n'
test_file.write(line_str)
test_file.close()
if __name__ == '__main__':
main()