当前位置：首页 > article >正文

含掩膜mask的单通道灰度图转化为COCO数据集格式标签的json文件（python）

article 2025/2/19 6:21:32

输入：单通道的灰度图，灰度图内含掩膜mask
目标：把灰度图中的语义mask转换为COCO数据集格式的json文件
输出：COCO数据集格式的json文件

期间遇到的问题：
发现有的掩膜内部还包含其他类别的掩膜（即 mask 内部嵌套了 mask），这种情况下的 mask 如何只用一个数组来表示？

以下是查找到的可用代码：

from PIL import Image
import numpy as np
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon
import json
import os
from tqdm import tqdm

def create_sub_masks(mask_image):
    """Split a color-coded mask image into one binary sub-mask per color.

    Args:
        mask_image: PIL image whose non-black pixels encode the masks.
            Multi-band images are keyed by their first three bands.
            Single-band images (for example mode 'L') are expanded with
            ``convert('RGB')`` first, so a gray value ``v`` is keyed as
            ``(v, v, v)``.

    Returns:
        A dict mapping ``str(pixel)`` (e.g. ``"(1, 1, 1)"``) to a mode-'1'
        PIL image of size ``(width + 2, height + 2)`` in which the pixels of
        that color are set. Black ``(0, 0, 0)`` pixels are ignored. Keys are
        inserted in the order the colors are first met when scanning the
        image column by column.
    """
    width, height = mask_image.size
    if width == 0 or height == 0:
        return {}

    pixels = np.asarray(mask_image)
    if pixels.ndim == 2:
        # Single-band input: replicate the value over three bands so the
        # keys have the same "(r, g, b)" form as for RGB input.
        pixels = np.asarray(mask_image.convert('RGB'))
    pixels = pixels[:, :, :3]

    # Flatten column by column (x outer, y inner) so that `first_seen`
    # ranks the colors in the same order a per-pixel scan would meet them.
    flat = pixels.transpose(1, 0, 2).reshape(-1, pixels.shape[2])
    colors, first_seen = np.unique(flat, axis=0, return_index=True)

    sub_masks = {}
    for idx in np.argsort(first_seen):
        color = colors[idx]
        pixel = tuple(int(band) for band in color)

        # Black is background and gets no sub-mask
        if pixel == (0, 0, 0):
            continue

        inside = np.all(pixels == color, axis=-1)
        # Note: we add 1 pixel of padding in each direction
        # because the contours module doesn't handle cases
        # where pixels bleed to the edge of the image
        padded = np.pad(inside, 1, mode='constant')
        # A boolean array becomes a mode-'1' (one bit per pixel) image
        sub_masks[str(pixel)] = Image.fromarray(padded)

    return sub_masks


def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    """Turn one padded binary sub-mask into a COCO-style annotation dict.

    Args:
        sub_mask: Binary mask with one pixel of padding on every side
            (anything ``np.array`` can turn into a 2-D array).
        image_id: Value stored under ``'image_id'``.
        category_id: Value stored under ``'category_id'``.
        annotation_id: Value stored under ``'id'``.
        is_crowd: Value stored under ``'iscrowd'``.

    Returns:
        A dict with the keys ``segmentation``, ``iscrowd``, ``image_id``,
        ``category_id``, ``id``, ``bbox`` (``(x, y, width, height)``) and
        ``area``, or the string ``"skip"`` when no usable polygon could be
        extracted from the mask.
    """
    # Find contours (boundary lines) around each sub-mask
    # Note: there could be multiple contours if the object
    # is partially occluded. (E.g. an elephant behind a tree)
    contours = measure.find_contours(np.array(sub_mask), 0.5, positive_orientation='low')

    # NOTE(review): every contour is stored as an ordinary polygon; if
    # find_contours also returns the outline of a hole, that outline is added
    # to the segmentation and area rather than subtracted.
    segmentations = []
    polygons = []
    for contour in contours:
        # A polygon needs at least three points
        if len(contour) < 3:
            continue

        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        points = contour[:, ::-1] - 1

        # Make a polygon and simplify it
        simplified = Polygon(points).simplify(1.0, preserve_topology=False)

        # Without topology preservation the result may be empty or may have
        # been split into several parts; keep only real polygons.
        if simplified.geom_type == 'Polygon':
            parts = [simplified]
        else:
            parts = [part for part in getattr(simplified, 'geoms', ())
                     if part.geom_type == 'Polygon']

        for part in parts:
            if part.is_empty:
                continue
            segmentation = np.array(part.exterior.coords)
            # Removing the padding can push coordinates slightly below zero
            segmentation = np.maximum(segmentation, 0).ravel().tolist()
            # Fewer than three (x, y) pairs cannot describe a polygon
            if len(segmentation) < 6:
                continue
            polygons.append(part)
            segmentations.append(segmentation)

    if not polygons:
        return "skip"

    # Combine the polygons to calculate the bounding box and area
    multi_poly = MultiPolygon(polygons)
    if multi_poly.is_empty:
        return "skip"
    x, y, max_x, max_y = multi_poly.bounds
    width = max_x - x
    height = max_y - y
    bbox = (x, y, width, height)
    area = multi_poly.area

    annotation = {
        'segmentation': segmentations,
        'iscrowd': is_crowd,
        'image_id': image_id,
        'category_id': category_id,
        'id': annotation_id,
        'bbox': bbox,
        'area': area
    }

    return annotation


def get_name(root, mode_folder=True):
    """List the immediate children of ``root`` in sorted order.

    Args:
        root: Directory to inspect.
        mode_folder: When true, return the sub-directory names; otherwise
            return the file names.

    Returns:
        A sorted list of names taken from the first entry produced by
        ``os.walk(root)``, or ``None`` when the walk produces nothing.
    """
    top_level = next(os.walk(root), None)
    if top_level is None:
        return None

    _, dir_names, file_names = top_level
    chosen = dir_names if mode_folder else file_names
    return sorted(chosen)


def get_annotation(mask_image_root, mask_dir=None, output_path="trainmask.json"):
    """Build a COCO-style annotation file from a list of mask images.

    Args:
        mask_image_root: File names of the mask images, relative to
            ``mask_dir``. The position of a name in this sequence becomes
            the image id.
        mask_dir: Directory that contains the mask images. When ``None``
            the module-level ``maskdir`` variable is used.
        output_path: Path of the JSON file to write.

    Returns:
        None. The dataset is written to ``output_path``.
    """
    if mask_dir is None:
        # Backward compatibility: fall back to the global set by the script's
        # ``__main__`` section.
        mask_dir = maskdir

    dataset = {"info": {"year": 2023, "version": "2023", "description": "", "url": "",
                        },
               "license": {},
               "images": [],
               "annotations": [],
               "categories": []}
    class_index = {0: "background", 1: 'cate1', 2: 'cate2'}
    for class_id, class_name in class_index.items():
        dataset["categories"].append({"id": class_id, "name": class_name, "supercategory": "xxx"})

    is_crowd = 0

    # This id will be automatically increased as we go
    annotation_id = 0

    # Create the annotations
    # Wrapping the sequence itself (not enumerate) lets tqdm see its length.
    for image_id, file_name in enumerate(tqdm(mask_image_root)):
        # The context manager closes the file once the RGB copy is made
        with Image.open(os.path.join(mask_dir, file_name)) as source:
            mask_image = source.convert('RGB')
        width, height = mask_image.size
        dataset["images"].append({
                                  "file_name": file_name,
                                  "id": image_id,
                                  "width": width,
                                  "height": height})

        sub_masks = create_sub_masks(mask_image)
        for sub_mask in sub_masks.values():
            # NOTE(review): every sub-mask gets category 1 whatever its
            # color is; confirm this is intended for multi-class masks.
            category_id = 1
            annotation = create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd)
            if annotation == "skip":
                continue
            dataset["annotations"].append(annotation)
            annotation_id += 1

    with open(output_path, "w") as f:
        json.dump(dataset, f)



# rrr = "./InstanceSegmentation/"
# all_root = get_name(rrr, mode_folder=False)
# get_annotation(all_root)
if __name__ == '__main__':
    # `maskdir` has to be a module-level name because get_annotation reads it
    maskdir = './mask/trainmask/'
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # image ids reproducible between runs.
    maskimglist = sorted(os.listdir(maskdir))
    get_annotation(maskimglist)