华为开源自研AI框架昇思MindSpore应用案例:ICNet用于实时的语义分割
ICNet用于实时的语义分割
ICNet 被广泛应用于实时的语义分割领域。它在处理图像数据时,能够以较高的效率进行语义分割操作,为相关领域的研究和实际应用提供了有力的支持。ICNet 的实时性使其在众多场景中都具有很大的优势,例如在视频处理、自动驾驶等对实时性要求较高的领域,ICNet 能够快速准确地对图像进行语义分割,为后续的决策和处理提供关键信息。
如果你对MindSpore感兴趣,可以关注昇思MindSpore社区
一、环境准备
1.进入ModelArts官网
云平台帮助用户快速创建和部署模型,管理全周期AI工作流,选择下面的云平台以开始使用昇思MindSpore,获取安装命令,安装MindSpore2.0.0-alpha版本,可以在昇思教程中进入ModelArts官网
选择下方CodeLab立即体验
等待环境搭建完成
2.使用CodeLab体验Notebook实例
选择ModelArts Upload Files上传Git文件,地址为GitHub - yfjcode/ICNet: mindspore icnet model
选择Kernel环境
切换至GPU环境,切换成第一个限时免费
进入昇思MindSpore官网,点击上方的安装
获取安装命令
回到Notebook中,在第一块代码前加入命令
conda update -n base -c defaults conda
安装MindSpore 2.0 GPU版本
conda install mindspore=2.0.0a0 -c mindspore -c conda-forge
安装mindvision
pip install mindvision
安装下载工具download
pip install download
二、应用体验
1.模型准备
根据原作者提示
环境准备与数据读取 本案例基于MindSpore-CPU版本实现,在CPU上完成模型训练。
案例实现所使用的数据:Cityscape Dataset Website
为了下载数据集,我们首先需要在Cityscapes数据集官网进行注册,并且最好使用edu教育邮箱进行注册,此后等待几天,就可以下载数据集了,这里我们下载了两个文件:gtFine_trainvaltest.zip和leftImg8bit_trainvaltest.zip (11GB)。
下载完成后,我们对数据集压缩文件进行解压,文件的目录结构如下所示。
由于我们是在CPU上跑的,而原始数据集有十几个G(仅leftImg8bit_trainvaltest.zip就有11GB),全部拿来跑的话很容易卡住甚至中断,所以我们只选择一个城市的部分图片来完成实验。
首先要处理数据,生成对应的.mindrecord 和 .mindrecord.db文件
需要注意的是,在生成这两个文件之前,我们要建立一个文件夹,用cityscapes_mindrecord命名,放在cityscapes文件夹的同级目录下: 而且要保持cityscapes_mindrecord文件夹里面为空
下面是构建数据集的代码:注意,要保持cityscapes_mindrecord文件夹里面为空,报错可能是文件夹已经有文件了,文件夹地址为:/home/ma-user/work/ICNet/data/cityscapes_mindrecord
如果该文件夹中已有文件,需要先删掉 ./data/cityscapes_mindrecord 文件夹里的这些文件
删掉文件后,需要修改路径,删掉/home/ma-user/work/ICNet,用./替换,之后直接运行代码块即可
"""Prepare Cityscapes dataset"""
import os
import random
import argparse
import numpy as np
from PIL import Image
from PIL import ImageOps
from PIL import ImageFilter
import mindspore.dataset as de
from mindspore.mindrecord import FileWriter
import mindspore.dataset.vision as transforms
import mindspore.dataset.transforms as tc
def _get_city_pairs(folder, split='train'):
    """Collect matching Cityscapes image / label-mask path lists.

    Every ``.png`` file found below ``<folder>/leftImg8bit/<split>`` is paired
    with ``<folder>/gtFine/<split>/<parent dir>/<mask name>``, where the mask
    name is the image file name with ``leftImg8bit`` replaced by
    ``gtFine_labelIds``. Images whose mask file is missing are reported on
    stdout and skipped.

    Args:
        folder: Root directory of the Cityscapes data set.
        split: Which sub-directory to scan, ``'train'`` or ``'val'``.

    Returns:
        A tuple ``(img_paths, mask_paths)``; both lists have the same length
        and entry ``i`` of each refers to the same sample.

    Raises:
        ValueError: If ``split`` is neither ``'train'`` nor ``'val'``.
    """
    # Fail early with a clear message: for an unknown split there is nothing
    # to scan and no path lists to return.
    if split not in ('train', 'val'):
        raise ValueError("split must be 'train' or 'val', got {!r}".format(split))

    def get_path_pairs(image_folder, masks_folder):
        image_paths = []
        masks_paths = []
        for root, dirs, files in os.walk(image_folder):
            # os.walk yields entries in arbitrary order; sorting makes the
            # resulting sample order reproducible across runs and machines.
            dirs.sort()
            for filename in sorted(files):
                if not filename.endswith('.png'):
                    continue
                imgpath = os.path.join(root, filename)
                # The image's parent directory name is reused on the mask side.
                foldername = os.path.basename(os.path.dirname(imgpath))
                maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
                maskpath = os.path.join(masks_folder, foldername, maskname)
                if os.path.isfile(imgpath) and os.path.isfile(maskpath):
                    image_paths.append(imgpath)
                    masks_paths.append(maskpath)
                else:
                    print('cannot find the mask or image:', imgpath, maskpath)
        print('Found {} images in the folder {}'.format(len(image_paths), image_folder))
        return image_paths, masks_paths

    # "./Cityscapes/leftImg8bit/train" or "./Cityscapes/leftImg8bit/val"
    img_folder = os.path.join(folder, 'leftImg8bit', split)
    # "./Cityscapes/gtFine/train" or "./Cityscapes/gtFine/val"
    mask_folder = os.path.join(folder, 'gtFine', split)
    # img_paths[i] and mask_paths[i] always belong to the same sample.
    img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
    return img_paths, mask_paths
def _sync_transform(img, mask, base_size=1024, crop_size=960):
    """Apply the same random augmentation to an image and its label mask.

    Steps, in order: random horizontal mirror (probability 0.5), random
    rescale so the short edge lies in ``[base_size * 0.5, base_size * 2.0]``,
    zero padding when the rescaled short edge is smaller than ``crop_size``,
    a random ``crop_size`` x ``crop_size`` crop, an optional Gaussian blur of
    the image only (probability 0.5), and finally ``_img_mask_transform``.

    All random draws come from the module-level ``random`` generator, so the
    result is reproducible only if the caller seeds that generator.

    Args:
        img: PIL image.
        mask: PIL label mask with the same size as ``img``.
        base_size: Reference size for the random short-edge rescale.
        crop_size: Side length of the square crop that is returned.

    Returns:
        Whatever ``_img_mask_transform`` returns for the augmented pair.
    """
    # random mirror
    if random.random() < 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
        mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
    # random scale (short edge)
    short_size = random.randint(int(base_size * 0.5), int(base_size * 2.0))
    w, h = img.size
    if h > w:
        ow = short_size
        oh = int(1.0 * h * ow / w)
    else:
        oh = short_size
        ow = int(1.0 * w * oh / h)
    # Nearest-neighbour resampling keeps the mask's label ids intact.
    img = img.resize((ow, oh), Image.BILINEAR)
    mask = mask.resize((ow, oh), Image.NEAREST)
    # pad crop
    if short_size < crop_size:
        padh = crop_size - oh if oh < crop_size else 0
        padw = crop_size - ow if ow < crop_size else 0
        img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
        mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
    # random crop crop_size
    w, h = img.size
    x1 = random.randint(0, w - crop_size)
    y1 = random.randint(0, h - crop_size)
    crop_box = (x1, y1, x1 + crop_size, y1 + crop_size)
    img = img.crop(crop_box)
    mask = mask.crop(crop_box)
    # gaussian blur as in PSP
    if random.random() < 0.5:
        img = img.filter(ImageFilter.GaussianBlur(radius=random.random()))
    # final transform
    return _img_mask_transform(img, mask)
def _class_to_index(mask):
    """Map raw Cityscapes label ids to training class indices.

    Label ids ``-1 .. 33`` are translated through a fixed lookup table; ids
    that do not belong to one of the 19 training classes become ``-1``.

    Args:
        mask: NumPy array of label ids.

    Returns:
        Array with the same shape as ``mask`` holding values in ``-1 .. 18``.

    Raises:
        ValueError: If ``mask`` contains a value outside ``-1 .. 33``.
    """
    # Reference:
    # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py
    _key = np.array([-1, -1, -1, -1, -1, -1,
                     -1, -1, 0, 1, -1, -1,
                     2, 3, 4, -1, -1, -1,
                     5, -1, 6, 7, 8, 9,
                     10, 11, 12, 13, 14, 15,
                     -1, -1, 16, 17, 18])
    # Label ids covered by _key, i.e. [-1, 0, ..., 33]; _key[i] is the class
    # index of label id _mapping[i].
    _mapping = np.arange(-1, len(_key) - 1).astype('int32')
    # Validate explicitly instead of with assert, which disappears under -O
    # and would let out-of-range ids index past the table or map silently.
    unknown = np.setdiff1d(np.unique(mask), _mapping)
    if unknown.size:
        raise ValueError('mask contains unsupported label ids: {}'.format(unknown.tolist()))
    # Position of every pixel value inside _mapping.
    index = np.digitize(mask.ravel(), _mapping, right=True)
    # Translate positions to class indices and restore the mask layout.
    return _key[index].reshape(mask.shape)
def _img_transform(img):
    """Return the given image as a new NumPy array."""
    pixels = np.array(img)
    return pixels
def _mask_transform(mask):
    """Convert a label-id mask into an int32 array of class indices."""
    label_ids = np.array(mask).astype('int32')
    class_indices = _class_to_index(label_ids)
    return np.array(class_indices).astype('int32')
def _img_mask_transform(img, mask):
    """Turn an image / mask pair into float32 NumPy arrays.

    The image is passed through ``ToTensor`` followed by ``Normalize`` with
    the mean and standard deviation listed below (``is_hwc=False``); the mask
    is converted to class indices by ``_mask_transform``.

    Returns:
        Tuple ``(img, mask)`` of ``float32`` arrays.
    """
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    pipeline = tc.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std, is_hwc=False),
    ])
    image_array = _img_transform(img)
    class_mask = _mask_transform(mask)
    normalized = pipeline(image_array)
    return (np.array(normalized).astype(np.float32),
            np.array(class_mask).astype(np.float32))
def data_to_mindrecord_img(prefix='cityscapes-2975.mindrecord', file_num=1,
                           root='./', split='train', mindrecord_dir="./"):
    """Write Cityscapes image / mask pairs into a MindRecord file.

    Args:
        prefix: File name of the MindRecord file to create.
        file_num: Number of shard files, passed to ``FileWriter``.
        root: Cityscapes root directory, forwarded to ``_get_city_pairs``.
        split: Data split, forwarded to ``_get_city_pairs``.
        mindrecord_dir: Directory in which the MindRecord file is written.
            It is created if it does not exist yet.
    """
    # Create the output directory up front so the caller does not have to
    # prepare it by hand (an empty string means "current directory").
    if mindrecord_dir:
        os.makedirs(mindrecord_dir, exist_ok=True)
    mindrecord_path = os.path.join(mindrecord_dir, prefix)
    writer = FileWriter(mindrecord_path, file_num)

    img_paths, mask_paths = _get_city_pairs(root, split)

    # Schema: full-resolution RGB image plus its single-channel label mask.
    cityscapes_json = {
        "images": {"type": "int32", "shape": [1024, 2048, 3]},
        "mask": {"type": "int32", "shape": [1024, 2048]},
    }
    writer.add_schema(cityscapes_json, "cityscapes_json")

    images_files_num = len(img_paths)
    for count, (img_path, mask_path) in enumerate(zip(img_paths, mask_paths), start=1):
        # Context managers close each image file as soon as its pixels have
        # been copied into an array.
        with Image.open(img_path) as img_file:
            img = np.array(img_file.convert('RGB'), dtype=np.int32)
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file, dtype=np.int32)

        row = {"images": img, "mask": mask}
        if count % 10 == 0:
            print("writing {}/{} into mindrecord".format(count, images_files_num))
        writer.write_raw_data([row])
    writer.commit()
def get_Image_crop_nor(img, mask):
    """Convert array inputs to PIL images and run the joint augmentation.

    Both inputs are cast to ``uint8`` before being wrapped as PIL images; the
    pair is then handed to ``_sync_transform`` and its result returned.
    """
    pil_image = Image.fromarray(np.uint8(img))
    pil_mask = Image.fromarray(np.uint8(mask))
    return _sync_transform(pil_image, pil_mask)
def create_icnet_dataset(mindrecord_file, batch_size=16, device_num=1, rank_id=0):
    """Create the shuffled, augmented and batched training data set.

    Args:
        mindrecord_file: Path of the MindRecord file to read.
        batch_size: Number of samples per batch; the last, smaller batch is
            kept (``drop_remainder=False``).
        device_num: Number of shards the data set is split into.
        rank_id: Index of the shard read by this process.

    Returns:
        A data set with the columns ``image`` and ``masks``.
    """
    # NOTE: shuffling and augmentation are not seeded here; seed the global
    # generators before calling if reproducible batches are required.
    ds = de.MindDataset(mindrecord_file, columns_list=["images", "mask"],
                        num_shards=device_num, shard_id=rank_id, shuffle=True)
    # Per-sample augmentation; the output columns are renamed.
    ds = ds.map(operations=get_Image_crop_nor, input_columns=["images", "mask"],
                output_columns=["image", "masks"])
    ds = ds.batch(batch_size=batch_size, drop_remainder=False)
    return ds
# Cityscapes root directory; the leftImg8bit/ and gtFine/ folders are looked up
# beneath it.
dataset_path="./data/cityscapes/"
# Directory that receives the generated MindRecord file.
mindrecord_path="./data/cityscapes_mindrecord/"
# Runs the conversion immediately, using the default prefix and the 'train' split.
data_to_mindrecord_img(root=dataset_path, mindrecord_dir=mindrecord_path)
# if __name__ == '__main__':
# parser = argparse.ArgumentParser(description="dataset_to_mindrecord")
# parser.add_argument("--dataset_path", type=str, default="/home/ma-user/work/ICNet/data/cityscapes/", help="dataset path")
# parser.add_argument("--mindrecord_path", type=str, default="/home/ma-user/work/ICNet/data/cityscapes_mindrecord/",
# help="mindrecord_path")
# args_opt = parser.parse_args()
# data_to_mindrecord_img(root=args_opt.dataset_path, mindrecord_dir=args_opt.mindrecord_path)
可以看到已经生成了对应的数据集文件,接下来我们创建稍后训练要用到的数据集
注意修改路径
# File name of the MindRecord file; matches the default prefix of
# data_to_mindrecord_img.
prefix = 'cityscapes-2975.mindrecord'
# NOTE: absolute path, whereas the conversion step above writes to the relative
# "./data/cityscapes_mindrecord/" - adjust it to your environment.
train_mindrecord_dir="/home/ma-user/work/ICNet/data/cityscapes_mindrecord"
# Batch size per device.
train_train_batch_size_percard=4
# Single-device setup: one shard, read by shard 0.
device_num=1
rank_id=0
mindrecord_dir = train_mindrecord_dir
mindrecord_file = os.path.join(mindrecord_dir, prefix)
print("mindrecord_file",mindrecord_file)
# print("cfg['train'][‘’train_batch_size_percard‘]",cfg['train']["train_batch_size_percard"])
# Build the training data set (shuffled, augmented and batched).
dataset = create_icnet_dataset(mindrecord_file, batch_size=train_train_batch_size_percard,
device_num=device_num, rank_id=rank_id)
print(dataset)
2.模型构建
建立需要训练模型的一些参数:(这里只是展示,不运行,具体参数运行在后面)
1.Model
model: name: "icnet" backbone: "resnet50v1" base_size: 1024 # during augmentation, shorter size will be resized between [base_size*0.5, base_size*2.0] crop_size: 960 # end of augmentation, crop to training
2.Optimizer
optimizer: init_lr: 0.02 momentum: 0.9 weight_decay: 0.0001
3.Training
train: train_batch_size_percard: 4 valid_batch_size: 1 cityscapes_root: "/data/cityscapes/" epochs: 10 val_epoch: 1 # run validation every val-epoch ckpt_dir: "./ckpt/" # ckpt and training log will be saved here mindrecord_dir: '/home/ma-user/work/ICNet/data/cityscapes_mindrecord' pretrained_model_path: '/home/ma-user/work/ICNet/root/cacheckpt/resnet50-icnet-150_2.ckpt' save_checkpoint_epochs: 5 keep_checkpoint_max: 10
4.Valid
test: ckpt_path: "" # set the pretrained model path correctly
注意修改路径
# Number of training epochs.
train_epochs=10
# Number of batches per epoch, as reported by the data set.
train_data_size = dataset.get_dataset_size()
print("data_size", train_data_size)
epoch = train_epochs
# NOTE: absolute paths - adjust them to your environment.
# project_path is used as the prefix of the checkpoint output directory.
project_path="/home/ma-user/work/ICNet/"
# Checkpoint path handed to the network as `pretrained_path`.
train_pretrained_model_path="/home/ma-user/work/ICNet/root/cacheckpt/resnet50-icnet-150_2.ckpt"
import mindspore as ms
import mindspore.nn as nn
import mindspore.ops as ops
from src.loss import ICNetLoss
from src.models.resnet50_v1 import get_resnet50v1b
__all__ = ['ICNetdc']
class ICNetdc(nn.Cell):
    """Image Cascade Network with the training loss built in.

    Args:
        nclass: Number of segmentation classes.
        pretrained_path: Checkpoint path forwarded to the backbone as ``root``.
        istraining: When True, ``construct`` returns the loss; otherwise it
            returns the head outputs.
        norm_layer: Normalisation layer class used by the sub-modules.
    """

    def __init__(self, nclass=19, pretrained_path="", istraining=True, norm_layer=nn.SyncBatchNorm):
        super(ICNetdc, self).__init__()
        # Full-resolution branch: three stride-2 convolutions.
        self.conv_sub1 = nn.SequentialCell(
            _ConvBNReLU(3, 32, 3, 2, norm_layer=norm_layer),
            _ConvBNReLU(32, 32, 3, 2, norm_layer=norm_layer),
            _ConvBNReLU(32, 64, 3, 2, norm_layer=norm_layer)
        )
        self.istraining = istraining
        self.ppm = PyramidPoolingModule()

        self.backbone = SegBaseModel(root=pretrained_path, istraining=istraining)

        self.head = _ICHead(nclass, norm_layer=norm_layer)

        self.loss = ICNetLoss()

        self.resize_bilinear = nn.ResizeBilinear()

        self.exclusive = ['conv_sub1', 'head']

    def construct(self, x, y):
        """Run the three ICNet branches and return the loss or the outputs.

        Args:
            x: Input image batch.
            y: Target masks; only used when ``istraining`` is True.

        Returns:
            ``self.loss(output, y)`` in training mode, otherwise the list
            produced by the head.
        """
        # NOTE(review): presumably removes a stray singleton dimension added by
        # the data pipeline - confirm against the data set's output shape.
        if x.shape[0] != 1:
            x = x.squeeze()

        # sub 1
        x_sub1 = self.conv_sub1(x)

        h, w = x.shape[2:]
        # sub 2: half-resolution input. Floor division keeps the target size
        # an integer, as a resize size has to be.
        x_sub2 = self.resize_bilinear(x, size=(h // 2, w // 2))
        _, x_sub2, _, _ = self.backbone(x_sub2)

        # sub 4: deepest backbone stage on the full-size input
        _, _, _, x_sub4 = self.backbone(x)
        # add PyramidPoolingModule
        x_sub4 = self.ppm(x_sub4)

        output = self.head(x_sub1, x_sub2, x_sub4)

        if self.istraining:
            outputs = self.loss(output, y)
        else:
            outputs = output
        return outputs
class PyramidPoolingModule(nn.Cell):
    """Pyramid pooling: adds pooled-and-upsampled context to a feature map.

    The output is the input plus four context maps: the mean over axes
    ``(2, 3)`` and average-pooled versions with kernel/stride 15, 10 and 5,
    each resized back to the input's height and width.
    """

    def __init__(self, pyramids=None):
        super().__init__()
        self.avgpool = ops.ReduceMean(keep_dims=True)
        self.pool2 = nn.AvgPool2d(kernel_size=15, stride=15)
        self.pool3 = nn.AvgPool2d(kernel_size=10, stride=10)
        self.pool6 = nn.AvgPool2d(kernel_size=5, stride=5)
        self.resize_bilinear = nn.ResizeBilinear()

    def construct(self, x):
        """Return ``x`` enriched with the four pooled context maps."""
        height, width = x.shape[2:]
        target = (height, width)

        global_ctx = self.resize_bilinear(self.avgpool(x, (2, 3)), size=target, align_corners=True)
        fused = x + global_ctx

        ctx_15 = self.resize_bilinear(self.pool2(x), size=target, align_corners=True)
        fused = fused + ctx_15

        ctx_10 = self.resize_bilinear(self.pool3(x), size=target, align_corners=True)
        fused = fused + ctx_10

        ctx_5 = self.resize_bilinear(self.pool6(x), size=target, align_corners=True)
        fused = fused + ctx_5

        return fused
class _ICHead(nn.Cell):
    """ICNet head: fuses the three branch features into four prediction maps."""

    def __init__(self, nclass, norm_layer=nn.SyncBatchNorm, **kwargs):
        super(_ICHead, self).__init__()
        self.cff_12 = CascadeFeatureFusion12(128, 64, 128, nclass, norm_layer, **kwargs)
        self.cff_24 = CascadeFeatureFusion24(2048, 512, 128, nclass, norm_layer, **kwargs)

        self.conv_cls = nn.Conv2d(128, nclass, 1, has_bias=False)
        # Kept only so code that reads this attribute keeps working;
        # construct() builds a fresh list on every call and never mutates it.
        self.outputs = list()
        self.resize_bilinear = nn.ResizeBilinear()

    def construct(self, x_sub1, x_sub2, x_sub4):
        """Fuse the branch features.

        Returns:
            List ``[up_x8, up_x2, x_12_cls, x_24_cls]``: the classifier output
            upsampled by a further factor 4, the classifier output on the 2x
            upsampled fused feature, and the auxiliary class maps of the two
            fusion units.
        """
        x_cff_24, x_24_cls = self.cff_24(x_sub4, x_sub2)

        x_cff_12, x_12_cls = self.cff_12(x_cff_24, x_sub1)

        h1, w1 = x_cff_12.shape[2:]
        up_x2 = self.resize_bilinear(x_cff_12, size=(h1 * 2, w1 * 2),
                                     align_corners=True)
        up_x2 = self.conv_cls(up_x2)

        h2, w2 = up_x2.shape[2:]
        up_x8 = self.resize_bilinear(up_x2, size=(h2 * 4, w2 * 4),
                                     align_corners=True)  # scale_factor=4,

        # A new list per call: appending to a list stored on the instance
        # would make the results of earlier calls pile up in it.
        return [up_x8, up_x2, x_12_cls, x_24_cls]
class _ConvBNReLU(nn.Cell):
    """Convolution followed by a normalisation layer and ReLU.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Explicit padding (``pad_mode='pad'``).
        dilation: Convolution dilation.
        groups: Number of convolution groups.
        norm_layer: Normalisation layer class, built with ``momentum=0.1``.
        bias: Whether the convolution has a bias term.
        **kwargs: Accepted for signature compatibility; not used.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=2, padding=1, dilation=1,
                 groups=1, norm_layer=nn.SyncBatchNorm, bias=False, **kwargs):
        super(_ConvBNReLU, self).__init__()
        # `groups` and `bias` are forwarded rather than silently ignored; with
        # their defaults (1 and False) the layer is unchanged.
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad_mode='pad', padding=padding,
                              dilation=dilation,
                              group=groups, has_bias=bias)
        self.bn = norm_layer(out_channels, momentum=0.1)
        self.relu = nn.ReLU()

    def construct(self, x):
        """Apply convolution, normalisation and ReLU in that order."""
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
class CascadeFeatureFusion12(nn.Cell):
    """Cascade feature fusion unit used by the head as ``cff_12``.

    The low-resolution feature is resized to the high-resolution feature's
    spatial size, both are projected to ``out_channels``, summed and passed
    through ReLU. An auxiliary class map is computed from the projected
    low-resolution feature.
    """

    def __init__(self, low_channels, high_channels, out_channels, nclass, norm_layer=nn.SyncBatchNorm, **kwargs):
        super().__init__()
        # Dilated 3x3 projection of the (resized) low-resolution input.
        self.conv_low = nn.SequentialCell(
            nn.Conv2d(low_channels, out_channels, 3, pad_mode='pad', padding=2, dilation=2, has_bias=False),
            norm_layer(out_channels, momentum=0.1),
        )
        # 1x1 projection of the high-resolution input.
        self.conv_high = nn.SequentialCell(
            nn.Conv2d(high_channels, out_channels, kernel_size=1, has_bias=False),
            norm_layer(out_channels, momentum=0.1),
        )
        self.conv_low_cls = nn.Conv2d(in_channels=out_channels, out_channels=nclass, kernel_size=1, has_bias=False)
        self.resize_bilinear = nn.ResizeBilinear()

        self.scalar_cast = ops.ScalarCast()

        self.relu = ms.nn.ReLU()

    def construct(self, x_low, x_high):
        """Return ``(fused feature, auxiliary class map)``."""
        target_h, target_w = x_high.shape[2:]
        low_resized = self.resize_bilinear(x_low, size=(target_h, target_w), align_corners=True)
        low_proj = self.conv_low(low_resized)
        high_proj = self.conv_high(x_high)

        fused = self.relu(low_proj + high_proj)
        low_cls = self.conv_low_cls(low_proj)

        return fused, low_cls
class CascadeFeatureFusion24(nn.Cell):
    """Cascade feature fusion unit used by the head as ``cff_24``.

    The low-resolution feature is resized to the high-resolution feature's
    spatial size, both are projected to ``out_channels``, summed and passed
    through ReLU. An auxiliary class map is computed from the projected
    low-resolution feature.
    """

    def __init__(self, low_channels, high_channels, out_channels, nclass, norm_layer=nn.SyncBatchNorm, **kwargs):
        super().__init__()
        # Dilated 3x3 projection of the (resized) low-resolution input.
        self.conv_low = nn.SequentialCell(
            nn.Conv2d(low_channels, out_channels, 3, pad_mode='pad', padding=2, dilation=2, has_bias=False),
            norm_layer(out_channels, momentum=0.1),
        )
        # 1x1 projection of the high-resolution input.
        self.conv_high = nn.SequentialCell(
            nn.Conv2d(high_channels, out_channels, kernel_size=1, has_bias=False),
            norm_layer(out_channels, momentum=0.1),
        )
        self.conv_low_cls = nn.Conv2d(in_channels=out_channels, out_channels=nclass, kernel_size=1, has_bias=False)
        self.resize_bilinear = nn.ResizeBilinear()

        self.relu = ms.nn.ReLU()

    def construct(self, x_low, x_high):
        """Return ``(fused feature, auxiliary class map)``."""
        target_h, target_w = x_high.shape[2:]
        low_resized = self.resize_bilinear(x_low, size=(target_h, target_w), align_corners=True)
        low_proj = self.conv_low(low_resized)
        high_proj = self.conv_high(x_high)

        fused = self.relu(low_proj + high_proj)
        low_cls = self.conv_low_cls(low_proj)

        return fused, low_cls
class SegBaseModel(nn.Cell):
    """Backbone wrapper that exposes the four residual stage outputs.

    Args:
        nclass: Number of classes; stored on the instance.
        backbone: Backbone name; only ``'resnet50'`` is supported.
        root: Checkpoint path forwarded to ``get_resnet50v1b`` as ``ckpt_root``.
        istraining: Forwarded to ``get_resnet50v1b``.

    Raises:
        ValueError: If ``backbone`` is not ``'resnet50'``.
    """

    def __init__(self, nclass=19, backbone='resnet50', root="", istraining=False):
        super(SegBaseModel, self).__init__()
        self.nclass = nclass
        # Reject unknown backbones here; otherwise `self.pretrained` would be
        # missing and the failure would only surface inside construct().
        if backbone != 'resnet50':
            raise ValueError("unsupported backbone {!r}; only 'resnet50' is available".format(backbone))
        self.pretrained = get_resnet50v1b(ckpt_root=root, istraining=istraining)

    def construct(self, x):
        """Run the backbone and return the outputs of layer1 .. layer4."""
        x = self.pretrained.conv1(x)
        x = self.pretrained.bn1(x)
        x = self.pretrained.relu(x)
        x = self.pretrained.maxpool(x)
        c1 = self.pretrained.layer1(x)
        c2 = self.pretrained.layer2(c1)
        c3 = self.pretrained.layer3(c2)
        c4 = self.pretrained.layer4(c3)

        return c1, c2, c3, c4
def poly_lr(base_lr, decay_steps, total_steps, end_lr=0.0001, power=0.9):
    """Yield a polynomially decaying learning rate for every training step.

    For step ``i`` the value is
    ``(base_lr - end_lr) * (1 - min(i, decay_steps) / decay_steps) ** power + end_lr``,
    so the rate starts at ``base_lr`` and stays at ``end_lr`` once
    ``decay_steps`` has been reached.

    Args:
        base_lr: Learning rate at step 0.
        decay_steps: Number of steps over which the rate decays.
        total_steps: Number of values to yield.
        end_lr: Learning rate after the decay has finished.
        power: Exponent of the polynomial.

    Yields:
        One learning rate per step, ``total_steps`` values in total.

    Raises:
        ValueError: On the first iteration, if ``decay_steps`` is 0 while
            ``total_steps`` is positive.
    """
    # A zero decay length would otherwise surface as a bare ZeroDivisionError.
    if total_steps > 0 and decay_steps == 0:
        raise ValueError("decay_steps must be non-zero")
    for i in range(total_steps):
        step_ = min(i, decay_steps)
        yield (base_lr - end_lr) * ((1.0 - step_ / decay_steps) ** power) + end_lr
# Optimizer hyper-parameters (used when the SGD optimizer is built).
optimizer_init_lr=0.02
optimizer_weight_decay = 0.0001
optimizer_momentum= 0.9
# Checkpoint policy: save every 5 epochs, keep at most 10 files.
train_save_checkpoint_epochs=5
train_keep_checkpoint_max = 10
# Single-device run; device_id becomes the suffix of the checkpoint directory.
rank_id = 0
device_id = 0
device_num =1
# from src.lr_scheduler import poly_lr
import os
import sys
import logging
import argparse
# import yaml
import mindspore.nn as nn
from mindspore import Model
from mindspore import context
from mindspore import set_seed
from mindspore.context import ParallelMode
from mindspore.communication import init
from mindspore.train.callback import CheckpointConfig
from mindspore.train.callback import ModelCheckpoint
from mindspore.train.callback import LossMonitor
from mindspore.train.callback import TimeMonitor
# One optimizer step per batch, so the schedule covers epochs * batches steps.
iters_per_epoch = train_data_size
total_train_steps = iters_per_epoch * epoch
base_lr = optimizer_init_lr
# Polynomial decay from base_lr to 0.0 over the whole training run.
iter_lr = poly_lr(base_lr, total_train_steps, total_train_steps, end_lr=0.0, power=0.9)
# ICNetdc computes the loss inside construct() when istraining is True (the
# default), which is why Model receives no separate loss function below.
network = ICNetdc(pretrained_path=train_pretrained_model_path, norm_layer=nn.BatchNorm2d)
optim = nn.SGD(params=network.trainable_params(), learning_rate=iter_lr, momentum=optimizer_momentum,
weight_decay=optimizer_weight_decay)
model = Model(network, optimizer=optim, metrics=None)
# Save a checkpoint every `train_save_checkpoint_epochs` epochs.
config_ck_train = CheckpointConfig(save_checkpoint_steps=iters_per_epoch * train_save_checkpoint_epochs,
keep_checkpoint_max=train_keep_checkpoint_max)
# Checkpoints are written to "<project_path>ckpt<device_id>".
ckpoint_cb_train = ModelCheckpoint(prefix='ICNet', directory=project_path + 'ckpt' + str(device_id),
config=config_ck_train)
time_cb_train = TimeMonitor(data_size=dataset.get_dataset_size())
loss_cb_train = LossMonitor()
print("train begins------------------------------")
model.train(epoch=epoch, train_dataset=dataset, callbacks=[ckpoint_cb_train, loss_cb_train, time_cb_train],
dataset_sink_mode=True)
3.模型验证
import os
import time
import sys
import argparse
import yaml
import numpy as np
from PIL import Image
import mindspore.ops as ops
from mindspore import load_param_into_net
from mindspore import load_checkpoint
from mindspore import Tensor
import mindspore.dataset.vision as vision
from src.models import ICNet
from src.metric import SegmentationMetric
from src.logger import SetupLogger
class Evaluator:
    """Evaluate a trained ICNet on the Cityscapes ``val`` split.

    Relies on the module-level names ``dataset_path``,
    ``train_pretrained_model_path`` and ``logger``.

    Args:
        checkpoint_path: Checkpoint file whose parameters are loaded into the
            network. Defaults to the path used by the training step.
    """

    # Lookup table from Cityscapes label ids (-1 .. 33) to the 19 training
    # class indices; ids that are not trained on map to -1.
    _key = np.array([-1, -1, -1, -1, -1, -1,
                     -1, -1, 0, 1, -1, -1,
                     2, 3, 4, -1, -1, -1,
                     5, -1, 6, 7, 8, 9,
                     10, 11, 12, 13, 14, 15,
                     -1, -1, 16, 17, 18])
    # Label ids covered by _key: _key[i] belongs to label id _mapping[i].
    _mapping = np.array(range(-1, len(_key) - 1)).astype('int32')

    def __init__(self, checkpoint_path="/home/ma-user/work/ICNet/ckpt0/ICNet-10_1.ckpt"):
        # get valid dataset images and targets
        self.image_paths, self.mask_paths = _get_city_pairs(dataset_path, "val")

        # create network
        self.model = ICNet(nclass=19, pretrained_path=train_pretrained_model_path, istraining=False)

        # load ckpt
        param_dict = load_checkpoint(checkpoint_path)
        load_param_into_net(self.model, param_dict)

        # evaluation metrics
        self.metric = SegmentationMetric(19)

    def eval(self):
        """Run inference on every validation image and print the metrics.

        Prints the mean IoU, the pixel accuracy and the average time of one
        forward call.

        Raises:
            ValueError: If no validation samples were found.
        """
        self.metric.reset()
        model = self.model.set_train(False)

        logger.info("Start validation, Total sample: {:d}".format(len(self.image_paths)))
        # Without samples there is nothing to average.
        if not self.image_paths:
            raise ValueError("no validation samples found under {!r}".format(dataset_path))

        # Built once; the operator does not depend on the sample.
        expand_dims = ops.ExpandDims()
        list_time = []

        for image_path, mask_path in zip(self.image_paths, self.mask_paths):
            # Context managers close the files once the pixels are converted.
            with Image.open(image_path) as image_file:
                image = self._img_transform(image_file.convert('RGB'))  # image shape: (3,H,W) [0,1]
            with Image.open(mask_path) as mask_file:
                mask = self._mask_transform(mask_file)  # mask shape: (H,w)

            # Add the batch dimension expected by the network.
            image = expand_dims(Tensor(image), 0)

            start_time = time.time()
            output = model(image)
            end_time = time.time()
            step_time = end_time - start_time

            output = output.asnumpy()
            mask = np.expand_dims(mask.asnumpy(), axis=0)
            self.metric.update(output, mask)
            list_time.append(step_time)

        mIoU, pixAcc = self.metric.get()

        average_time = sum(list_time) / len(list_time)

        print("avgmiou", mIoU)
        print("avg_pixacc", pixAcc)
        print("avgtime", average_time)

    def _img_transform(self, image):
        """Apply ``ToTensor`` and ``Normalize`` (``is_hwc=False``) to an image."""
        to_tensor = vision.ToTensor()
        normalize = vision.Normalize([.485, .456, .406], [.229, .224, .225], is_hwc=False)
        image = to_tensor(image)
        image = normalize(image)
        return image

    def _mask_transform(self, mask):
        """Convert a label-id mask into an int32 Tensor of class indices."""
        mask = self._class_to_index(np.array(mask).astype('int32'))
        return Tensor(np.array(mask).astype('int32'))

    def _class_to_index(self, mask):
        """Map raw label ids in ``mask`` to training class indices.

        Raises:
            ValueError: If ``mask`` contains a value outside ``-1 .. 33``.
        """
        # Validate explicitly instead of with assert, which disappears under -O.
        unknown = np.setdiff1d(np.unique(mask), self._mapping)
        if unknown.size:
            raise ValueError('mask contains unsupported label ids: {}'.format(unknown.tolist()))
        # Position of every pixel value inside _mapping.
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        # Translate positions to class indices and restore the mask layout.
        return self._key[index].reshape(mask.shape)
def _get_city_pairs(folder, split='train'):
    """Return the image and mask path lists of one Cityscapes split.

    Each ``.png`` below ``<folder>/leftImg8bit/<split>`` is matched with the
    file ``<folder>/gtFine/<split>/<parent dir>/<mask name>``; the mask name is
    the image name with ``leftImg8bit`` replaced by ``gtFine_labelIds``. Pairs
    with a missing file are printed and left out.

    Args:
        folder: Root directory of the Cityscapes data set.
        split: ``'train'``, ``'val'`` or ``'test'``.

    Returns:
        Tuple ``(img_paths, mask_paths)`` of equally long, index-aligned lists.

    Raises:
        ValueError: If ``split`` is not one of the supported names.
    """
    supported_splits = ('train', 'val', 'test')
    # An unknown split has no folders to scan and nothing to return.
    if split not in supported_splits:
        raise ValueError("split must be one of {}, got {!r}".format(supported_splits, split))

    def get_path_pairs(image_folder, mask_folder):
        img_paths = []
        mask_paths = []
        for root, dirs, files in os.walk(image_folder):
            # Sort in place so that the traversal, and therefore the sample
            # order, does not depend on the file system.
            dirs.sort()
            png_files = sorted(name for name in files if name.endswith('.png'))
            for filename in png_files:
                imgpath = os.path.join(root, filename)
                foldername = os.path.basename(os.path.dirname(imgpath))
                maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
                maskpath = os.path.join(mask_folder, foldername, maskname)
                if os.path.isfile(imgpath) and os.path.isfile(maskpath):
                    img_paths.append(imgpath)
                    mask_paths.append(maskpath)
                else:
                    print('cannot find the mask or image:', imgpath, maskpath)
        print('Found {} images in the folder {}'.format(len(img_paths), image_folder))
        return img_paths, mask_paths

    # "./Cityscapes/leftImg8bit/train" or "./Cityscapes/leftImg8bit/val"
    img_folder = os.path.join(folder, 'leftImg8bit', split)
    # "./Cityscapes/gtFine/train" or "./Cityscapes/gtFine/val"
    mask_folder = os.path.join(folder, 'gtFine', split)
    return get_path_pairs(img_folder, mask_folder)
# Directory that receives the evaluation log file.
train_ckpt_dir="./ckpt/"
# Used only to build the log file name below.
model_name="icnet"
model_backbone="resnet50v1"
# NOTE: Evaluator() is created without arguments below, so this variable is not
# passed to it; change the path inside Evaluator to evaluate another checkpoint.
checkpoint_path="./ckpt0/ICNet-10_1.ckpt"
# Module-level logger; Evaluator.eval() uses it through the global name `logger`.
logger = SetupLogger(name="semantic_segmentation",
save_dir=train_ckpt_dir,
distributed_rank=0,
filename='{}_{}_evaluate_log.txt'.format(model_name,model_backbone))
# Build the evaluator (this loads the network and checkpoint) and run validation.
evaluator = Evaluator()
evaluator.eval()
最后根据路径的图片获取语义分割文本