ResNext-50模型进行图像识别
本文为🔗365天深度学习训练营内部文章
原作者:K同学啊
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Input,Dense,Dropout,Conv2D,MaxPool2D,Flatten,GlobalAvgPool2D,concatenate \
,BatchNormalization,Activation,Add,ZeroPadding2D,Lambda
from keras.layers import ReLU
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from keras.callbacks import LearningRateScheduler
from keras.models import Model
'''
分组卷积模块
'''
# 定义分组卷积
def grouped_convolution_block(init_x, strides, groups, g_channels):
    """Apply a 3x3 grouped convolution assembled from per-group Conv2D layers.

    The input is cut along axis 3 into ``groups`` consecutive slices of
    ``g_channels`` channels each. Every slice goes through its own 3x3
    ``Conv2D`` (no bias, 'same' padding), the results are concatenated back
    along axis 3, and the merged tensor is passed through
    ``BatchNormalization`` and ``ReLU``.

    Args:
        init_x: Input tensor. It is indexed as ``t[:, :, :, lo:hi]``, so it
            needs four axes with the channels on the last one.
        strides: Stride forwarded to each per-group ``Conv2D``.
        groups: Number of channel slices (and of per-group convolutions).
        g_channels: Channels taken per slice and filters per group conv.

    Returns:
        The tensor produced by concat -> BatchNormalization -> ReLU.
    """
    group_list = []
    for c in range(groups):
        # Compute the slice bounds now and bind them as default arguments.
        # A lambda that reads the loop variable directly resolves it only
        # when the function is called; if the layer function is invoked again
        # after this loop has finished, every group would see the final value
        # of ``c`` and slice the same (last) channels.
        start = c * g_channels
        stop = start + g_channels
        x = Lambda(lambda t, lo=start, hi=stop: t[:, :, :, lo:hi])(init_x)
        # Convolve this group's channels independently of the others.
        x = Conv2D(filters=g_channels, kernel_size=(3, 3), strides=strides,
                   padding='same', use_bias=False)(x)
        group_list.append(x)
    # Stitch the per-group outputs back together on the channel axis.
    group_merge = concatenate(group_list, axis=3)
    x = BatchNormalization(epsilon=1.001e-5)(group_merge)
    x = ReLU()(x)
    return x
'''
定义残差单元
'''
def block(x, filters, strides=1, groups=32, conv_shortcut=True):
    """Build one residual unit: 1x1 conv -> grouped 3x3 conv -> 1x1 conv, plus shortcut.

    Args:
        x: Input tensor.
        filters: Filters of the first 1x1 convolution; the unit's output and
            the projection shortcut both use ``filters * 2``.
        strides: Stride applied in the grouped convolution and in the
            projection shortcut.
        groups: Number of groups for the grouped convolution.
        conv_shortcut: When true, the shortcut is a 1x1 convolution followed
            by batch normalization; otherwise the input is added unchanged
            (so it presumably must already match the main path's output).

    Returns:
        The tensor after adding the shortcut to the main path and applying ReLU.
    """
    bn_eps = 1.001e-5  # value added to the BN variance to avoid dividing by zero
    out_filters = filters * 2

    # Shortcut branch (created first, as in the original layer order).
    if not conv_shortcut:
        residual = x
    else:
        residual = Conv2D(out_filters, kernel_size=(1, 1), strides=strides,
                          padding='same', use_bias=False)(x)
        residual = BatchNormalization(epsilon=bn_eps)(residual)

    # Main branch, step 1: 1x1 convolution.
    main = Conv2D(filters=filters, kernel_size=(1, 1), strides=1,
                  padding='same', use_bias=False)(x)
    main = BatchNormalization(epsilon=bn_eps)(main)
    main = ReLU()(main)

    # Main branch, step 2: grouped 3x3 convolution over equal channel slices.
    channels_per_group = int(filters / groups)
    main = grouped_convolution_block(main, strides, groups, channels_per_group)

    # Main branch, step 3: 1x1 convolution up to the output width.
    main = Conv2D(filters=out_filters, kernel_size=(1, 1), strides=1,
                  padding='same', use_bias=False)(main)
    main = BatchNormalization(epsilon=bn_eps)(main)

    # Merge with the shortcut, then activate.
    merged = Add()([main, residual])
    return ReLU()(merged)
'''
堆叠残差单元
'''
def stack(x, filters, blocks, strides, groups=32):
    """Chain residual units into one stage.

    The stage starts with one unit that uses the convolutional shortcut and
    the given ``strides``, followed by ``blocks`` further units that use the
    identity shortcut and the default stride. The stage therefore contains
    ``blocks + 1`` units in total.

    Args:
        x: Input tensor.
        filters: ``filters`` argument forwarded to every unit.
        blocks: Number of identity-shortcut units appended after the first one.
        strides: Stride of the first unit.
        groups: Number of groups forwarded to every unit.

    Returns:
        The output tensor of the last unit.
    """
    # The first unit of every stage uses a 1x1 convolution on its shortcut.
    out = block(x, filters, strides=strides, groups=groups)
    for _ in range(blocks):
        out = block(out, filters, groups=groups, conv_shortcut=False)
    return out
'''
搭建ResNext-50网络
'''
def ResNext50(input_shape, num_classes):
    """Assemble the ResNeXt-50 classification model.

    Layout: zero-padded 7x7/2 stem convolution with BN and ReLU, zero-padded
    3x3/2 max pooling, four residual stages, global average pooling and a
    softmax dense layer.

    Args:
        input_shape: Shape passed to ``Input`` (without the batch axis).
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``Model`` mapping the input tensor to the softmax output.
    """
    inputs = Input(shape=input_shape)

    # Stem: pad 3 pixels on each side (e.g. 224x224 -> 230x230), then 7x7/2 conv.
    net = ZeroPadding2D((3, 3))(inputs)
    net = Conv2D(filters=64, kernel_size=(7, 7), strides=2, padding='valid')(net)
    net = BatchNormalization(epsilon=1.001e-5)(net)
    net = ReLU()(net)

    # Pad 1 pixel on each side before the 3x3/2 max pooling.
    net = ZeroPadding2D((1, 1))(net)
    net = MaxPool2D(pool_size=(3, 3), strides=2, padding='valid')(net)

    # Residual stages as (filters, blocks, strides); `stack` adds one
    # convolutional-shortcut unit in front of the `blocks` identity units.
    stage_specs = (
        (128, 2, 1),
        (256, 3, 2),
        (512, 5, 2),
        (1024, 2, 2),
    )
    for stage_filters, stage_blocks, stage_strides in stage_specs:
        net = stack(net, filters=stage_filters, blocks=stage_blocks,
                    strides=stage_strides)

    # Classification head: global average pooling, then softmax.
    net = GlobalAvgPool2D()(net)
    outputs = Dense(num_classes, activation='softmax')(net)

    return Model(inputs=inputs, outputs=outputs)
# Build the network for 224x224 inputs with 3 channels and 4 output classes,
# then print its layer summary.
model = ResNext50(
    input_shape=(224, 224, 3),
    num_classes=4,
)
model.summary()
ResNeXt-50 相比于传统的深层网络(如 ResNet 和 VGG)有明显的优势,特别是在计算效率和模型性能之间找到了较好的平衡。通过引入“基数”(cardinality,即分组卷积的分组数)的概念,ResNeXt-50 能够在不增加网络深度的情况下显著提升模型的能力,同时保持训练的高效性和泛化能力。它适用于各种计算机视觉任务,在需要兼顾效率与准确率的图像分类任务中表现尤为出色。