CEFPN
一、论文核心
1.模型整体借鉴 Libra R-CNN,去掉 F5 和 P5 层,融合 P4、P3、P2 三层特征
2.采用残差融合的亚像素卷积(sub-pixel convolution)代替传统的线性插值进行上采样:先用亚像素卷积把 C5 和 C4 分别上采样到 C4 和 C3 的大小,同时对 C4 和 C3 做 1x1 卷积,再将两者的结果进行特征融合,得到 F4 和 F3
二、网络结构
整体网络结构如下图所示:
1.Sub-pixel Skip Fusion
2.Sub-pixel Context Enhancement
代码如下:
class SCE(nn.Module):
    """Sub-pixel Context Enhancement (SCE) block.

    Three branches are computed from the same input feature map and summed.
    With ``C = in_channels`` and an input of spatial size ``(h, w)``:

    1. 3x3 conv (C -> C/2), then x2 pixel shuffle        -> C/8 channels, (2h, 2w)
    2. 3x3 stride-2 max pool, 1x1 conv (C -> 2C), x4 pixel
       shuffle, then nearest-neighbour resize            -> C/8 channels, (2h, 2w)
    3. global average pool, then 1x1 conv (C -> C/8)     -> C/8 channels, (1, 1),
       broadcast over the spatial dimensions by the final addition.

    Args:
        in_channels (int): number of channels ``C`` of the input feature map.
            Must be a positive multiple of 8 so that both pixel shuffles
            divide the channel dimension evenly.

    Raises:
        ValueError: if ``in_channels`` is not a positive multiple of 8.
    """

    def __init__(self, in_channels):
        super().__init__()
        # Fail early with a clear message instead of an opaque PixelShuffle
        # error on the first forward pass.
        if in_channels <= 0 or in_channels % 8 != 0:
            raise ValueError(
                f"in_channels must be a positive multiple of 8, got {in_channels}"
            )

        # Branch 1: (C, h, w) -> conv3x3 -> (C/2, h, w) -> shuffle x2 -> (C/8, 2h, 2w)
        self.conv3x3 = nn.Conv2d(in_channels, in_channels // 2, kernel_size=3, stride=1, padding=1)
        self.pixel_shuffle = nn.PixelShuffle(upscale_factor=2)

        # Branch 2: (C, h, w) -> max pool -> (C, ~h/2, ~w/2) -> conv1x1 -> 2C channels
        #           -> shuffle x4 -> (C/8, ~2h, ~2w)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv1x1_2 = nn.Conv2d(in_channels, in_channels * 2, kernel_size=1)
        self.pixel_shuffle_4 = nn.PixelShuffle(upscale_factor=4)

        # Branch 3: (C, h, w) -> global average pool -> (C, 1, 1) -> conv1x1 -> (C/8, 1, 1)
        self.globalpool = nn.AdaptiveAvgPool2d((1, 1))
        self.conv1x1_3 = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1)

        # Xavier-uniform weights and zero biases. This is what mmcv's
        # `xavier_init(m, distribution='uniform')` does, which is presumably
        # the helper the original listing relied on.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return the sum of the three branches at twice the input resolution."""
        height, width = x.shape[-2:]
        out_size = (height * 2, width * 2)

        branch1 = self.pixel_shuffle(self.conv3x3(x))

        # The max pool rounds odd sizes up, so the x4 shuffle can overshoot
        # (2h, 2w); the nearest-neighbour resize brings it back to out_size.
        branch2 = self.pixel_shuffle_4(self.conv1x1_2(self.maxpool(x)))
        branch2 = F.interpolate(branch2, size=out_size, mode="nearest")

        # (C/8, 1, 1) global context, broadcast spatially by the addition below.
        branch3 = self.conv1x1_3(self.globalpool(x))

        return branch1 + branch2 + branch3
3.Channel Attention Guided Module
代码如下:
class CAG(nn.Module):
    """Channel Attention Guided (CAG) module.

    Squeezes the input to two per-channel descriptors (global max pool and
    global average pool), passes each through its own 1x1 convolution and
    ReLU, sums the two results and applies a sigmoid. The output has one
    value per channel with spatial size (1, 1).

    Args:
        in_channels (int): number of input channels; the output has the same
            number of channels.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.maxpool = nn.AdaptiveMaxPool2d((1, 1))
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.relu = nn.ReLU()
        # fc1 serves the average-pooled descriptor, fc2 the max-pooled one.
        self.fc1 = nn.Conv2d(in_channels, in_channels, kernel_size=1)
        self.fc2 = nn.Conv2d(in_channels, in_channels, kernel_size=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the channel attention weights for ``x``."""
        avg_descriptor = self.avgpool(x)
        max_descriptor = self.maxpool(x)

        avg_branch = self.relu(self.fc1(avg_descriptor))
        max_branch = self.relu(self.fc2(max_descriptor))

        # NOTE(review): both branches are non-negative after the ReLU, so the
        # sigmoid output always lies in [0.5, 1). Confirm this matches the
        # intended formulation of the attention gate.
        return self.sigmoid(avg_branch + max_branch)
整体代码如下:
class FeaturePyramidNetwork(nn.Module):
    """CE-FPN style neck built on top of four backbone feature maps.

    The input to ``forward`` is an ``OrderedDict`` of exactly four tensors in
    increasing depth order (C2, C3, C4, C5). The module:

    * runs the SCE block on C5,
    * builds F4/F3 by fusing a pixel-shuffled deeper map with a 1x1-projected
      shallower map (sub-pixel skip fusion), and F2 by a 1x1 projection of C2,
    * builds P4/P3/P2 with a nearest-neighbour top-down pathway,
    * averages P4, P3, P2 and the SCE output at P4 resolution into an
      integrated map,
    * rescales the integrated map to each level, adds F2/F3/F4 as residuals
      on the three shallower levels, and gates every level with the CAG
      channel attention.

    Arguments:
        in_channels (int): number of channels of the deepest map C5. C4, C3
            and C2 are expected to carry ``in_channels // 2``,
            ``in_channels // 4`` and ``in_channels // 8`` channels (the layout
            of a ResNet backbone; ``in_channels=2048`` gives 1024/512/256).
            Every output map has ``in_channels // 8`` channels.
        extra_blocks (callable or None): if provided, it is called as
            ``extra_blocks(outs, x, names)`` with the output maps, the
            original feature maps and their names, and must return a new
            ``(outs, names)`` pair.

    Raises:
        ValueError: if ``in_channels`` is not a positive multiple of 8, or if
            ``forward`` does not receive exactly four feature maps.
    """

    def __init__(self, in_channels, extra_blocks=None):
        super().__init__()
        if in_channels <= 0 or in_channels % 8 != 0:
            raise ValueError(
                f"in_channels must be a positive multiple of 8, got {in_channels}"
            )
        self.extra_blocks = extra_blocks

        # Common width of every F/P/R map; equals the SCE output width.
        out_channels = in_channels // 8

        # Sub-pixel upsampling used by the skip fusion (scale factor 2).
        self.pixel_shuffle = nn.PixelShuffle(upscale_factor=2)

        # Context enhancement on C5 and channel attention on the integrated map.
        self.SCE = SCE(in_channels=in_channels)
        self.CAG = CAG(in_channels=in_channels // 8)

        # 1x1 convs applied after the pixel shuffle; the x2 shuffle divides
        # the channel count by 4 (C5: in -> in/4, C4: in/2 -> in/8).
        self.SSF_C5 = nn.Conv2d(in_channels // 4, out_channels, 1)
        self.SSF_C4 = nn.Conv2d(in_channels // 8, out_channels, 1)

        # Lateral 1x1 convs Ci -> Fi.
        self.conv_1x1_4 = nn.Conv2d(in_channels // 2, out_channels, 1)
        self.conv_1x1_3 = nn.Conv2d(in_channels // 4, out_channels, 1)
        self.conv_1x1_2 = nn.Conv2d(in_channels // 8, out_channels, 1)

        # Xavier-uniform weights and zero biases for every conv, including
        # those inside SCE and CAG. This is what mmcv's
        # `xavier_init(m, distribution='uniform')` does, which is presumably
        # the helper the original listing relied on.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Compute the attention-refined pyramid.

        Arguments:
            x (OrderedDict[str, Tensor]): four feature maps ordered C2..C5.

        Returns:
            OrderedDict[str, Tensor]: the refined maps R2..R5 keyed by the
            input names (or whatever ``extra_blocks`` returns, if set).
        """
        names = list(x.keys())
        x = list(x.values())
        if len(x) != 4:
            raise ValueError(
                f"expected exactly 4 feature maps (C2, C3, C4, C5), got {len(x)}"
            )
        C2, C3, C4, C5 = x

        # Context-enhanced features from the deepest level.
        SCE_out = self.SCE(C5)

        # Fi: sub-pixel skip fusion for levels 4 and 3, plain lateral for level 2.
        F4 = self.SSF_C5(self.pixel_shuffle(C5)) + self.conv_1x1_4(C4)
        F3 = self.SSF_C4(self.pixel_shuffle(C4)) + self.conv_1x1_3(C3)
        F2 = self.conv_1x1_2(C2)

        # Pi: nearest-neighbour top-down pathway.
        P4 = F4
        P3 = F3 + F.interpolate(P4, size=F3.shape[-2:], mode="nearest")
        P2 = F2 + F.interpolate(P3, size=F2.shape[-2:], mode="nearest")

        # Integrated map: average of all levels brought to P4 resolution.
        # P4 is already at that size, so it is used as-is.
        out_size = P4.shape[-2:]
        SCE_out = F.interpolate(SCE_out, size=out_size, mode="nearest")
        I_P3 = F.adaptive_max_pool2d(P3, output_size=out_size)
        I_P2 = F.adaptive_max_pool2d(P2, output_size=out_size)
        integrated = (P4 + I_P3 + I_P2 + SCE_out) / 4

        # Channel attention computed once and shared by every output level.
        CA = self.CAG(integrated)

        # Ri: integrated map resized to each level (max pool when shrinking,
        # nearest interpolation when enlarging), plus the Fi residual on
        # levels 2-4, then gated by the channel attention.
        R5 = F.adaptive_max_pool2d(integrated, output_size=C5.shape[-2:]) * CA
        residual_R4 = F.adaptive_max_pool2d(integrated, output_size=C4.shape[-2:])
        R4 = (residual_R4 + F4) * CA
        residual_R3 = F.interpolate(integrated, size=C3.shape[-2:], mode="nearest")
        R3 = (residual_R3 + F3) * CA
        residual_R2 = F.interpolate(integrated, size=C2.shape[-2:], mode="nearest")
        R2 = (residual_R2 + F2) * CA

        outs = [R2, R3, R4, R5]

        # Optionally let extra_blocks post-process the outputs and names
        # (e.g. to append a further pyramid level).
        if self.extra_blocks is not None:
            outs, names = self.extra_blocks(outs, x, names)

        # Make it back an OrderedDict.
        return OrderedDict(zip(names, outs))
三、参考内容
CE-FPN: Enhancing Channel Information for Object Detection
GitHub - RooKichenn/CEFPN: CEFPN复现,论文未开源