
YOLOv5 Improvement: Replacing Standard Convolution and the C3 Module with GhostConvV2 Convolution and the C3GhostV2 Module

Contents

1. GhostNetV2 Core Code

2. Modifying common.py

3. Modifying yolo.py

4. Creating the yaml File

5. Training

6. GFLOPs Not Displayed


1. GhostNetV2 Core Code

In the models folder, create a new modules folder, and inside it create a new .py file, here named Ghostv2.py (the name must match the import statements used in the next two sections). Copy the code below into the file.

# GhostNetV2 modules for YOLOv5 (GhostConvV2, GhostBottleneckV2, C3GhostV2)
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

def autopad(k, p=None, d=1):
    """
    Pads kernel to 'same' output shape, adjusting for optional dilation; returns padding size.

    `k`: kernel, `p`: padding, `d`: dilation.
    """
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
class Conv(nn.Module):
    """Applies a convolution, batch normalization, and activation function to an input tensor in a neural network."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initializes a standard convolution layer with optional batch normalization and activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Applies a convolution followed by batch normalization and an activation function to the input tensor `x`."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Applies a fused convolution and activation function to the input tensor `x`."""
        return self.act(self.conv(x))
class Bottleneck(nn.Module):
    """A bottleneck layer with optional shortcut and group convolution for efficient feature extraction."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        """Initializes a standard bottleneck layer with optional shortcut and group convolution, supporting channel
        expansion.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Processes input through two convolutions, optionally adds shortcut if channel dimensions match; input is a
        tensor.
        """
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class C3(nn.Module):
    """Implements a CSP Bottleneck module with three convolutions for enhanced feature extraction in neural networks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initializes C3 module with options for channel count, bottleneck repetition, shortcut usage, group
        convolutions, and expansion.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        """Performs forward propagation using concatenated outputs from two convolutions and a Bottleneck sequence."""
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
class DWConv(Conv):
    """Implements a depth-wise convolution layer with optional activation for efficient spatial filtering."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
        """Initializes a depth-wise convolution layer with optional activation; args: input channels (c1), output
        channels (c2), kernel size (k), stride (s), dilation (d), and activation flag (act).
        """
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)

class GhostConvV2(nn.Module):
    # Ghostv2 Convolution https://github.com/huawei-noah/Efficient-AI-Backbones/tree/master/ghostnetv2_pytorch
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True, mode=None):  # ch_in, ch_out, kernel, stride, groups
        super(GhostConvV2, self).__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
        self.mode = mode
        self.gate_fn = nn.Sigmoid()
        if mode in ['attn']:
            self.short_conv = nn.Sequential(
                nn.Conv2d(c1, c2, k, s, k // 2, bias=False),
                nn.BatchNorm2d(c2),
                nn.Conv2d(c2, c2, kernel_size=(1, 5), stride=1, padding=(0, 2), groups=c2, bias=False),
                nn.BatchNorm2d(c2),
                nn.Conv2d(c2, c2, kernel_size=(5, 1), stride=1, padding=(2, 0), groups=c2, bias=False),
                nn.BatchNorm2d(c2),
            )

    def forward(self, x):
        y = self.cv1(x)
        if self.mode in ['attn']:
            res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2))
            # res=self.short_conv(x)
            out = torch.cat((y, self.cv2(y)), 1)
            return out * F.interpolate(self.gate_fn(res), size=(out.shape[-2], out.shape[-1]),
                                       mode='nearest')
        return torch.cat((y, self.cv2(y)), 1)


class GhostBottleneckV2(nn.Module):
    # GhostV2 bottleneck https://github.com/huawei-noah/Efficient-AI-Backbones/tree/master/ghostnetv2_pytorch
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(
            GhostConvV2(c1, c_, 1, 1, mode='attn'),  # pw
            DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
            GhostConvV2(c_, c2, 1, 1, act=False))  # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)


class C3GhostV2(C3):
    # C3 module with GhostBottleneckV2()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneckV2(c_, c_) for _ in range(n)))

Note: many tutorials copy the improvement code directly into common.py, which quickly becomes messy once several improvements accumulate. It is better to create a modules folder and keep each improvement there for easier management.
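The resulting layout looks roughly like this (only the files touched in this tutorial are shown; a real YOLOv5 repository contains many more):

models/
├── modules/
│   └── Ghostv2.py              # created above
├── common.py                   # modified in section 2
├── yolo.py                     # modified in section 3
└── yolov5-GhostNetv2.yaml      # created in section 4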

2. Modifying common.py

Near the top of common.py, add the following line to import the contents of Ghostv2.py:

from models.modules.Ghostv2 import *

3. Modifying yolo.py

In yolo.py, add the following line immediately above the line that imports the common module, so that the contents of Ghostv2.py are available:

from models.modules.Ghostv2 import *

Note: do not get the position wrong, otherwise the imported modules may not be found.

The new import should sit directly above the existing common import, as sketched below.
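A sketch of the placement near the top of yolo.py (the neighboring import lines vary between YOLOv5 versions):

from models.modules.Ghostv2 import *  # added: placed above the common import
from models.common import *           # existing import in yolo.py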

Then find the parse_model function in yolo.py and register both GhostConvV2 and C3GhostV2 in it, as sketched below.
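The relevant excerpt of parse_model looks roughly like this (a sketch; the exact set of modules listed differs between YOLOv5 versions, so only the two added names matter here):

if m in {Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d,
         Focus, CrossConv, BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, C3x,
         GhostConvV2, C3GhostV2}:  # <-- GhostConvV2 and C3GhostV2 added here
    c1, c2 = ch[f], args[0]
    if c2 != no:  # if not output
        c2 = make_divisible(c2 * gw, 8)
    args = [c1, c2, *args[1:]]
    if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x, C3GhostV2}:  # <-- C3GhostV2 added here
        args.insert(2, n)  # number of repeats
        n = 1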

4. Creating the yaml File

In the models folder, create a new yaml file, here named yolov5-GhostNetv2.yaml.

Copy the following into the file.

# YOLOv5 by Ultralytics, AGPL-3.0 license

# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, GhostConvV2, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3GhostV2, [128]],
    [-1, 1, GhostConvV2, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3GhostV2, [256]],
    [-1, 1, GhostConvV2, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3GhostV2, [512]],
    [-1, 1, GhostConvV2, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3GhostV2, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, GhostConvV2, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3GhostV2, [512, False]], # 13

    [-1, 1, GhostConvV2, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3GhostV2, [256, False]], # 17 (P3/8-small)

    [-1, 1, GhostConvV2, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3GhostV2, [512, False]], # 20 (P4/16-medium)

    [-1, 1, GhostConvV2, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3GhostV2, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]

At this point the modifications are complete.
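Before training, you can optionally check that the new configuration builds by constructing the model from the yaml (a sketch, run from the YOLOv5 repository root; Model is YOLOv5's standard model-builder class):

# check_cfg.py -- build the modified model and run a dummy forward pass
import torch
from models.yolo import Model  # YOLOv5's model builder class

model = Model('models/yolov5-GhostNetv2.yaml', ch=3, nc=80)  # build from the new yaml
model.eval()
with torch.no_grad():
    model(torch.zeros(1, 3, 640, 640))  # dummy 640x640 RGB image
print('model built and forward pass completed')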

5. Training

Because the backbone network has been changed, training starts from scratch regardless of whether pretrained weights are specified.

Open coco.yaml under the data folder and modify the dataset paths. Prefer absolute paths; relative paths are prone to errors.

Find train.py in the yolov5 directory and modify its parameters.

For now, only --weights, --cfg, and --data need to be set; the rest can stay at their defaults (a sketch of the edited defaults appears after this list).

--weights: start with the official pretrained weights (e.g. yolov5s.pt); after training you can switch to your own weights. As noted above, because the backbone was changed, training starts from scratch either way.

--cfg: use the yolov5-GhostNetv2.yaml file just created under models.

--data: use the coco.yaml whose paths were modified above.

--batch-size: the default is 16. If you run into out-of-memory errors, reduce it to 8 (batch size is normally a multiple of 8).
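Put together, the edited defaults in train.py's argument parser (parse_opt) look roughly like this; the values shown, such as yolov5s.pt and the yaml paths, are example placeholders, and train.py defines many more arguments:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='models/yolov5-GhostNetv2.yaml', help='model yaml created above')
parser.add_argument('--data', type=str, default='data/coco.yaml', help='dataset yaml (use absolute paths inside it)')
parser.add_argument('--batch-size', type=int, default=16, help='reduce to 8 on out-of-memory errors')
opt = parser.parse_args()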

Open train.py. Press Ctrl+Shift+P, search for "Python: Select Interpreter" in the popup, and choose the Python virtual environment you created (here named yolo).

Click the Run button in the upper-right corner and wait for training to finish.

Training results are saved under the runs folder.

6. GFLOPs Not Displayed

After these changes, you may find that running yolo.py no longer displays the GFLOPs metric. (Running yolo.py works the same way as running train.py: you need to set the cfg model-configuration parameter and so on.)

Solution: in the utils folder, open torch_utils.py, find the model_info function, and modify it as follows. (For YOLOv8, the equivalent change can be made in the get_flops function in ultralytics/utils/torch_utils.py.)

try:  # FLOPs
    p = next(model.parameters())
    stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32  # max stride
    im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
    flops = thop.profile(deepcopy(model), inputs=(im,), verbose=False)[0] / 1E9 * 2  # stride GFLOPs
    imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz]  # expand if int/float
    fs = f', {flops * imgsz[0] / stride * imgsz[1] / stride:.1f} GFLOPs'  # 640x640 GFLOPs
except Exception:
    # fallback: profile the model directly on a fixed 640x640 input when the branch above fails
    p = next(model.parameters())
    im = torch.rand(1, 3, 640, 640).to(p.device)
    flops, params = thop.profile(model, inputs=(im,), verbose=False)
    fs = f', {flops * 2 / 1E9:.1f} GFLOPs'

After making the change, run yolo.py again.

