YOLOv10导入可变形卷积(DCNv4)

DCNv4是一种为广泛的视觉应用设计的高效且有效的算子,因其可变形的特性而对一些形状不规则的小目标的检测有着较好的效果。DCNv1和DCNv2导入都较为简单,直接在模块中添加即可,但DCNv4则需要编译后才能使用。下文将介绍如何在YOLOv10中导入DCNv4。

YOLOv10添加DCNv4的方法

1、下载代码:

代码地址:GitHub - OpenGVLab/DCNv4: [CVPR 2024] Deformable Convolution v4

也可以使用git下载:git clone https://github.com/OpenGVLab/DCNv4.git

下载完后将代码复制到 ultralytics\nn\modules 目录下

2、编译代码:

打开终端,将工作路径切换到 ultralytics/nn/modules/DCNv4/DCNv4_op

输入以下代码进行编译:python setup.py build install

请注意,在编译前请确保自己的环境中已经安装了CUDA以及与之版本相适配的PyTorch,否则编译会报错

3、代码整合

在 ultralytics\nn\modules\block.py 中添加下述代码

import torch.nn as nn
import torch
from ultralytics.nn.modules.DCNv4.DCNv4_op.DCNv4.modules.dcnv4 import DCNv4
from ultralytics.nn.modules.conv import Conv
 
 
class Bottleneck(nn.Module):
    """Bottleneck block: a regular ``Conv`` followed by a ``DCNv4`` layer.

    ``cv1`` maps ``c1`` input channels to the hidden width ``int(c2 * e)`` and
    ``cv2`` is a ``DCNv4`` layer built for that same width. ``DCNv4`` is
    constructed from a single channel count, so the hidden width has to equal
    ``c2`` (``C2f_DCNv4`` instantiates this block with ``e=1.0``).
    """

    # NOTE(review): this snippet is meant to be pasted into block.py, which
    # presumably already defines a class called ``Bottleneck``; defining a
    # second one rebinds the name for the whole module — confirm this is
    # intended or rename the class.

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """Create the two layers of the bottleneck.

        Args:
            c1: Number of input channels.
            c2: Number of output channels.
            shortcut: Add the input to the output when ``c1 == c2``.
            g: Unused; kept so the signature matches the standard bottleneck.
            k: Kernel sizes; only ``k[0]`` (kernel of ``cv1``) is used.
            e: Expansion ratio; the hidden width is ``int(c2 * e)``.

        Raises:
            ValueError: If ``int(c2 * e) != c2``. The DCNv4 layer would
                otherwise receive a tensor whose channel count differs from
                the one it was built for and fail inside ``forward``.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        if c_ != c2:
            raise ValueError(
                f"DCNv4 bottleneck requires int(c2 * e) == c2, but got c2={c2}, e={e} "
                f"(hidden channels {c_}); use e=1.0"
            )
        self.cv1 = Conv(c1, c_, k[0], 1)
        # Built for the channel count that cv1 actually produces.
        self.cv2 = DCNv4(c_)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply ``cv1`` then ``cv2``; add the input back when the shortcut is enabled."""
        y = self.cv2(self.cv1(x))
        return x + y if self.add else y
 
 
class C2f_DCNv4(nn.Module):
    """C2f-style CSP block built from ``Bottleneck`` modules."""

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        """Create the entry convolution, the fusing convolution and ``n`` bottlenecks.

        Args:
            c1: Number of input channels.
            c2: Number of output channels.
            n: Number of chained bottlenecks.
            shortcut: Forwarded to every bottleneck.
            g: Forwarded to every bottleneck.
            e: Expansion ratio; the hidden width is ``int(c2 * e)``.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.c = hidden  # hidden channels
        # 1x1 conv producing the two branches that are split in forward().
        self.cv1 = Conv(c1, 2 * hidden, 1, 1)
        # 1x1 conv fusing the two branches plus the n bottleneck outputs.
        self.cv2 = Conv((2 + n) * hidden, c2, 1)
        bottlenecks = [Bottleneck(hidden, hidden, shortcut, g, k=(3, 3), e=1.0) for _ in range(n)]
        self.m = nn.ModuleList(bottlenecks)

    def _chain_and_fuse(self, branches):
        """Feed the newest branch through each bottleneck in turn, then fuse all branches."""
        for bottleneck in self.m:
            branches.append(bottleneck(branches[-1]))
        return self.cv2(torch.cat(branches, 1))

    def forward(self, x):
        """Run the block, splitting the ``cv1`` output with ``chunk()``."""
        halves = self.cv1(x).chunk(2, 1)
        return self._chain_and_fuse(list(halves))

    def forward_split(self, x):
        """Run the block, splitting the ``cv1`` output with ``split()`` instead of ``chunk()``."""
        halves = self.cv1(x).split((self.c, self.c), 1)
        return self._chain_and_fuse(list(halves))

在 block.py 文件上方的 __all__ 列表中添加 C2f_DCNv4

在 ultralytics\nn\modules\__init__.py中添加C2f_DCNv4

在 ultralytics\nn\tasks.py 中导入 C2f_DCNv4

在 tasks.py 中添加模块 C2f_DCNv4

继续在 tasks.py 中添加如下代码:

# Move the whole model onto the CUDA device before the stride-probing forward
# pass. NOTE(review): presumably needed because the compiled DCNv4 operator
# only runs on the GPU — confirm; this call fails when CUDA is unavailable.
self.model.cuda()

继续在 tasks.py 中,将原来计算 m.stride 的那一行替换为以下代码:

            try:
                # First attempt: run the dummy forward pass on the CPU.
                cpu_probe = torch.zeros(1, ch, s, s)
                m.stride = torch.tensor([s / x.shape[-2] for x in forward(cpu_probe)])  # forward on CPU
            except RuntimeError:
                # The CPU pass failed: move the model to the GPU and retry there.
                # A RuntimeError raised by the retry propagates to the caller.
                cuda_device = torch.device('cuda')
                self.model.to(cuda_device)
                cuda_probe = torch.zeros(1, ch, s, s).to(cuda_device)
                m.stride = torch.tensor([s / x.shape[-2] for x in forward(cuda_probe)])  # forward on CUDA

因DCNv4原始实现的输入维度排列与YOLO特征图的 (N, C, H, W) 排列不一致,需要修改一下:将 ultralytics\nn\modules\DCNv4\DCNv4_op\DCNv4\modules\dcnv4.py 中的代码替换为以下代码

# --------------------------------------------------------
# Deformable Convolution v4
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import math
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.init import xavier_uniform_, constant_
from ..functions import DCNv4Function

class CenterFeatureScaleModule(nn.Module):
    """Parameter-free helper that turns a query into a sigmoid-gated scale."""

    def forward(self,
                query,
                center_feature_scale_proj_weight,
                center_feature_scale_proj_bias):
        """Return ``sigmoid(linear(query, weight, bias))``.

        Args:
            query: Tensor fed to the linear projection.
            center_feature_scale_proj_weight: Weight of the linear projection.
            center_feature_scale_proj_bias: Bias of the linear projection.
        """
        logits = F.linear(query,
                          weight=center_feature_scale_proj_weight,
                          bias=center_feature_scale_proj_bias)
        return torch.sigmoid(logits)

class DCNv4(nn.Module):
    """Deformable Convolution v4 layer that takes channel-first input.

    ``forward`` permutes its 4-D input to channel-last, optionally applies a
    point-wise value projection, predicts an offset/mask tensor with a linear
    layer (optionally preceded by a depth-wise convolution), calls
    ``DCNv4Function`` and permutes the result back to channel-first.
    """

    def __init__(
            self,
            channels,
            kernel_size=3,
            stride=1,
            pad=1,
            dilation=1,
            group=None,
            offset_scale=1.0,
            dw_kernel_size=None,
            center_feature_scale=False,
            remove_center=False,
            output_bias=True,
            without_pointwise=False,
            **kwargs):
        """
        DCNv4 Module

        :param channels: channel count the projections of this layer are built for
        :param kernel_size: kernel size, passed to ``DCNv4Function`` for both axes
        :param stride: stride, passed to ``DCNv4Function`` for both axes
        :param pad: padding, passed to ``DCNv4Function`` for both axes
        :param dilation: dilation, passed to ``DCNv4Function`` for both axes
        :param group: number of groups; defaults to ``channels // 16``
        :param offset_scale: passed through to ``DCNv4Function``
        :param dw_kernel_size: if not ``None``, kernel size of a depth-wise
            convolution applied before the offset/mask prediction
        :param center_feature_scale: if true, blend the operator output with the
            projected input using a learned per-group sigmoid gate
        :param remove_center: converted to ``int``; subtracted from the number of
            kernel points per group and passed to ``DCNv4Function``
        :param output_bias: whether the output projection has a bias
        :param without_pointwise: if true, skip the value and output projections
        :param kwargs: accepted and ignored
        :raises ValueError: if ``group`` is not positive (e.g. ``channels < 16``
            with the default group) or ``channels`` is not divisible by ``group``
        """
        super().__init__()
        self.channels = channels
        self.group = channels // 16 if group is None else group
        # Reject a zero/negative group up front instead of failing with an
        # opaque ZeroDivisionError in the division below.
        if self.group <= 0:
            raise ValueError(
                f'group must be a positive integer, but got {self.group} '
                f'(channels={channels}; the default group is channels // 16)')
        self.group_channels = self.channels // self.group

        if self.channels % self.group != 0:
            raise ValueError(
                f'channels must be divisible by group, but got {self.channels} and {self.group}')

        # Upstream note: a power-of-two number of channels per group is more
        # efficient in the CUDA implementation.

        self.offset_scale = offset_scale
        self.kernel_size = kernel_size
        self.stride = stride
        self.dilation = dilation
        self.pad = pad
        self.dw_kernel_size = dw_kernel_size
        self.center_feature_scale = center_feature_scale
        self.remove_center = int(remove_center)
        self.without_pointwise = without_pointwise

        # Total number of kernel points over all groups.
        self.K = self.group * (kernel_size * kernel_size - self.remove_center)
        if dw_kernel_size is not None:
            self.offset_mask_dw = nn.Conv2d(
                channels, channels, dw_kernel_size, stride=1,
                padding=(dw_kernel_size - 1) // 2, groups=channels)
        # Three predicted values per kernel point, rounded up to a multiple of 8.
        self.offset_mask = nn.Linear(channels, int(math.ceil((self.K * 3) / 8) * 8))
        if not without_pointwise:
            self.value_proj = nn.Linear(channels, channels)
            self.output_proj = nn.Linear(channels, channels, bias=output_bias)
        self._reset_parameters()

        if center_feature_scale:
            self.center_feature_scale_proj_weight = nn.Parameter(
                torch.zeros((self.group, channels), dtype=torch.float))
            self.center_feature_scale_proj_bias = nn.Parameter(
                torch.tensor(0.0, dtype=torch.float).view((1,)).repeat(self.group, ))
            self.center_feature_scale_module = CenterFeatureScaleModule()

    def _reset_parameters(self):
        """Zero the offset/mask predictor and Xavier-initialise the other layers."""
        constant_(self.offset_mask.weight.data, 0.)
        constant_(self.offset_mask.bias.data, 0.)
        # Same condition as in __init__, so the layer is initialised exactly
        # when it exists.
        if self.dw_kernel_size is not None:
            xavier_uniform_(self.offset_mask_dw.weight.data)
            constant_(self.offset_mask_dw.bias.data, 0.)
        if not self.without_pointwise:
            xavier_uniform_(self.value_proj.weight.data)
            constant_(self.value_proj.bias.data, 0.)
            xavier_uniform_(self.output_proj.weight.data)
            if self.output_proj.bias is not None:
                constant_(self.output_proj.bias.data, 0.)

    def _expand_center_feature_scale(self, scale):
        """Repeat a per-group scale ``(N, H, W, group)`` to per-channel ``(N, H, W, channels)``."""
        return scale[..., None].repeat(1, 1, 1, 1, self.group_channels).flatten(-2)

    def forward(self, input):
        """
        :param input: 4-D channel-first tensor ``(N, C, H, W)``
        :return: operator output permuted back to channel-first
        :raises ValueError: if ``input`` is not 4-dimensional
        """
        if input.dim() != 4:
            raise ValueError(
                f'expected a 4-D (N, C, H, W) input, but got {input.dim()} dimensions')

        # Channel-last copy for the linear layers and the operator.
        x = input.permute(0, 2, 3, 1).contiguous()
        if not self.without_pointwise:
            x = self.value_proj(x)

        if self.dw_kernel_size is not None:
            offset_mask_input = self.offset_mask_dw(input).permute(0, 2, 3, 1)
        else:
            offset_mask_input = input.permute(0, 2, 3, 1)
        offset_mask = self.offset_mask(offset_mask_input)

        # Kept for the optional center-feature blending below.
        x_proj = x

        x = DCNv4Function.apply(
            x, offset_mask,
            self.kernel_size, self.kernel_size,
            self.stride, self.stride,
            self.pad, self.pad,
            self.dilation, self.dilation,
            self.group, self.group_channels,
            self.offset_scale,
            256,  # NOTE(review): presumably the im2col step — confirm against DCNv4Function
            self.remove_center
            )

        if self.center_feature_scale:
            center_feature_scale = self.center_feature_scale_module(
                x, self.center_feature_scale_proj_weight, self.center_feature_scale_proj_bias)
            center_feature_scale = self._expand_center_feature_scale(center_feature_scale)
            x = x * (1 - center_feature_scale) + x_proj * center_feature_scale

        if not self.without_pointwise:
            x = self.output_proj(x)

        # Back to channel-first for the surrounding network.
        x = x.permute(0, 3, 1, 2)
        return x

至此就可以将自己网络结构(模型配置文件)中的 C2f 改为 C2f_DCNv4 了


YOLOv10导入可变形卷积(DCNv4)
http://localhost:8090//archives/yolov10dao-ru-ke-bian-xing-juan-ji-dcnv4
作者
曾晓涛
发布于
2025年04月23日
许可协议