LSZTT committed
Commit f5e6066 • 1 parent: 1f7dfbe

Upload 60 files

This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. models/2.py +22 -0
  2. models/A2Attention.py +56 -0
  3. models/CBAM.py +76 -0
  4. models/EMA.py +31 -0
  5. models/LSKblock.py +29 -0
  6. models/S2Attention.py +67 -0
  7. models/SE.py +39 -0
  8. models/SGE.py +54 -0
  9. models/SK.py +53 -0
  10. models/ShuffleAttention.py +71 -0
  11. models/SimAM.py +29 -0
  12. models/__init__.py +0 -0
  13. models/__pycache__/A2Attention.cpython-38.pyc +0 -0
  14. models/__pycache__/CBAM.cpython-38.pyc +0 -0
  15. models/__pycache__/EMA.cpython-38.pyc +0 -0
  16. models/__pycache__/LSKblock.cpython-38.pyc +0 -0
  17. models/__pycache__/S2Attention.cpython-38.pyc +0 -0
  18. models/__pycache__/SE.cpython-38.pyc +0 -0
  19. models/__pycache__/SGE.cpython-38.pyc +0 -0
  20. models/__pycache__/SK.cpython-38.pyc +0 -0
  21. models/__pycache__/ShuffleAttention.cpython-38.pyc +0 -0
  22. models/__pycache__/SimAM.cpython-38.pyc +0 -0
  23. models/__pycache__/__init__.cpython-38.pyc +0 -0
  24. models/__pycache__/common.cpython-38.pyc +0 -0
  25. models/__pycache__/experimental.cpython-38.pyc +0 -0
  26. models/__pycache__/yolo.cpython-38.pyc +0 -0
  27. models/common.py +918 -0
  28. models/experimental.py +111 -0
  29. models/hub/anchors.yaml +59 -0
  30. models/hub/yolov3-spp.yaml +51 -0
  31. models/hub/yolov3-tiny.yaml +41 -0
  32. models/hub/yolov3.yaml +51 -0
  33. models/hub/yolov5-bifpn.yaml +48 -0
  34. models/hub/yolov5-fpn.yaml +42 -0
  35. models/hub/yolov5-p2.yaml +54 -0
  36. models/hub/yolov5-p34.yaml +41 -0
  37. models/hub/yolov5-p6.yaml +56 -0
  38. models/hub/yolov5-p7.yaml +67 -0
  39. models/hub/yolov5-panet.yaml +48 -0
  40. models/hub/yolov5l6.yaml +60 -0
  41. models/hub/yolov5m6.yaml +60 -0
  42. models/hub/yolov5n6.yaml +60 -0
  43. models/hub/yolov5s-LeakyReLU.yaml +49 -0
  44. models/hub/yolov5s-ghost.yaml +48 -0
  45. models/hub/yolov5s-transformer.yaml +48 -0
  46. models/hub/yolov5s6.yaml +60 -0
  47. models/hub/yolov5x6.yaml +60 -0
  48. models/segment/yolov5l-seg.yaml +48 -0
  49. models/segment/yolov5m-seg.yaml +48 -0
  50. models/segment/yolov5n-seg.yaml +48 -0
models/2.py ADDED
@@ -0,0 +1,22 @@
+ import torch
+ import gradio as gr
+
+ model = torch.hub.load('', 'custom', path="best.pt", source='local', force_reload=True)
+
+
+ title = "焊缝缺陷检测"  # weld seam defect detection
+
+ desc = ""
+
+ base_conf, base_iou = 0.30, 0.45
+
+ def det_image(img, conf_thres, iou_thres):
+     model.conf = conf_thres
+     model.iou = iou_thres
+     return model(img).render()[0]
+
+ gr.Interface(inputs=["image", gr.Slider(minimum=0, maximum=1, value=base_conf), gr.Slider(minimum=0, maximum=1, value=base_iou)],
+              outputs=["image"],
+              fn=det_image,
+              title=title,
+              description=desc).launch()
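
For context (not part of the commit): `torch.hub.load('', 'custom', ..., source='local')` resolves the empty repo path against the current working directory, so the demo must be started from a directory containing a YOLOv5 `hubconf.py` next to `best.pt`. A minimal sketch of the same call with an explicit repo path; `path/to/yolov5` and `example.jpg` are placeholders:

# Illustrative sketch only; paths are placeholders, not files in this commit.
import torch

model = torch.hub.load('path/to/yolov5', 'custom', path='best.pt',
                       source='local', force_reload=True)
model.conf = 0.30                  # NMS confidence threshold (AutoShape attribute)
model.iou = 0.45                   # NMS IoU threshold
results = model('example.jpg')     # file path, URL, ndarray or PIL image
print(results.pandas().xyxy[0])    # detections as a pandas DataFrame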
models/A2Attention.py ADDED
@@ -0,0 +1,56 @@
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import init
+ from torch.nn import functional as F
+
+
+
+ class DoubleAttention(nn.Module):
+
+     def __init__(self, in_channels, c_m=128, c_n=128, reconstruct=True):
+         super().__init__()
+         self.in_channels = in_channels
+         self.reconstruct = reconstruct
+         self.c_m = c_m
+         self.c_n = c_n
+         self.convA = nn.Conv2d(in_channels, c_m, 1)
+         self.convB = nn.Conv2d(in_channels, c_n, 1)
+         self.convV = nn.Conv2d(in_channels, c_n, 1)
+         if self.reconstruct:
+             self.conv_reconstruct = nn.Conv2d(c_m, in_channels, kernel_size=1)
+         self.init_weights()
+
+
+     def init_weights(self):
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 init.kaiming_normal_(m.weight, mode='fan_out')
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+             elif isinstance(m, nn.BatchNorm2d):
+                 init.constant_(m.weight, 1)
+                 init.constant_(m.bias, 0)
+             elif isinstance(m, nn.Linear):
+                 init.normal_(m.weight, std=0.001)
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+
+     def forward(self, x):
+         b, c, h, w = x.shape
+         assert c == self.in_channels
+         A = self.convA(x)  # b,c_m,h,w
+         B = self.convB(x)  # b,c_n,h,w
+         V = self.convV(x)  # b,c_n,h,w
+         tmpA = A.view(b, self.c_m, -1)
+         attention_maps = F.softmax(B.view(b, self.c_n, -1), dim=-1)   # explicit dim: softmax over spatial positions
+         attention_vectors = F.softmax(V.view(b, self.c_n, -1), dim=1)  # explicit dim: softmax over the c_n descriptors
+         # step 1: feature gating
+         global_descriptors = torch.bmm(tmpA, attention_maps.permute(0, 2, 1))  # b,c_m,c_n
+         # step 2: feature distribution
+         tmpZ = global_descriptors.matmul(attention_vectors)  # b,c_m,h*w
+         tmpZ = tmpZ.view(b, self.c_m, h, w)  # b,c_m,h,w
+         if self.reconstruct:
+             tmpZ = self.conv_reconstruct(tmpZ)
+
+         return tmpZ
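
A quick shape sanity check for the block above (illustrative, not part of the commit). With `reconstruct=True` the output channel count matches the input, so the module is drop-in wherever `in_channels` matches:

# Sanity-check sketch: DoubleAttention is channel-preserving when reconstruct=True.
import torch
from models.A2Attention import DoubleAttention

x = torch.randn(2, 64, 32, 32)          # b, c, h, w
att = DoubleAttention(in_channels=64)   # c_m = c_n = 128 internal descriptors
print(att(x).shape)                     # expected: torch.Size([2, 64, 32, 32])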
models/CBAM.py ADDED
@@ -0,0 +1,76 @@
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import init
+
+
+ class ChannelAttention(nn.Module):
+     def __init__(self, channel, reduction=16):
+         super().__init__()
+         self.maxpool = nn.AdaptiveMaxPool2d(1)
+         self.avgpool = nn.AdaptiveAvgPool2d(1)
+         self.se = nn.Sequential(
+             nn.Conv2d(channel, channel // reduction, 1, bias=False),
+             nn.ReLU(),
+             nn.Conv2d(channel // reduction, channel, 1, bias=False)
+         )
+         self.sigmoid = nn.Sigmoid()
+
+     def forward(self, x):
+         max_result = self.maxpool(x)
+         avg_result = self.avgpool(x)
+         max_out = self.se(max_result)
+         avg_out = self.se(avg_result)
+         output = self.sigmoid(max_out + avg_out)
+         return output
+
+
+ class SpatialAttention(nn.Module):
+     def __init__(self, kernel_size=7):
+         super().__init__()
+         self.conv = nn.Conv2d(2, 1, kernel_size=kernel_size, padding=kernel_size // 2)
+         self.sigmoid = nn.Sigmoid()
+
+     def forward(self, x):
+         max_result, _ = torch.max(x, dim=1, keepdim=True)
+         avg_result = torch.mean(x, dim=1, keepdim=True)
+         result = torch.cat([max_result, avg_result], 1)
+         output = self.conv(result)
+         output = self.sigmoid(output)
+         return output
+
+
+ class CBAMBlock(nn.Module):
+
+     def __init__(self, channel=512, reduction=16, kernel_size=7):
+         super().__init__()
+         self.ca = ChannelAttention(channel=channel, reduction=reduction)
+         self.sa = SpatialAttention(kernel_size=kernel_size)
+
+     def init_weights(self):
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 init.kaiming_normal_(m.weight, mode='fan_out')
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+             elif isinstance(m, nn.BatchNorm2d):
+                 init.constant_(m.weight, 1)
+                 init.constant_(m.bias, 0)
+             elif isinstance(m, nn.Linear):
+                 init.normal_(m.weight, std=0.001)
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+
+     def forward(self, x):
+         b, c, _, _ = x.size()
+         out = x * self.ca(x)
+         out = out * self.sa(out)
+         return out
+
+
+ if __name__ == '__main__':
+     input = torch.randn(50, 512, 7, 7)
+     kernel_size = input.shape[2]
+     cbam = CBAMBlock(channel=512, reduction=16, kernel_size=kernel_size)
+     output = cbam(input)
+     print(output.shape)
models/EMA.py ADDED
@@ -0,0 +1,31 @@
+ import torch
+ from torch import nn
+
+ class EMA(nn.Module):
+     def __init__(self, channels, factor=8):
+         super(EMA, self).__init__()
+         self.groups = factor
+         assert channels // self.groups > 0
+         self.softmax = nn.Softmax(-1)
+         self.agp = nn.AdaptiveAvgPool2d((1, 1))
+         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
+         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
+         self.gn = nn.GroupNorm(channels // self.groups, channels // self.groups)
+         self.conv1x1 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=1, stride=1, padding=0)
+         self.conv3x3 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=3, stride=1, padding=1)
+
+     def forward(self, x):
+         b, c, h, w = x.size()
+         group_x = x.reshape(b * self.groups, -1, h, w)  # b*g,c//g,h,w
+         x_h = self.pool_h(group_x)
+         x_w = self.pool_w(group_x).permute(0, 1, 3, 2)
+         hw = self.conv1x1(torch.cat([x_h, x_w], dim=2))
+         x_h, x_w = torch.split(hw, [h, w], dim=2)
+         x1 = self.gn(group_x * x_h.sigmoid() * x_w.permute(0, 1, 3, 2).sigmoid())
+         x2 = self.conv3x3(group_x)
+         x11 = self.softmax(self.agp(x1).reshape(b * self.groups, -1, 1).permute(0, 2, 1))
+         x12 = x2.reshape(b * self.groups, c // self.groups, -1)  # b*g, c//g, hw
+         x21 = self.softmax(self.agp(x2).reshape(b * self.groups, -1, 1).permute(0, 2, 1))
+         x22 = x1.reshape(b * self.groups, c // self.groups, -1)  # b*g, c//g, hw
+         weights = (torch.matmul(x11, x12) + torch.matmul(x21, x22)).reshape(b * self.groups, 1, h, w)
+         return (group_x * weights.sigmoid()).reshape(b, c, h, w)
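
Illustrative usage, not part of the commit: the grouped reshape in `forward` requires `channels` to be divisible by `factor`, and the block is shape-preserving:

# Sanity-check sketch for EMA: channels must divide evenly into `factor` groups.
import torch
from models.EMA import EMA

x = torch.randn(2, 64, 32, 32)
ema = EMA(channels=64, factor=8)   # 64 / 8 = 8 channels per group
print(ema(x).shape)                # expected: torch.Size([2, 64, 32, 32])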
models/LSKblock.py ADDED
@@ -0,0 +1,29 @@
+ import torch
+ import torch.nn as nn
+
+
+ class LSKblock(nn.Module):
+     def __init__(self, dim):
+         super().__init__()
+         self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
+         self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3)
+         self.conv1 = nn.Conv2d(dim, dim // 2, 1)
+         self.conv2 = nn.Conv2d(dim, dim // 2, 1)
+         self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3)
+         self.conv = nn.Conv2d(dim // 2, dim, 1)
+
+     def forward(self, x):
+         attn1 = self.conv0(x)
+         attn2 = self.conv_spatial(attn1)
+
+         attn1 = self.conv1(attn1)
+         attn2 = self.conv2(attn2)
+
+         attn = torch.cat([attn1, attn2], dim=1)
+         avg_attn = torch.mean(attn, dim=1, keepdim=True)
+         max_attn, _ = torch.max(attn, dim=1, keepdim=True)
+         agg = torch.cat([avg_attn, max_attn], dim=1)
+         sig = self.conv_squeeze(agg).sigmoid()
+         attn = attn1 * sig[:, 0, :, :].unsqueeze(1) + attn2 * sig[:, 1, :, :].unsqueeze(1)
+         attn = self.conv(attn)
+         return x * attn
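
Illustrative usage, not part of the commit: the 5x5 depth-wise conv followed by the 7x7 depth-wise conv with dilation 3 approximates a large (roughly 23x23) receptive field, and the block keeps the channel count:

# Sanity-check sketch for LSKblock: output shape matches the input.
import torch
from models.LSKblock import LSKblock

x = torch.randn(2, 64, 32, 32)
blk = LSKblock(dim=64)
print(blk(x).shape)   # expected: torch.Size([2, 64, 32, 32])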
models/S2Attention.py ADDED
@@ -0,0 +1,67 @@
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import init
+
+
+ def spatial_shift1(x):
+     b, w, h, c = x.size()
+     x[:, 1:, :, :c // 4] = x[:, :w - 1, :, :c // 4]
+     x[:, :w - 1, :, c // 4:c // 2] = x[:, 1:, :, c // 4:c // 2]
+     x[:, :, 1:, c // 2:c * 3 // 4] = x[:, :, :h - 1, c // 2:c * 3 // 4]
+     x[:, :, :h - 1, 3 * c // 4:] = x[:, :, 1:, 3 * c // 4:]
+     return x
+
+
+ def spatial_shift2(x):
+     b, w, h, c = x.size()
+     x[:, :, 1:, :c // 4] = x[:, :, :h - 1, :c // 4]
+     x[:, :, :h - 1, c // 4:c // 2] = x[:, :, 1:, c // 4:c // 2]
+     x[:, 1:, :, c // 2:c * 3 // 4] = x[:, :w - 1, :, c // 2:c * 3 // 4]
+     x[:, :w - 1, :, 3 * c // 4:] = x[:, 1:, :, 3 * c // 4:]
+     return x
+
+
+ class SplitAttention(nn.Module):
+     def __init__(self, channel=512, k=3):
+         super().__init__()
+         self.channel = channel
+         self.k = k
+         self.mlp1 = nn.Linear(channel, channel, bias=False)
+         self.gelu = nn.GELU()
+         self.mlp2 = nn.Linear(channel, channel * k, bias=False)
+         self.softmax = nn.Softmax(1)
+
+     def forward(self, x_all):
+         b, k, h, w, c = x_all.shape
+         x_all = x_all.reshape(b, k, -1, c)  # bs,k,n,c
+         a = torch.sum(torch.sum(x_all, 1), 1)  # bs,c
+         hat_a = self.mlp2(self.gelu(self.mlp1(a)))  # bs,kc
+         hat_a = hat_a.reshape(b, self.k, c)  # bs,k,c
+         bar_a = self.softmax(hat_a)  # bs,k,c
+         attention = bar_a.unsqueeze(-2)  # bs,k,1,c
+         out = attention * x_all  # bs,k,n,c
+         out = torch.sum(out, 1).reshape(b, h, w, c)
+         return out
+
+
+ class S2Attention(nn.Module):
+
+     def __init__(self, channels=512):
+         super().__init__()
+         self.mlp1 = nn.Linear(channels, channels * 3)
+         self.mlp2 = nn.Linear(channels, channels)
+         self.split_attention = SplitAttention(channels)  # tie the inner MLP width to `channels` so counts other than 512 work
+
+     def forward(self, x):
+         b, c, w, h = x.size()
+         x = x.permute(0, 2, 3, 1)
+         x = self.mlp1(x)
+         x1 = spatial_shift1(x[:, :, :, :c])
+         x2 = spatial_shift2(x[:, :, :, c:c * 2])
+         x3 = x[:, :, :, c * 2:]
+         x_all = torch.stack([x1, x2, x3], 1)
+         a = self.split_attention(x_all)
+         x = self.mlp2(a)
+         x = x.permute(0, 3, 1, 2)
+         return x
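
Illustrative usage, not part of the commit: `mlp1` expands C channels to 3C, the three C-wide slices become two spatially shifted branches plus an identity branch, and SplitAttention fuses them back to C channels (for channel counts other than 512 this relies on the `SplitAttention(channels)` wiring above):

# Sanity-check sketch for S2Attention.
import torch
from models.S2Attention import S2Attention

x = torch.randn(2, 64, 16, 16)
s2 = S2Attention(channels=64)
print(s2(x).shape)   # expected: torch.Size([2, 64, 16, 16])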
models/SE.py ADDED
@@ -0,0 +1,39 @@
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import init
+
+
+
+ class SEAttention(nn.Module):
+
+     def __init__(self, channel=512, reduction=16):
+         super().__init__()
+         self.avg_pool = nn.AdaptiveAvgPool2d(1)
+         self.fc = nn.Sequential(
+             nn.Linear(channel, channel // reduction, bias=False),
+             nn.ReLU(inplace=True),
+             nn.Linear(channel // reduction, channel, bias=False),
+             nn.Sigmoid()
+         )
+
+
+     def init_weights(self):
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 init.kaiming_normal_(m.weight, mode='fan_out')
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+             elif isinstance(m, nn.BatchNorm2d):
+                 init.constant_(m.weight, 1)
+                 init.constant_(m.bias, 0)
+             elif isinstance(m, nn.Linear):
+                 init.normal_(m.weight, std=0.001)
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+
+     def forward(self, x):
+         b, c, _, _ = x.size()
+         y = self.avg_pool(x).view(b, c)
+         y = self.fc(y).view(b, c, 1, 1)
+         return x * y.expand_as(x)
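
Illustrative usage, not part of the commit: the pooled per-channel vector is squeezed by `reduction`, re-expanded, and the sigmoid output rescales each channel of the input:

# Sanity-check sketch for SEAttention.
import torch
from models.SE import SEAttention

x = torch.randn(2, 64, 32, 32)
se = SEAttention(channel=64, reduction=16)   # bottleneck width 64 // 16 = 4
print(se(x).shape)                           # expected: torch.Size([2, 64, 32, 32])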
models/SGE.py ADDED
@@ -0,0 +1,54 @@
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import init
+
+
+ class SpatialGroupEnhance(nn.Module):
+     def __init__(self, groups=8):
+         super().__init__()
+         self.groups = groups
+         self.avg_pool = nn.AdaptiveAvgPool2d(1)
+         self.weight = nn.Parameter(torch.zeros(1, groups, 1, 1))
+         self.bias = nn.Parameter(torch.zeros(1, groups, 1, 1))
+         self.sig = nn.Sigmoid()
+         self.init_weights()
+
+     def init_weights(self):
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 init.kaiming_normal_(m.weight, mode='fan_out')
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+             elif isinstance(m, nn.BatchNorm2d):
+                 init.constant_(m.weight, 1)
+                 init.constant_(m.bias, 0)
+             elif isinstance(m, nn.Linear):
+                 init.normal_(m.weight, std=0.001)
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+
+     def forward(self, x):
+         b, c, h, w = x.shape
+         x = x.view(b * self.groups, -1, h, w)  # bs*g,dim//g,h,w
+         xn = x * self.avg_pool(x)  # bs*g,dim//g,h,w
+         xn = xn.sum(dim=1, keepdim=True)  # bs*g,1,h,w
+         t = xn.view(b * self.groups, -1)  # bs*g,h*w
+
+         t = t - t.mean(dim=1, keepdim=True)  # bs*g,h*w
+         std = t.std(dim=1, keepdim=True) + 1e-5
+         t = t / std  # bs*g,h*w
+         t = t.view(b, self.groups, h, w)  # bs,g,h,w
+
+         t = t * self.weight + self.bias  # bs,g,h,w
+         t = t.view(b * self.groups, 1, h, w)  # bs*g,1,h,w
+         x = x * self.sig(t)
+         x = x.view(b, c, h, w)
+         return x
+
+
+ if __name__ == '__main__':
+     input = torch.randn(50, 512, 7, 7)
+     sge = SpatialGroupEnhance(groups=8)
+     output = sge(input)
+     print(output.shape)
models/SK.py ADDED
@@ -0,0 +1,53 @@
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import init
+ from collections import OrderedDict
+
+
+ class SKAttention(nn.Module):
+
+     def __init__(self, channel=512, kernels=[1, 3, 5, 7], reduction=16, group=1, L=32):
+         super().__init__()
+         self.d = max(L, channel // reduction)
+         self.convs = nn.ModuleList([])
+         for k in kernels:
+             self.convs.append(
+                 nn.Sequential(OrderedDict([
+                     ('conv', nn.Conv2d(channel, channel, kernel_size=k, padding=k // 2, groups=group)),
+                     ('bn', nn.BatchNorm2d(channel)),
+                     ('relu', nn.ReLU())
+                 ]))
+             )
+         self.fc = nn.Linear(channel, self.d)
+         self.fcs = nn.ModuleList([])
+         for i in range(len(kernels)):
+             self.fcs.append(nn.Linear(self.d, channel))
+         self.softmax = nn.Softmax(dim=0)
+
+     def forward(self, x):
+         bs, c, _, _ = x.size()
+         conv_outs = []
+         ### split
+         for conv in self.convs:
+             conv_outs.append(conv(x))
+         feats = torch.stack(conv_outs, 0)  # k,bs,channel,h,w
+
+         ### fuse
+         U = sum(conv_outs)  # bs,c,h,w
+
+         ### reduction channel
+         S = U.mean(-1).mean(-1)  # bs,c
+         Z = self.fc(S)  # bs,d
+
+         ### calculate attention weight
+         weights = []
+         for fc in self.fcs:
+             weight = fc(Z)
+             weights.append(weight.view(bs, c, 1, 1))  # bs,channel
+         attention_weights = torch.stack(weights, 0)  # k,bs,channel,1,1
+         attention_weights = self.softmax(attention_weights)  # k,bs,channel,1,1
+
+         ### fuse
+         V = (attention_weights * feats).sum(0)
+         return V
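
Illustrative usage, not part of the commit: every kernel branch preserves the spatial size (padding = k // 2), and the softmax over dim=0 normalizes the per-channel weights across the branches before fusion:

# Sanity-check sketch for SKAttention.
import torch
from models.SK import SKAttention

x = torch.randn(2, 64, 32, 32)
sk = SKAttention(channel=64, reduction=16)
print(sk(x).shape)   # expected: torch.Size([2, 64, 32, 32])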
models/ShuffleAttention.py ADDED
@@ -0,0 +1,71 @@
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import init
+ from torch.nn.parameter import Parameter
+
+
+ class ShuffleAttention(nn.Module):
+
+     def __init__(self, channel=512, reduction=16, G=8):
+         super().__init__()
+         self.G = G
+         self.channel = channel
+         self.avg_pool = nn.AdaptiveAvgPool2d(1)
+         self.gn = nn.GroupNorm(channel // (2 * G), channel // (2 * G))
+         self.cweight = Parameter(torch.zeros(1, channel // (2 * G), 1, 1))
+         self.cbias = Parameter(torch.ones(1, channel // (2 * G), 1, 1))
+         self.sweight = Parameter(torch.zeros(1, channel // (2 * G), 1, 1))
+         self.sbias = Parameter(torch.ones(1, channel // (2 * G), 1, 1))
+         self.sigmoid = nn.Sigmoid()
+
+     def init_weights(self):
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 init.kaiming_normal_(m.weight, mode='fan_out')
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+             elif isinstance(m, nn.BatchNorm2d):
+                 init.constant_(m.weight, 1)
+                 init.constant_(m.bias, 0)
+             elif isinstance(m, nn.Linear):
+                 init.normal_(m.weight, std=0.001)
+                 if m.bias is not None:
+                     init.constant_(m.bias, 0)
+
+     @staticmethod
+     def channel_shuffle(x, groups):
+         b, c, h, w = x.shape
+         x = x.reshape(b, groups, -1, h, w)
+         x = x.permute(0, 2, 1, 3, 4)
+
+         # flatten
+         x = x.reshape(b, -1, h, w)
+
+         return x
+
+     def forward(self, x):
+         b, c, h, w = x.size()
+         # group into subfeatures
+         x = x.view(b * self.G, -1, h, w)  # bs*G,c//G,h,w
+
+         # channel_split
+         x_0, x_1 = x.chunk(2, dim=1)  # bs*G,c//(2*G),h,w
+
+         # channel attention
+         x_channel = self.avg_pool(x_0)  # bs*G,c//(2*G),1,1
+         x_channel = self.cweight * x_channel + self.cbias  # bs*G,c//(2*G),1,1
+         x_channel = x_0 * self.sigmoid(x_channel)
+
+         # spatial attention
+         x_spatial = self.gn(x_1)  # bs*G,c//(2*G),h,w
+         x_spatial = self.sweight * x_spatial + self.sbias  # bs*G,c//(2*G),h,w
+         x_spatial = x_1 * self.sigmoid(x_spatial)  # bs*G,c//(2*G),h,w
+
+         # concatenate along channel axis
+         out = torch.cat([x_channel, x_spatial], dim=1)  # bs*G,c//G,h,w
+         out = out.contiguous().view(b, -1, h, w)
+
+         # channel shuffle
+         out = self.channel_shuffle(out, 2)
+         return out
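
Illustrative usage, not part of the commit: `channel` must be divisible by 2 * G, since each group is split into a channel-attention half and a spatial-attention half before the final channel shuffle re-mixes them:

# Sanity-check sketch for ShuffleAttention.
import torch
from models.ShuffleAttention import ShuffleAttention

x = torch.randn(2, 64, 32, 32)
sa = ShuffleAttention(channel=64, G=8)   # 64 / (2 * 8) = 4 channels per half
print(sa(x).shape)                       # expected: torch.Size([2, 64, 32, 32])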
models/SimAM.py ADDED
@@ -0,0 +1,29 @@
+ import torch
+ import torch.nn as nn
+
+
+ class SimAM(torch.nn.Module):
+     def __init__(self, e_lambda=1e-4):
+         super(SimAM, self).__init__()
+
+         self.activation = nn.Sigmoid()
+         self.e_lambda = e_lambda
+
+     def __repr__(self):
+         s = self.__class__.__name__ + '('
+         s += ('lambda=%f)' % self.e_lambda)
+         return s
+
+     @staticmethod
+     def get_module_name():
+         return "simam"
+
+     def forward(self, x):
+         b, c, h, w = x.size()
+
+         n = w * h - 1
+
+         x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2)
+         y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5
+
+         return x * self.activation(y)
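
Illustrative usage, not part of the commit: SimAM is parameter-free; it derives a per-pixel energy from the channel-wise variance (so it needs more than one spatial position) and gates the input with a sigmoid of that energy:

# Sanity-check sketch for SimAM.
import torch
from models.SimAM import SimAM

x = torch.randn(2, 64, 32, 32)
sim = SimAM(e_lambda=1e-4)
print(sim(x).shape)   # expected: torch.Size([2, 64, 32, 32])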
models/__init__.py ADDED
File without changes
models/__pycache__/A2Attention.cpython-38.pyc ADDED
Binary file (1.93 kB)
models/__pycache__/CBAM.cpython-38.pyc ADDED
Binary file (3.02 kB)
models/__pycache__/EMA.cpython-38.pyc ADDED
Binary file (1.64 kB)
models/__pycache__/LSKblock.cpython-38.pyc ADDED
Binary file (1.34 kB)
models/__pycache__/S2Attention.cpython-38.pyc ADDED
Binary file (3 kB)
models/__pycache__/SE.cpython-38.pyc ADDED
Binary file (1.52 kB)
models/__pycache__/SGE.cpython-38.pyc ADDED
Binary file (1.91 kB)
models/__pycache__/SK.cpython-38.pyc ADDED
Binary file (1.63 kB)
models/__pycache__/ShuffleAttention.cpython-38.pyc ADDED
Binary file (2.34 kB)
models/__pycache__/SimAM.cpython-38.pyc ADDED
Binary file (1.28 kB)
models/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (146 Bytes)
models/__pycache__/common.cpython-38.pyc ADDED
Binary file (38.8 kB)
models/__pycache__/experimental.cpython-38.pyc ADDED
Binary file (4.86 kB)
models/__pycache__/yolo.cpython-38.pyc ADDED
Binary file (16.8 kB)
models/common.py ADDED
@@ -0,0 +1,918 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+ """
3
+ Common modules
4
+ """
5
+
6
+ import ast
7
+ import contextlib
8
+ import json
9
+ import math
10
+ import platform
11
+ import warnings
12
+ import zipfile
13
+ from collections import OrderedDict, namedtuple
14
+ from copy import copy
15
+ from pathlib import Path
16
+ from urllib.parse import urlparse
17
+
18
+ import cv2
19
+ import numpy as np
20
+ import pandas as pd
21
+ import requests
22
+ import torch
23
+ import torch.nn as nn
24
+ from PIL import Image
25
+ from torch.cuda import amp
26
+
27
+ # Import 'ultralytics' package or install it if missing
28
+ try:
29
+ import ultralytics
30
+
31
+ assert hasattr(ultralytics, '__version__') # verify package is not directory
32
+ except (ImportError, AssertionError):
33
+ import os
34
+
35
+ os.system('pip install -U ultralytics')
36
+ import ultralytics
37
+
38
+ from ultralytics.utils.plotting import Annotator, colors, save_one_box
39
+
40
+ from utils import TryExcept
41
+ from utils.dataloaders import exif_transpose, letterbox
42
+ from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
43
+ increment_path, is_jupyter, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy,
44
+ xyxy2xywh, yaml_load)
45
+ from utils.torch_utils import copy_attr, smart_inference_mode
46
+
47
+
48
+ def autopad(k, p=None, d=1): # kernel, padding, dilation
49
+ # Pad to 'same' shape outputs
50
+ if d > 1:
51
+ k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
52
+ if p is None:
53
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
54
+ return p
55
+
56
+
57
+ class Conv(nn.Module):
58
+ # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
59
+ default_act = nn.SiLU() # default activation
60
+
61
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
62
+ super().__init__()
63
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
64
+ self.bn = nn.BatchNorm2d(c2)
65
+ self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
66
+
67
+ def forward(self, x):
68
+ return self.act(self.bn(self.conv(x)))
69
+
70
+ def forward_fuse(self, x):
71
+ return self.act(self.conv(x))
72
+
73
+
74
+ class DWConv(Conv):
75
+ # Depth-wise convolution
76
+ def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
77
+ super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
78
+
79
+
80
+ class DWConvTranspose2d(nn.ConvTranspose2d):
81
+ # Depth-wise transpose convolution
82
+ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
83
+ super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
84
+
85
+
86
+ class TransformerLayer(nn.Module):
87
+ # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
88
+ def __init__(self, c, num_heads):
89
+ super().__init__()
90
+ self.q = nn.Linear(c, c, bias=False)
91
+ self.k = nn.Linear(c, c, bias=False)
92
+ self.v = nn.Linear(c, c, bias=False)
93
+ self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
94
+ self.fc1 = nn.Linear(c, c, bias=False)
95
+ self.fc2 = nn.Linear(c, c, bias=False)
96
+
97
+ def forward(self, x):
98
+ x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
99
+ x = self.fc2(self.fc1(x)) + x
100
+ return x
101
+
102
+
103
+ class TransformerBlock(nn.Module):
104
+ # Vision Transformer https://arxiv.org/abs/2010.11929
105
+ def __init__(self, c1, c2, num_heads, num_layers):
106
+ super().__init__()
107
+ self.conv = None
108
+ if c1 != c2:
109
+ self.conv = Conv(c1, c2)
110
+ self.linear = nn.Linear(c2, c2) # learnable position embedding
111
+ self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
112
+ self.c2 = c2
113
+
114
+ def forward(self, x):
115
+ if self.conv is not None:
116
+ x = self.conv(x)
117
+ b, _, w, h = x.shape
118
+ p = x.flatten(2).permute(2, 0, 1)
119
+ return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
120
+
121
+
122
+ class Bottleneck(nn.Module):
123
+ # Standard bottleneck
124
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
125
+ super().__init__()
126
+ c_ = int(c2 * e) # hidden channels
127
+ self.cv1 = Conv(c1, c_, 1, 1)
128
+ self.cv2 = Conv(c_, c2, 3, 1, g=g)
129
+ self.add = shortcut and c1 == c2
130
+
131
+ def forward(self, x):
132
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
133
+
134
+
135
+ class BottleneckCSP(nn.Module):
136
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
137
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
138
+ super().__init__()
139
+ c_ = int(c2 * e) # hidden channels
140
+ self.cv1 = Conv(c1, c_, 1, 1)
141
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
142
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
143
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
144
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
145
+ self.act = nn.SiLU()
146
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
147
+
148
+ def forward(self, x):
149
+ y1 = self.cv3(self.m(self.cv1(x)))
150
+ y2 = self.cv2(x)
151
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
152
+
153
+
154
+ class CrossConv(nn.Module):
155
+ # Cross Convolution Downsample
156
+ def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
157
+ # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
158
+ super().__init__()
159
+ c_ = int(c2 * e) # hidden channels
160
+ self.cv1 = Conv(c1, c_, (1, k), (1, s))
161
+ self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
162
+ self.add = shortcut and c1 == c2
163
+
164
+ def forward(self, x):
165
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
166
+
167
+
168
+ class C3(nn.Module):
169
+ # CSP Bottleneck with 3 convolutions
170
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
171
+ super().__init__()
172
+ c_ = int(c2 * e) # hidden channels
173
+ self.cv1 = Conv(c1, c_, 1, 1)
174
+ self.cv2 = Conv(c1, c_, 1, 1)
175
+ self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
176
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
177
+
178
+ def forward(self, x):
179
+ return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
180
+
181
+
182
+ class C3x(C3):
183
+ # C3 module with cross-convolutions
184
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
185
+ super().__init__(c1, c2, n, shortcut, g, e)
186
+ c_ = int(c2 * e)
187
+ self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
188
+
189
+
190
+ class C3TR(C3):
191
+ # C3 module with TransformerBlock()
192
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
193
+ super().__init__(c1, c2, n, shortcut, g, e)
194
+ c_ = int(c2 * e)
195
+ self.m = TransformerBlock(c_, c_, 4, n)
196
+
197
+
198
+ class C3SPP(C3):
199
+ # C3 module with SPP()
200
+ def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
201
+ super().__init__(c1, c2, n, shortcut, g, e)
202
+ c_ = int(c2 * e)
203
+ self.m = SPP(c_, c_, k)
204
+
205
+
206
+ class C3Ghost(C3):
207
+ # C3 module with GhostBottleneck()
208
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
209
+ super().__init__(c1, c2, n, shortcut, g, e)
210
+ c_ = int(c2 * e) # hidden channels
211
+ self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
212
+
213
+
214
+ class SPP(nn.Module):
215
+ # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
216
+ def __init__(self, c1, c2, k=(5, 9, 13)):
217
+ super().__init__()
218
+ c_ = c1 // 2 # hidden channels
219
+ self.cv1 = Conv(c1, c_, 1, 1)
220
+ self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
221
+ self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
222
+
223
+ def forward(self, x):
224
+ x = self.cv1(x)
225
+ with warnings.catch_warnings():
226
+ warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
227
+ return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
228
+
229
+
230
+ class SPPF(nn.Module):
231
+ # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
232
+ def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
233
+ super().__init__()
234
+ c_ = c1 // 2 # hidden channels
235
+ self.cv1 = Conv(c1, c_, 1, 1)
236
+ self.cv2 = Conv(c_ * 4, c2, 1, 1)
237
+ self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
238
+
239
+ def forward(self, x):
240
+ x = self.cv1(x)
241
+ with warnings.catch_warnings():
242
+ warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
243
+ y1 = self.m(x)
244
+ y2 = self.m(y1)
245
+ return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
246
+
247
+
248
+ class Focus(nn.Module):
249
+ # Focus wh information into c-space
250
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
251
+ super().__init__()
252
+ self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
253
+ # self.contract = Contract(gain=2)
254
+
255
+ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
256
+ return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
257
+ # return self.conv(self.contract(x))
258
+
259
+
260
+ class GhostConv(nn.Module):
261
+ # Ghost Convolution https://github.com/huawei-noah/ghostnet
262
+ def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
263
+ super().__init__()
264
+ c_ = c2 // 2 # hidden channels
265
+ self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
266
+ self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
267
+
268
+ def forward(self, x):
269
+ y = self.cv1(x)
270
+ return torch.cat((y, self.cv2(y)), 1)
271
+
272
+
273
+ class GhostBottleneck(nn.Module):
274
+ # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
275
+ def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
276
+ super().__init__()
277
+ c_ = c2 // 2
278
+ self.conv = nn.Sequential(
279
+ GhostConv(c1, c_, 1, 1), # pw
280
+ DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
281
+ GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
282
+ self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
283
+ act=False)) if s == 2 else nn.Identity()
284
+
285
+ def forward(self, x):
286
+ return self.conv(x) + self.shortcut(x)
287
+
288
+
289
+ class Contract(nn.Module):
290
+ # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
291
+ def __init__(self, gain=2):
292
+ super().__init__()
293
+ self.gain = gain
294
+
295
+ def forward(self, x):
296
+ b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
297
+ s = self.gain
298
+ x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
299
+ x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
300
+ return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
301
+
302
+
303
+ class Expand(nn.Module):
304
+ # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
305
+ def __init__(self, gain=2):
306
+ super().__init__()
307
+ self.gain = gain
308
+
309
+ def forward(self, x):
310
+ b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
311
+ s = self.gain
312
+ x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
313
+ x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
314
+ return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
315
+
316
+
317
+ class Concat(nn.Module):
318
+ # Concatenate a list of tensors along dimension
319
+ def __init__(self, dimension=1):
320
+ super().__init__()
321
+ self.d = dimension
322
+
323
+ def forward(self, x):
324
+ return torch.cat(x, self.d)
325
+
326
+
327
+ class DetectMultiBackend(nn.Module):
328
+ # YOLOv5 MultiBackend class for python inference on various backends
329
+ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
330
+ # Usage:
331
+ # PyTorch: weights = *.pt
332
+ # TorchScript: *.torchscript
333
+ # ONNX Runtime: *.onnx
334
+ # ONNX OpenCV DNN: *.onnx --dnn
335
+ # OpenVINO: *_openvino_model
336
+ # CoreML: *.mlmodel
337
+ # TensorRT: *.engine
338
+ # TensorFlow SavedModel: *_saved_model
339
+ # TensorFlow GraphDef: *.pb
340
+ # TensorFlow Lite: *.tflite
341
+ # TensorFlow Edge TPU: *_edgetpu.tflite
342
+ # PaddlePaddle: *_paddle_model
343
+ from models.experimental import attempt_download, attempt_load # scoped to avoid circular import
344
+
345
+ super().__init__()
346
+ w = str(weights[0] if isinstance(weights, list) else weights)
347
+ pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
348
+ fp16 &= pt or jit or onnx or engine or triton # FP16
349
+ nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH)
350
+ stride = 32 # default stride
351
+ cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA
352
+ if not (pt or triton):
353
+ w = attempt_download(w) # download if not local
354
+
355
+ if pt: # PyTorch
356
+ model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
357
+ stride = max(int(model.stride.max()), 32) # model stride
358
+ names = model.module.names if hasattr(model, 'module') else model.names # get class names
359
+ model.half() if fp16 else model.float()
360
+ self.model = model # explicitly assign for to(), cpu(), cuda(), half()
361
+ elif jit: # TorchScript
362
+ LOGGER.info(f'Loading {w} for TorchScript inference...')
363
+ extra_files = {'config.txt': ''} # model metadata
364
+ model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
365
+ model.half() if fp16 else model.float()
366
+ if extra_files['config.txt']: # load metadata dict
367
+ d = json.loads(extra_files['config.txt'],
368
+ object_hook=lambda d: {
369
+ int(k) if k.isdigit() else k: v
370
+ for k, v in d.items()})
371
+ stride, names = int(d['stride']), d['names']
372
+ elif dnn: # ONNX OpenCV DNN
373
+ LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
374
+ check_requirements('opencv-python>=4.5.4')
375
+ net = cv2.dnn.readNetFromONNX(w)
376
+ elif onnx: # ONNX Runtime
377
+ LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
378
+ check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
379
+ import onnxruntime
380
+ providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
381
+ session = onnxruntime.InferenceSession(w, providers=providers)
382
+ output_names = [x.name for x in session.get_outputs()]
383
+ meta = session.get_modelmeta().custom_metadata_map # metadata
384
+ if 'stride' in meta:
385
+ stride, names = int(meta['stride']), eval(meta['names'])
386
+ elif xml: # OpenVINO
387
+ LOGGER.info(f'Loading {w} for OpenVINO inference...')
388
+ check_requirements('openvino>=2023.0') # requires openvino-dev: https://pypi.org/project/openvino-dev/
389
+ from openvino.runtime import Core, Layout, get_batch
390
+ core = Core()
391
+ if not Path(w).is_file(): # if not *.xml
392
+ w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
393
+ ov_model = core.read_model(model=w, weights=Path(w).with_suffix('.bin'))
394
+ if ov_model.get_parameters()[0].get_layout().empty:
395
+ ov_model.get_parameters()[0].set_layout(Layout('NCHW'))
396
+ batch_dim = get_batch(ov_model)
397
+ if batch_dim.is_static:
398
+ batch_size = batch_dim.get_length()
399
+ ov_compiled_model = core.compile_model(ov_model, device_name='AUTO') # AUTO selects best available device
400
+ stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata
401
+ elif engine: # TensorRT
402
+ LOGGER.info(f'Loading {w} for TensorRT inference...')
403
+ import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
404
+ check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
405
+ if device.type == 'cpu':
406
+ device = torch.device('cuda:0')
407
+ Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
408
+ logger = trt.Logger(trt.Logger.INFO)
409
+ with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
410
+ model = runtime.deserialize_cuda_engine(f.read())
411
+ context = model.create_execution_context()
412
+ bindings = OrderedDict()
413
+ output_names = []
414
+ fp16 = False # default updated below
415
+ dynamic = False
416
+ for i in range(model.num_bindings):
417
+ name = model.get_binding_name(i)
418
+ dtype = trt.nptype(model.get_binding_dtype(i))
419
+ if model.binding_is_input(i):
420
+ if -1 in tuple(model.get_binding_shape(i)): # dynamic
421
+ dynamic = True
422
+ context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
423
+ if dtype == np.float16:
424
+ fp16 = True
425
+ else: # output
426
+ output_names.append(name)
427
+ shape = tuple(context.get_binding_shape(i))
428
+ im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
429
+ bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
430
+ binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
431
+ batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
432
+ elif coreml: # CoreML
433
+ LOGGER.info(f'Loading {w} for CoreML inference...')
434
+ import coremltools as ct
435
+ model = ct.models.MLModel(w)
436
+ elif saved_model: # TF SavedModel
437
+ LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
438
+ import tensorflow as tf
439
+ keras = False # assume TF1 saved_model
440
+ model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
441
+ elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
442
+ LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
443
+ import tensorflow as tf
444
+
445
+ def wrap_frozen_graph(gd, inputs, outputs):
446
+ x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), []) # wrapped
447
+ ge = x.graph.as_graph_element
448
+ return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
449
+
450
+ def gd_outputs(gd):
451
+ name_list, input_list = [], []
452
+ for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
453
+ name_list.append(node.name)
454
+ input_list.extend(node.input)
455
+ return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
456
+
457
+ gd = tf.Graph().as_graph_def() # TF GraphDef
458
+ with open(w, 'rb') as f:
459
+ gd.ParseFromString(f.read())
460
+ frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
461
+ elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
462
+ try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
463
+ from tflite_runtime.interpreter import Interpreter, load_delegate
464
+ except ImportError:
465
+ import tensorflow as tf
466
+ Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
467
+ if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
468
+ LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
469
+ delegate = {
470
+ 'Linux': 'libedgetpu.so.1',
471
+ 'Darwin': 'libedgetpu.1.dylib',
472
+ 'Windows': 'edgetpu.dll'}[platform.system()]
473
+ interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
474
+ else: # TFLite
475
+ LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
476
+ interpreter = Interpreter(model_path=w) # load TFLite model
477
+ interpreter.allocate_tensors() # allocate
478
+ input_details = interpreter.get_input_details() # inputs
479
+ output_details = interpreter.get_output_details() # outputs
480
+ # load metadata
481
+ with contextlib.suppress(zipfile.BadZipFile):
482
+ with zipfile.ZipFile(w, 'r') as model:
483
+ meta_file = model.namelist()[0]
484
+ meta = ast.literal_eval(model.read(meta_file).decode('utf-8'))
485
+ stride, names = int(meta['stride']), meta['names']
486
+ elif tfjs: # TF.js
487
+ raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
488
+ elif paddle: # PaddlePaddle
489
+ LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
490
+ check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
491
+ import paddle.inference as pdi
492
+ if not Path(w).is_file(): # if not *.pdmodel
493
+ w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir
494
+ weights = Path(w).with_suffix('.pdiparams')
495
+ config = pdi.Config(str(w), str(weights))
496
+ if cuda:
497
+ config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
498
+ predictor = pdi.create_predictor(config)
499
+ input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
500
+ output_names = predictor.get_output_names()
501
+ elif triton: # NVIDIA Triton Inference Server
502
+ LOGGER.info(f'Using {w} as Triton Inference Server...')
503
+ check_requirements('tritonclient[all]')
504
+ from utils.triton import TritonRemoteModel
505
+ model = TritonRemoteModel(url=w)
506
+ nhwc = model.runtime.startswith('tensorflow')
507
+ else:
508
+ raise NotImplementedError(f'ERROR: {w} is not a supported format')
509
+
510
+ # class names
511
+ if 'names' not in locals():
512
+ names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
513
+ if names[0] == 'n01440764' and len(names) == 1000: # ImageNet
514
+ names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] # human-readable names
515
+
516
+ self.__dict__.update(locals()) # assign all variables to self
517
+
518
+ def forward(self, im, augment=False, visualize=False):
519
+ # YOLOv5 MultiBackend inference
520
+ b, ch, h, w = im.shape # batch, channel, height, width
521
+ if self.fp16 and im.dtype != torch.float16:
522
+ im = im.half() # to FP16
523
+ if self.nhwc:
524
+ im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3)
525
+
526
+ if self.pt: # PyTorch
527
+ y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
528
+ elif self.jit: # TorchScript
529
+ y = self.model(im)
530
+ elif self.dnn: # ONNX OpenCV DNN
531
+ im = im.cpu().numpy() # torch to numpy
532
+ self.net.setInput(im)
533
+ y = self.net.forward()
534
+ elif self.onnx: # ONNX Runtime
535
+ im = im.cpu().numpy() # torch to numpy
536
+ y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
537
+ elif self.xml: # OpenVINO
538
+ im = im.cpu().numpy() # FP32
539
+ y = list(self.ov_compiled_model(im).values())
540
+ elif self.engine: # TensorRT
541
+ if self.dynamic and im.shape != self.bindings['images'].shape:
542
+ i = self.model.get_binding_index('images')
543
+ self.context.set_binding_shape(i, im.shape) # reshape if dynamic
544
+ self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
545
+ for name in self.output_names:
546
+ i = self.model.get_binding_index(name)
547
+ self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
548
+ s = self.bindings['images'].shape
549
+ assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
550
+ self.binding_addrs['images'] = int(im.data_ptr())
551
+ self.context.execute_v2(list(self.binding_addrs.values()))
552
+ y = [self.bindings[x].data for x in sorted(self.output_names)]
553
+ elif self.coreml: # CoreML
554
+ im = im.cpu().numpy()
555
+ im = Image.fromarray((im[0] * 255).astype('uint8'))
556
+ # im = im.resize((192, 320), Image.BILINEAR)
557
+ y = self.model.predict({'image': im}) # coordinates are xywh normalized
558
+ if 'confidence' in y:
559
+ box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
560
+ conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
561
+ y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
562
+ else:
563
+ y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
564
+ elif self.paddle: # PaddlePaddle
565
+ im = im.cpu().numpy().astype(np.float32)
566
+ self.input_handle.copy_from_cpu(im)
567
+ self.predictor.run()
568
+ y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
569
+ elif self.triton: # NVIDIA Triton Inference Server
570
+ y = self.model(im)
571
+ else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
572
+ im = im.cpu().numpy()
573
+ if self.saved_model: # SavedModel
574
+ y = self.model(im, training=False) if self.keras else self.model(im)
575
+ elif self.pb: # GraphDef
576
+ y = self.frozen_func(x=self.tf.constant(im))
577
+ else: # Lite or Edge TPU
578
+ input = self.input_details[0]
579
+ int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
580
+ if int8:
581
+ scale, zero_point = input['quantization']
582
+ im = (im / scale + zero_point).astype(np.uint8) # de-scale
583
+ self.interpreter.set_tensor(input['index'], im)
584
+ self.interpreter.invoke()
585
+ y = []
586
+ for output in self.output_details:
587
+ x = self.interpreter.get_tensor(output['index'])
588
+ if int8:
589
+ scale, zero_point = output['quantization']
590
+ x = (x.astype(np.float32) - zero_point) * scale # re-scale
591
+ y.append(x)
592
+ y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
593
+ y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
594
+
595
+ if isinstance(y, (list, tuple)):
596
+ return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
597
+ else:
598
+ return self.from_numpy(y)
599
+
600
+ def from_numpy(self, x):
601
+ return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
602
+
603
+ def warmup(self, imgsz=(1, 3, 640, 640)):
604
+ # Warmup model by running inference once
605
+ warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
606
+ if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
607
+ im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
608
+ for _ in range(2 if self.jit else 1): #
609
+ self.forward(im) # warmup
610
+
611
+ @staticmethod
612
+ def _model_type(p='path/to/model.pt'):
613
+ # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
614
+ # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
615
+ from export import export_formats
616
+ from utils.downloads import is_url
617
+ sf = list(export_formats().Suffix) # export suffixes
618
+ if not is_url(p, check=False):
619
+ check_suffix(p, sf) # checks
620
+ url = urlparse(p) # if url may be Triton inference server
621
+ types = [s in Path(p).name for s in sf]
622
+ types[8] &= not types[9] # tflite &= not edgetpu
623
+ triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
624
+ return types + [triton]
625
+
626
+ @staticmethod
627
+ def _load_metadata(f=Path('path/to/meta.yaml')):
628
+ # Load metadata from meta.yaml if it exists
629
+ if f.exists():
630
+ d = yaml_load(f)
631
+ return d['stride'], d['names'] # assign stride, names
632
+ return None, None
633
+
634
+
635
+ class AutoShape(nn.Module):
636
+ # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
637
+ conf = 0.25 # NMS confidence threshold
638
+ iou = 0.45 # NMS IoU threshold
639
+ agnostic = False # NMS class-agnostic
640
+ multi_label = False # NMS multiple labels per box
641
+ classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
642
+ max_det = 1000 # maximum number of detections per image
643
+ amp = False # Automatic Mixed Precision (AMP) inference
644
+
645
+ def __init__(self, model, verbose=True):
646
+ super().__init__()
647
+ if verbose:
648
+ LOGGER.info('Adding AutoShape... ')
649
+ copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
650
+ self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
651
+ self.pt = not self.dmb or model.pt # PyTorch model
652
+ self.model = model.eval()
653
+ if self.pt:
654
+ m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
655
+ m.inplace = False # Detect.inplace=False for safe multithread inference
656
+ m.export = True # do not output loss values
657
+
658
+ def _apply(self, fn):
659
+ # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
660
+ self = super()._apply(fn)
661
+ if self.pt:
662
+ m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
663
+ m.stride = fn(m.stride)
664
+ m.grid = list(map(fn, m.grid))
665
+ if isinstance(m.anchor_grid, list):
666
+ m.anchor_grid = list(map(fn, m.anchor_grid))
667
+ return self
668
+
669
+ @smart_inference_mode()
670
+ def forward(self, ims, size=640, augment=False, profile=False):
671
+ # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
672
+ # file: ims = 'data/images/zidane.jpg' # str or PosixPath
673
+ # URI: = 'https://ultralytics.com/images/zidane.jpg'
674
+ # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
675
+ # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
676
+ # numpy: = np.zeros((640,1280,3)) # HWC
677
+ # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
678
+ # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
679
+
680
+ dt = (Profile(), Profile(), Profile())
681
+ with dt[0]:
682
+ if isinstance(size, int): # expand
683
+ size = (size, size)
684
+ p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param
685
+ autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
686
+ if isinstance(ims, torch.Tensor): # torch
687
+ with amp.autocast(autocast):
688
+ return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
689
+
690
+ # Pre-process
691
+ n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images
692
+ shape0, shape1, files = [], [], [] # image and inference shapes, filenames
693
+ for i, im in enumerate(ims):
694
+ f = f'image{i}' # filename
695
+ if isinstance(im, (str, Path)): # filename or uri
696
+ im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
697
+ im = np.asarray(exif_transpose(im))
698
+ elif isinstance(im, Image.Image): # PIL Image
699
+ im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
700
+ files.append(Path(f).with_suffix('.jpg').name)
701
+ if im.shape[0] < 5: # image in CHW
702
+ im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
703
+ im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input
704
+ s = im.shape[:2] # HWC
705
+ shape0.append(s) # image shape
706
+ g = max(size) / max(s) # gain
707
+ shape1.append([int(y * g) for y in s])
708
+ ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
709
+ shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] # inf shape
710
+ x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad
711
+ x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
712
+ x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
713
+
714
+ with amp.autocast(autocast):
715
+ # Inference
716
+ with dt[1]:
717
+ y = self.model(x, augment=augment) # forward
718
+
719
+ # Post-process
720
+ with dt[2]:
721
+ y = non_max_suppression(y if self.dmb else y[0],
722
+ self.conf,
723
+ self.iou,
724
+ self.classes,
725
+ self.agnostic,
726
+ self.multi_label,
727
+ max_det=self.max_det) # NMS
728
+ for i in range(n):
729
+ scale_boxes(shape1, y[i][:, :4], shape0[i])
730
+
731
+ return Detections(ims, y, files, dt, self.names, x.shape)
732
+
733
+
734
+ class Detections:
735
+ # YOLOv5 detections class for inference results
736
+ def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
737
+ super().__init__()
738
+ d = pred[0].device # device
739
+ gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
740
+ self.ims = ims # list of images as numpy arrays
741
+ self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
742
+ self.names = names # class names
743
+ self.files = files # image filenames
744
+ self.times = times # profiling times
745
+ self.xyxy = pred # xyxy pixels
746
+ self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
747
+ self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
748
+ self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
749
+ self.n = len(self.pred) # number of images (batch size)
750
+ self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms)
751
+ self.s = tuple(shape) # inference BCHW shape
752
+
753
+ def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
754
+ s, crops = '', []
755
+ for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
756
+ s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
757
+ if pred.shape[0]:
758
+ for c in pred[:, -1].unique():
759
+ n = (pred[:, -1] == c).sum() # detections per class
760
+ s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
761
+ s = s.rstrip(', ')
762
+ if show or save or render or crop:
763
+ annotator = Annotator(im, example=str(self.names))
764
+ for *box, conf, cls in reversed(pred): # xyxy, confidence, class
765
+ label = f'{self.names[int(cls)]} {conf:.2f}'
766
+ if crop:
767
+ file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
768
+ crops.append({
769
+ 'box': box,
770
+ 'conf': conf,
771
+ 'cls': cls,
772
+ 'label': label,
773
+ 'im': save_one_box(box, im, file=file, save=save)})
774
+ else: # all others
775
+ annotator.box_label(box, label if labels else '', color=colors(cls))
776
+ im = annotator.im
777
+ else:
778
+ s += '(no detections)'
779
+
780
+ im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
781
+ if show:
782
+ if is_jupyter():
783
+ from IPython.display import display
784
+ display(im)
785
+ else:
786
+ im.show(self.files[i])
787
+ if save:
788
+ f = self.files[i]
789
+ im.save(save_dir / f) # save
790
+ if i == self.n - 1:
791
+ LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
792
+ if render:
793
+ self.ims[i] = np.asarray(im)
794
+ if pprint:
795
+ s = s.lstrip('\n')
796
+ return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
797
+ if crop:
798
+ if save:
799
+ LOGGER.info(f'Saved results to {save_dir}\n')
800
+ return crops
801
+
802
+ @TryExcept('Showing images is not supported in this environment')
803
+ def show(self, labels=True):
804
+ self._run(show=True, labels=labels) # show results
805
+
806
+ def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
807
+ save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
808
+ self._run(save=True, labels=labels, save_dir=save_dir) # save results
809
+
810
+ def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
811
+ save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
812
+ return self._run(crop=True, save=save, save_dir=save_dir) # crop results
813
+
814
+ def render(self, labels=True):
815
+ self._run(render=True, labels=labels) # render results
816
+ return self.ims
817
+
818
+ def pandas(self):
819
+ # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
820
+ new = copy(self) # return copy
821
+ ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
822
+ cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
823
+ for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
824
+ a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
825
+ setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
826
+ return new
827
+
828
+ def tolist(self):
829
+ # return a list of Detections objects, i.e. 'for result in results.tolist():'
830
+ r = range(self.n) # iterable
831
+ x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
832
+ # for d in x:
833
+ # for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
834
+ # setattr(d, k, getattr(d, k)[0]) # pop out of list
835
+ return x
836
+
837
+ def print(self):
838
+ LOGGER.info(self.__str__())
839
+
840
+ def __len__(self): # override len(results)
841
+ return self.n
842
+
843
+ def __str__(self): # override print(results)
844
+ return self._run(pprint=True) # print results
845
+
846
+ def __repr__(self):
847
+ return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
848
+
849
+
850
+ class Proto(nn.Module):
851
+ # YOLOv5 mask Proto module for segmentation models
852
+ def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
853
+ super().__init__()
854
+ self.cv1 = Conv(c1, c_, k=3)
855
+ self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
856
+ self.cv2 = Conv(c_, c_, k=3)
857
+ self.cv3 = Conv(c_, c2)
858
+
859
+ def forward(self, x):
860
+ return self.cv3(self.cv2(self.upsample(self.cv1(x))))
861
+
862
+
863
+ class Classify(nn.Module):
864
+ # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
865
+ def __init__(self,
866
+ c1,
867
+ c2,
868
+ k=1,
869
+ s=1,
870
+ p=None,
871
+ g=1,
872
+ dropout_p=0.0): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
873
+ super().__init__()
874
+ c_ = 1280 # efficientnet_b0 size
875
+ self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
876
+ self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1)
877
+ self.drop = nn.Dropout(p=dropout_p, inplace=True)
878
+ self.linear = nn.Linear(c_, c2) # to x(b,c2)
879
+
880
+ def forward(self, x):
881
+ if isinstance(x, list):
882
+ x = torch.cat(x, 1)
883
+ return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
884
+
885
+
886
+ class CAM(nn.Module):
887
+ def __init__(self, inc, fusion='weight'):
888
+ super().__init__()
889
+
890
+ assert fusion in ['weight', 'adaptive', 'concat'] # three fusion modes
891
+ self.fusion = fusion
892
+
893
+ self.conv1 = Conv(inc, inc, 3, 1, None, 1, 1)
894
+ self.conv2 = Conv(inc, inc, 3, 1, None, 1, 3)
895
+ self.conv3 = Conv(inc, inc, 3, 1, None, 1, 5)
896
+
897
+ self.fusion_1 = Conv(inc, inc, 1)
898
+ self.fusion_2 = Conv(inc, inc, 1)
899
+ self.fusion_3 = Conv(inc, inc, 1)
900
+
901
+ if self.fusion == 'adaptive':
902
+ self.fusion_4 = Conv(inc * 3, 3, 1)
903
+
904
+ def forward(self, x):
905
+ x1 = self.conv1(x)
906
+ x2 = self.conv2(x)
907
+ x3 = self.conv3(x)
908
+
909
+ if self.fusion == 'weight':
910
+ return self.fusion_1(x1) + self.fusion_2(x2) + self.fusion_3(x3)
911
+ elif self.fusion == 'adaptive':
912
+ fusion = torch.softmax(
913
+ self.fusion_4(torch.cat([self.fusion_1(x1), self.fusion_2(x2), self.fusion_3(x3)], dim=1)), dim=1)
914
+ x1_weight, x2_weight, x3_weight = torch.split(fusion, [1, 1, 1], dim=1)
915
+ return x1 * x1_weight + x2 * x2_weight + x3 * x3_weight
916
+ else:
917
+ return torch.cat([self.fusion_1(x1), self.fusion_2(x2), self.fusion_3(x3)], dim=1)
918
+
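The CAM block above is the main custom addition to common.py in this commit. As a quick sanity check, the following minimal sketch (an assumption: it is run from the repo root so models.common resolves and exports CAM exactly as in the hunk above) shows how the three fusion modes affect the output shape.

import torch
from models.common import CAM  # CAM as defined in the hunk above

x = torch.randn(1, 256, 40, 40)              # dummy BCHW feature map
for mode in ('weight', 'adaptive', 'concat'):
    m = CAM(256, fusion=mode)
    y = m(x)
    # 'weight' and 'adaptive' keep 256 channels; 'concat' stacks the three branches to 768
    print(mode, tuple(y.shape))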
models/experimental.py ADDED
@@ -0,0 +1,111 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+ """
3
+ Experimental modules
4
+ """
5
+ import math
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+
11
+ from utils.downloads import attempt_download
12
+
13
+
14
+ class Sum(nn.Module):
15
+ # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
16
+ def __init__(self, n, weight=False): # n: number of inputs
17
+ super().__init__()
18
+ self.weight = weight # apply weights boolean
19
+ self.iter = range(n - 1) # iter object
20
+ if weight:
21
+ self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
22
+
23
+ def forward(self, x):
24
+ y = x[0] # no weight
25
+ if self.weight:
26
+ w = torch.sigmoid(self.w) * 2
27
+ for i in self.iter:
28
+ y = y + x[i + 1] * w[i]
29
+ else:
30
+ for i in self.iter:
31
+ y = y + x[i + 1]
32
+ return y
33
+
34
+
35
+ class MixConv2d(nn.Module):
36
+ # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
37
+ def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
38
+ super().__init__()
39
+ n = len(k) # number of convolutions
40
+ if equal_ch: # equal c_ per group
41
+ i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices
42
+ c_ = [(i == g).sum() for g in range(n)] # intermediate channels
43
+ else: # equal weight.numel() per group
44
+ b = [c2] + [0] * n
45
+ a = np.eye(n + 1, n, k=-1)
46
+ a -= np.roll(a, 1, axis=1)
47
+ a *= np.array(k) ** 2
48
+ a[0] = 1
49
+ c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
50
+
51
+ self.m = nn.ModuleList([
52
+ nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
53
+ self.bn = nn.BatchNorm2d(c2)
54
+ self.act = nn.SiLU()
55
+
56
+ def forward(self, x):
57
+ return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
58
+
59
+
60
+ class Ensemble(nn.ModuleList):
61
+ # Ensemble of models
62
+ def __init__(self):
63
+ super().__init__()
64
+
65
+ def forward(self, x, augment=False, profile=False, visualize=False):
66
+ y = [module(x, augment, profile, visualize)[0] for module in self]
67
+ # y = torch.stack(y).max(0)[0] # max ensemble
68
+ # y = torch.stack(y).mean(0) # mean ensemble
69
+ y = torch.cat(y, 1) # nms ensemble
70
+ return y, None # inference, train output
71
+
72
+
73
+ def attempt_load(weights, device=None, inplace=True, fuse=True):
74
+ # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
75
+ from models.yolo import Detect, Model
76
+
77
+ model = Ensemble()
78
+ for w in weights if isinstance(weights, list) else [weights]:
79
+ ckpt = torch.load(attempt_download(w), map_location='cpu') # load
80
+ ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
81
+
82
+ # Model compatibility updates
83
+ if not hasattr(ckpt, 'stride'):
84
+ ckpt.stride = torch.tensor([32.])
85
+ if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
86
+ ckpt.names = dict(enumerate(ckpt.names)) # convert to dict
87
+
88
+ model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode
89
+
90
+ # Module updates
91
+ for m in model.modules():
92
+ t = type(m)
93
+ if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
94
+ m.inplace = inplace
95
+ if t is Detect and not isinstance(m.anchor_grid, list):
96
+ delattr(m, 'anchor_grid')
97
+ setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
98
+ elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
99
+ m.recompute_scale_factor = None # torch 1.11.0 compatibility
100
+
101
+ # Return model
102
+ if len(model) == 1:
103
+ return model[-1]
104
+
105
+ # Return detection ensemble
106
+ print(f'Ensemble created with {weights}\n')
107
+ for k in 'names', 'nc', 'yaml':
108
+ setattr(model, k, getattr(model[0], k))
109
+ model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
110
+ assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
111
+ return model
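A brief, hedged usage sketch for the attempt_load() defined above. The weight filenames are placeholders, not files shipped in this commit; any local .pt checkpoint (or a release name that attempt_download can fetch) would do.

import torch
from models.experimental import attempt_load

# Single checkpoint -> returns the (optionally fused) model itself
model = attempt_load('yolov5s.pt', device=torch.device('cpu'))
print(model.stride, model.names[0])

# A list of checkpoints -> returns an Ensemble whose per-model outputs are
# concatenated along the box dimension before NMS
# ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt'], device=torch.device('cpu'))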
models/hub/anchors.yaml ADDED
@@ -0,0 +1,59 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+ # Default anchors for COCO data
3
+
4
+
5
+ # P5 -------------------------------------------------------------------------------------------------------------------
6
+ # P5-640:
7
+ anchors_p5_640:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+
13
+ # P6 -------------------------------------------------------------------------------------------------------------------
14
+ # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
15
+ anchors_p6_640:
16
+ - [9,11, 21,19, 17,41] # P3/8
17
+ - [43,32, 39,70, 86,64] # P4/16
18
+ - [65,131, 134,130, 120,265] # P5/32
19
+ - [282,180, 247,354, 512,387] # P6/64
20
+
21
+ # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
22
+ anchors_p6_1280:
23
+ - [19,27, 44,40, 38,94] # P3/8
24
+ - [96,68, 86,152, 180,137] # P4/16
25
+ - [140,301, 303,264, 238,542] # P5/32
26
+ - [436,615, 739,380, 925,792] # P6/64
27
+
28
+ # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
29
+ anchors_p6_1920:
30
+ - [28,41, 67,59, 57,141] # P3/8
31
+ - [144,103, 129,227, 270,205] # P4/16
32
+ - [209,452, 455,396, 358,812] # P5/32
33
+ - [653,922, 1109,570, 1387,1187] # P6/64
34
+
35
+
36
+ # P7 -------------------------------------------------------------------------------------------------------------------
37
+ # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
38
+ anchors_p7_640:
39
+ - [11,11, 13,30, 29,20] # P3/8
40
+ - [30,46, 61,38, 39,92] # P4/16
41
+ - [78,80, 146,66, 79,163] # P5/32
42
+ - [149,150, 321,143, 157,303] # P6/64
43
+ - [257,402, 359,290, 524,372] # P7/128
44
+
45
+ # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
46
+ anchors_p7_1280:
47
+ - [19,22, 54,36, 32,77] # P3/8
48
+ - [70,83, 138,71, 75,173] # P4/16
49
+ - [165,159, 148,334, 375,151] # P5/32
50
+ - [334,317, 251,626, 499,474] # P6/64
51
+ - [750,326, 534,814, 1079,818] # P7/128
52
+
53
+ # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
54
+ anchors_p7_1920:
55
+ - [29,34, 81,55, 47,115] # P3/8
56
+ - [105,124, 207,107, 113,259] # P4/16
57
+ - [247,238, 222,500, 563,227] # P5/32
58
+ - [501,476, 376,939, 749,711] # P6/64
59
+ - [1126,489, 801,1222, 1618,1227] # P7/128
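Each row above is consumed by one Detect output layer as flat w,h pixel pairs at the quoted image size. The snippet below is only an illustration of how a row unpacks into three anchors (values copied from anchors_p5_640 above); it does not use the YOLOv5 loader.

anchors_p5_640 = [[10, 13, 16, 30, 33, 23],       # P3/8
                  [30, 61, 62, 45, 59, 119],      # P4/16
                  [116, 90, 156, 198, 373, 326]]  # P5/32
for stride, row in zip((8, 16, 32), anchors_p5_640):
    pairs = list(zip(row[0::2], row[1::2]))       # -> 3 (w, h) anchors per output layer
    print(f'stride {stride}: {pairs}')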
models/hub/yolov3-spp.yaml ADDED
@@ -0,0 +1,51 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # darknet53 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, Bottleneck, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, Bottleneck, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, Bottleneck, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, Bottleneck, [1024]], # 10
26
+ ]
27
+
28
+ # YOLOv3-SPP head
29
+ head:
30
+ [[-1, 1, Bottleneck, [1024, False]],
31
+ [-1, 1, SPP, [512, [5, 9, 13]]],
32
+ [-1, 1, Conv, [1024, 3, 1]],
33
+ [-1, 1, Conv, [512, 1, 1]],
34
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35
+
36
+ [-2, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
39
+ [-1, 1, Bottleneck, [512, False]],
40
+ [-1, 1, Bottleneck, [512, False]],
41
+ [-1, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43
+
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
47
+ [-1, 1, Bottleneck, [256, False]],
48
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49
+
50
+ [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51
+ ]
models/hub/yolov3-tiny.yaml ADDED
@@ -0,0 +1,41 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10,14, 23,27, 37,58] # P4/16
9
+ - [81,82, 135,169, 344,319] # P5/32
10
+
11
+ # YOLOv3-tiny backbone
12
+ backbone:
13
+ # [from, number, module, args]
14
+ [[-1, 1, Conv, [16, 3, 1]], # 0
15
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
16
+ [-1, 1, Conv, [32, 3, 1]],
17
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
18
+ [-1, 1, Conv, [64, 3, 1]],
19
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
20
+ [-1, 1, Conv, [128, 3, 1]],
21
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
22
+ [-1, 1, Conv, [256, 3, 1]],
23
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
24
+ [-1, 1, Conv, [512, 3, 1]],
25
+ [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
26
+ [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
27
+ ]
28
+
29
+ # YOLOv3-tiny head
30
+ head:
31
+ [[-1, 1, Conv, [1024, 3, 1]],
32
+ [-1, 1, Conv, [256, 1, 1]],
33
+ [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
34
+
35
+ [-2, 1, Conv, [128, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
38
+ [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
39
+
40
+ [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
41
+ ]
models/hub/yolov3.yaml ADDED
@@ -0,0 +1,51 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # darknet53 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, Bottleneck, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, Bottleneck, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, Bottleneck, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, Bottleneck, [1024]], # 10
26
+ ]
27
+
28
+ # YOLOv3 head
29
+ head:
30
+ [[-1, 1, Bottleneck, [1024, False]],
31
+ [-1, 1, Conv, [512, 1, 1]],
32
+ [-1, 1, Conv, [1024, 3, 1]],
33
+ [-1, 1, Conv, [512, 1, 1]],
34
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35
+
36
+ [-2, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
39
+ [-1, 1, Bottleneck, [512, False]],
40
+ [-1, 1, Bottleneck, [512, False]],
41
+ [-1, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43
+
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
47
+ [-1, 1, Bottleneck, [256, False]],
48
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49
+
50
+ [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51
+ ]
models/hub/yolov5-bifpn.yaml ADDED
@@ -0,0 +1,48 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17
+ [-1, 3, C3, [128]],
18
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19
+ [-1, 6, C3, [256]],
20
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21
+ [-1, 9, C3, [512]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23
+ [-1, 3, C3, [1024]],
24
+ [-1, 1, SPPF, [1024, 5]], # 9
25
+ ]
26
+
27
+ # YOLOv5 v6.0 BiFPN head
28
+ head:
29
+ [[-1, 1, Conv, [512, 1, 1]],
30
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
32
+ [-1, 3, C3, [512, False]], # 13
33
+
34
+ [-1, 1, Conv, [256, 1, 1]],
35
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
37
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
+
39
+ [-1, 1, Conv, [256, 3, 2]],
40
+ [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
41
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
+
43
+ [-1, 1, Conv, [512, 3, 2]],
44
+ [[-1, 10], 1, Concat, [1]], # cat head P5
45
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
+
47
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48
+ ]
models/hub/yolov5-fpn.yaml ADDED
@@ -0,0 +1,42 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17
+ [-1, 3, C3, [128]],
18
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19
+ [-1, 6, C3, [256]],
20
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21
+ [-1, 9, C3, [512]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23
+ [-1, 3, C3, [1024]],
24
+ [-1, 1, SPPF, [1024, 5]], # 9
25
+ ]
26
+
27
+ # YOLOv5 v6.0 FPN head
28
+ head:
29
+ [[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
30
+
31
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 1, Conv, [512, 1, 1]],
34
+ [-1, 3, C3, [512, False]], # 14 (P4/16-medium)
35
+
36
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 3, C3, [256, False]], # 18 (P3/8-small)
40
+
41
+ [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
42
+ ]
models/hub/yolov5-p2.yaml ADDED
@@ -0,0 +1,54 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
+
9
+ # YOLOv5 v6.0 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14
+ [-1, 3, C3, [128]],
15
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16
+ [-1, 6, C3, [256]],
17
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18
+ [-1, 9, C3, [512]],
19
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
20
+ [-1, 3, C3, [1024]],
21
+ [-1, 1, SPPF, [1024, 5]], # 9
22
+ ]
23
+
24
+ # YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
25
+ head:
26
+ [[-1, 1, Conv, [512, 1, 1]],
27
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
28
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
29
+ [-1, 3, C3, [512, False]], # 13
30
+
31
+ [-1, 1, Conv, [256, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
34
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
35
+
36
+ [-1, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 2], 1, Concat, [1]], # cat backbone P2
39
+ [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
40
+
41
+ [-1, 1, Conv, [128, 3, 2]],
42
+ [[-1, 18], 1, Concat, [1]], # cat head P3
43
+ [-1, 3, C3, [256, False]], # 24 (P3/8-small)
44
+
45
+ [-1, 1, Conv, [256, 3, 2]],
46
+ [[-1, 14], 1, Concat, [1]], # cat head P4
47
+ [-1, 3, C3, [512, False]], # 27 (P4/16-medium)
48
+
49
+ [-1, 1, Conv, [512, 3, 2]],
50
+ [[-1, 10], 1, Concat, [1]], # cat head P5
51
+ [-1, 3, C3, [1024, False]], # 30 (P5/32-large)
52
+
53
+ [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
54
+ ]
models/hub/yolov5-p34.yaml ADDED
@@ -0,0 +1,41 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.50 # layer channel multiple
7
+ anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
+
9
+ # YOLOv5 v6.0 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
13
+ [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
14
+ [ -1, 3, C3, [ 128 ] ],
15
+ [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
16
+ [ -1, 6, C3, [ 256 ] ],
17
+ [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
18
+ [ -1, 9, C3, [ 512 ] ],
19
+ [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
20
+ [ -1, 3, C3, [ 1024 ] ],
21
+ [ -1, 1, SPPF, [ 1024, 5 ] ], # 9
22
+ ]
23
+
24
+ # YOLOv5 v6.0 head with (P3, P4) outputs
25
+ head:
26
+ [ [ -1, 1, Conv, [ 512, 1, 1 ] ],
27
+ [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
28
+ [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
29
+ [ -1, 3, C3, [ 512, False ] ], # 13
30
+
31
+ [ -1, 1, Conv, [ 256, 1, 1 ] ],
32
+ [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
33
+ [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
34
+ [ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
35
+
36
+ [ -1, 1, Conv, [ 256, 3, 2 ] ],
37
+ [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
38
+ [ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
39
+
40
+ [ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
41
+ ]
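This P3/P4-only config is the first in the folder with sub-1.0 gains, so it is a convenient place to note how depth_multiple and width_multiple scale the rows. The snippet below paraphrases the rounding applied by parse_model in models/yolo.py and is illustrative only.

import math

def make_divisible(x, divisor=8):                 # channel rounding used by the parser
    return math.ceil(x / divisor) * divisor

gd, gw = 0.33, 0.50                               # depth_multiple, width_multiple above
for n, c2 in [(3, 128), (6, 256), (9, 512), (3, 1024)]:  # backbone C3 repeats / channels
    n_scaled = max(round(n * gd), 1) if n > 1 else n
    c2_scaled = make_divisible(c2 * gw)
    print(f'C3 x{n} [{c2}]  ->  x{n_scaled} [{c2_scaled}]')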
models/hub/yolov5-p6.yaml ADDED
@@ -0,0 +1,56 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
+
9
+ # YOLOv5 v6.0 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14
+ [-1, 3, C3, [128]],
15
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16
+ [-1, 6, C3, [256]],
17
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18
+ [-1, 9, C3, [512]],
19
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
20
+ [-1, 3, C3, [768]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
22
+ [-1, 3, C3, [1024]],
23
+ [-1, 1, SPPF, [1024, 5]], # 11
24
+ ]
25
+
26
+ # YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
27
+ head:
28
+ [[-1, 1, Conv, [768, 1, 1]],
29
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
31
+ [-1, 3, C3, [768, False]], # 15
32
+
33
+ [-1, 1, Conv, [512, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
36
+ [-1, 3, C3, [512, False]], # 19
37
+
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
41
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
42
+
43
+ [-1, 1, Conv, [256, 3, 2]],
44
+ [[-1, 20], 1, Concat, [1]], # cat head P4
45
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
46
+
47
+ [-1, 1, Conv, [512, 3, 2]],
48
+ [[-1, 16], 1, Concat, [1]], # cat head P5
49
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
50
+
51
+ [-1, 1, Conv, [768, 3, 2]],
52
+ [[-1, 12], 1, Concat, [1]], # cat head P6
53
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
54
+
55
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
56
+ ]
models/hub/yolov5-p7.yaml ADDED
@@ -0,0 +1,67 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
+
9
+ # YOLOv5 v6.0 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14
+ [-1, 3, C3, [128]],
15
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16
+ [-1, 6, C3, [256]],
17
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18
+ [-1, 9, C3, [512]],
19
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
20
+ [-1, 3, C3, [768]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
22
+ [-1, 3, C3, [1024]],
23
+ [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
24
+ [-1, 3, C3, [1280]],
25
+ [-1, 1, SPPF, [1280, 5]], # 13
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
29
+ head:
30
+ [[-1, 1, Conv, [1024, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32
+ [[-1, 10], 1, Concat, [1]], # cat backbone P6
33
+ [-1, 3, C3, [1024, False]], # 17
34
+
35
+ [-1, 1, Conv, [768, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
38
+ [-1, 3, C3, [768, False]], # 21
39
+
40
+ [-1, 1, Conv, [512, 1, 1]],
41
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
42
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
43
+ [-1, 3, C3, [512, False]], # 25
44
+
45
+ [-1, 1, Conv, [256, 1, 1]],
46
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
47
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
48
+ [-1, 3, C3, [256, False]], # 29 (P3/8-small)
49
+
50
+ [-1, 1, Conv, [256, 3, 2]],
51
+ [[-1, 26], 1, Concat, [1]], # cat head P4
52
+ [-1, 3, C3, [512, False]], # 32 (P4/16-medium)
53
+
54
+ [-1, 1, Conv, [512, 3, 2]],
55
+ [[-1, 22], 1, Concat, [1]], # cat head P5
56
+ [-1, 3, C3, [768, False]], # 35 (P5/32-large)
57
+
58
+ [-1, 1, Conv, [768, 3, 2]],
59
+ [[-1, 18], 1, Concat, [1]], # cat head P6
60
+ [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
61
+
62
+ [-1, 1, Conv, [1024, 3, 2]],
63
+ [[-1, 14], 1, Concat, [1]], # cat head P7
64
+ [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
65
+
66
+ [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
67
+ ]
models/hub/yolov5-panet.yaml ADDED
@@ -0,0 +1,48 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17
+ [-1, 3, C3, [128]],
18
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19
+ [-1, 6, C3, [256]],
20
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21
+ [-1, 9, C3, [512]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23
+ [-1, 3, C3, [1024]],
24
+ [-1, 1, SPPF, [1024, 5]], # 9
25
+ ]
26
+
27
+ # YOLOv5 v6.0 PANet head
28
+ head:
29
+ [[-1, 1, Conv, [512, 1, 1]],
30
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
32
+ [-1, 3, C3, [512, False]], # 13
33
+
34
+ [-1, 1, Conv, [256, 1, 1]],
35
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
37
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
+
39
+ [-1, 1, Conv, [256, 3, 2]],
40
+ [[-1, 14], 1, Concat, [1]], # cat head P4
41
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
+
43
+ [-1, 1, Conv, [512, 3, 2]],
44
+ [[-1, 10], 1, Concat, [1]], # cat head P5
45
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
+
47
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48
+ ]
models/hub/yolov5l6.yaml ADDED
@@ -0,0 +1,60 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [19,27, 44,40, 38,94] # P3/8
9
+ - [96,68, 86,152, 180,137] # P4/16
10
+ - [140,301, 303,264, 238,542] # P5/32
11
+ - [436,615, 739,380, 925,792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [768]],
25
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26
+ [-1, 3, C3, [1024]],
27
+ [-1, 1, SPPF, [1024, 5]], # 11
28
+ ]
29
+
30
+ # YOLOv5 v6.0 head
31
+ head:
32
+ [[-1, 1, Conv, [768, 1, 1]],
33
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
35
+ [-1, 3, C3, [768, False]], # 15
36
+
37
+ [-1, 1, Conv, [512, 1, 1]],
38
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
40
+ [-1, 3, C3, [512, False]], # 19
41
+
42
+ [-1, 1, Conv, [256, 1, 1]],
43
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
45
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
+
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, 20], 1, Concat, [1]], # cat head P4
49
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
+
51
+ [-1, 1, Conv, [512, 3, 2]],
52
+ [[-1, 16], 1, Concat, [1]], # cat head P5
53
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
+
55
+ [-1, 1, Conv, [768, 3, 2]],
56
+ [[-1, 12], 1, Concat, [1]], # cat head P6
57
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
+
59
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60
+ ]
models/hub/yolov5m6.yaml ADDED
@@ -0,0 +1,60 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.67 # model depth multiple
6
+ width_multiple: 0.75 # layer channel multiple
7
+ anchors:
8
+ - [19,27, 44,40, 38,94] # P3/8
9
+ - [96,68, 86,152, 180,137] # P4/16
10
+ - [140,301, 303,264, 238,542] # P5/32
11
+ - [436,615, 739,380, 925,792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [768]],
25
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26
+ [-1, 3, C3, [1024]],
27
+ [-1, 1, SPPF, [1024, 5]], # 11
28
+ ]
29
+
30
+ # YOLOv5 v6.0 head
31
+ head:
32
+ [[-1, 1, Conv, [768, 1, 1]],
33
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
35
+ [-1, 3, C3, [768, False]], # 15
36
+
37
+ [-1, 1, Conv, [512, 1, 1]],
38
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
40
+ [-1, 3, C3, [512, False]], # 19
41
+
42
+ [-1, 1, Conv, [256, 1, 1]],
43
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
45
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
+
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, 20], 1, Concat, [1]], # cat head P4
49
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
+
51
+ [-1, 1, Conv, [512, 3, 2]],
52
+ [[-1, 16], 1, Concat, [1]], # cat head P5
53
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
+
55
+ [-1, 1, Conv, [768, 3, 2]],
56
+ [[-1, 12], 1, Concat, [1]], # cat head P6
57
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
+
59
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60
+ ]
models/hub/yolov5n6.yaml ADDED
@@ -0,0 +1,60 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.25 # layer channel multiple
7
+ anchors:
8
+ - [19,27, 44,40, 38,94] # P3/8
9
+ - [96,68, 86,152, 180,137] # P4/16
10
+ - [140,301, 303,264, 238,542] # P5/32
11
+ - [436,615, 739,380, 925,792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [768]],
25
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26
+ [-1, 3, C3, [1024]],
27
+ [-1, 1, SPPF, [1024, 5]], # 11
28
+ ]
29
+
30
+ # YOLOv5 v6.0 head
31
+ head:
32
+ [[-1, 1, Conv, [768, 1, 1]],
33
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
35
+ [-1, 3, C3, [768, False]], # 15
36
+
37
+ [-1, 1, Conv, [512, 1, 1]],
38
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
40
+ [-1, 3, C3, [512, False]], # 19
41
+
42
+ [-1, 1, Conv, [256, 1, 1]],
43
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
45
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
+
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, 20], 1, Concat, [1]], # cat head P4
49
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
+
51
+ [-1, 1, Conv, [512, 3, 2]],
52
+ [[-1, 16], 1, Concat, [1]], # cat head P5
53
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
+
55
+ [-1, 1, Conv, [768, 3, 2]],
56
+ [[-1, 12], 1, Concat, [1]], # cat head P6
57
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
+
59
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60
+ ]
models/hub/yolov5s-LeakyReLU.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
6
+ depth_multiple: 0.33 # model depth multiple
7
+ width_multiple: 0.50 # layer channel multiple
8
+ anchors:
9
+ - [10,13, 16,30, 33,23] # P3/8
10
+ - [30,61, 62,45, 59,119] # P4/16
11
+ - [116,90, 156,198, 373,326] # P5/32
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head:
30
+ [[-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
models/hub/yolov5s-ghost.yaml ADDED
@@ -0,0 +1,48 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.50 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16
+ [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
17
+ [-1, 3, C3Ghost, [128]],
18
+ [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
19
+ [-1, 6, C3Ghost, [256]],
20
+ [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
21
+ [-1, 9, C3Ghost, [512]],
22
+ [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
23
+ [-1, 3, C3Ghost, [1024]],
24
+ [-1, 1, SPPF, [1024, 5]], # 9
25
+ ]
26
+
27
+ # YOLOv5 v6.0 head
28
+ head:
29
+ [[-1, 1, GhostConv, [512, 1, 1]],
30
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
32
+ [-1, 3, C3Ghost, [512, False]], # 13
33
+
34
+ [-1, 1, GhostConv, [256, 1, 1]],
35
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
37
+ [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
38
+
39
+ [-1, 1, GhostConv, [256, 3, 2]],
40
+ [[-1, 14], 1, Concat, [1]], # cat head P4
41
+ [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
42
+
43
+ [-1, 1, GhostConv, [512, 3, 2]],
44
+ [[-1, 10], 1, Concat, [1]], # cat head P5
45
+ [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
46
+
47
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48
+ ]
models/hub/yolov5s-transformer.yaml ADDED
@@ -0,0 +1,48 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.50 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17
+ [-1, 3, C3, [128]],
18
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19
+ [-1, 6, C3, [256]],
20
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21
+ [-1, 9, C3, [512]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23
+ [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
24
+ [-1, 1, SPPF, [1024, 5]], # 9
25
+ ]
26
+
27
+ # YOLOv5 v6.0 head
28
+ head:
29
+ [[-1, 1, Conv, [512, 1, 1]],
30
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
32
+ [-1, 3, C3, [512, False]], # 13
33
+
34
+ [-1, 1, Conv, [256, 1, 1]],
35
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
37
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
+
39
+ [-1, 1, Conv, [256, 3, 2]],
40
+ [[-1, 14], 1, Concat, [1]], # cat head P4
41
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
+
43
+ [-1, 1, Conv, [512, 3, 2]],
44
+ [[-1, 10], 1, Concat, [1]], # cat head P5
45
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
+
47
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48
+ ]
models/hub/yolov5s6.yaml ADDED
@@ -0,0 +1,60 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.50 # layer channel multiple
7
+ anchors:
8
+ - [19,27, 44,40, 38,94] # P3/8
9
+ - [96,68, 86,152, 180,137] # P4/16
10
+ - [140,301, 303,264, 238,542] # P5/32
11
+ - [436,615, 739,380, 925,792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [768]],
25
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26
+ [-1, 3, C3, [1024]],
27
+ [-1, 1, SPPF, [1024, 5]], # 11
28
+ ]
29
+
30
+ # YOLOv5 v6.0 head
31
+ head:
32
+ [[-1, 1, Conv, [768, 1, 1]],
33
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
35
+ [-1, 3, C3, [768, False]], # 15
36
+
37
+ [-1, 1, Conv, [512, 1, 1]],
38
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
40
+ [-1, 3, C3, [512, False]], # 19
41
+
42
+ [-1, 1, Conv, [256, 1, 1]],
43
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
45
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
+
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, 20], 1, Concat, [1]], # cat head P4
49
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
+
51
+ [-1, 1, Conv, [512, 3, 2]],
52
+ [[-1, 16], 1, Concat, [1]], # cat head P5
53
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
+
55
+ [-1, 1, Conv, [768, 3, 2]],
56
+ [[-1, 12], 1, Concat, [1]], # cat head P6
57
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
+
59
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60
+ ]
models/hub/yolov5x6.yaml ADDED
@@ -0,0 +1,60 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.33 # model depth multiple
6
+ width_multiple: 1.25 # layer channel multiple
7
+ anchors:
8
+ - [19,27, 44,40, 38,94] # P3/8
9
+ - [96,68, 86,152, 180,137] # P4/16
10
+ - [140,301, 303,264, 238,542] # P5/32
11
+ - [436,615, 739,380, 925,792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [768]],
25
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26
+ [-1, 3, C3, [1024]],
27
+ [-1, 1, SPPF, [1024, 5]], # 11
28
+ ]
29
+
30
+ # YOLOv5 v6.0 head
31
+ head:
32
+ [[-1, 1, Conv, [768, 1, 1]],
33
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
35
+ [-1, 3, C3, [768, False]], # 15
36
+
37
+ [-1, 1, Conv, [512, 1, 1]],
38
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
40
+ [-1, 3, C3, [512, False]], # 19
41
+
42
+ [-1, 1, Conv, [256, 1, 1]],
43
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
45
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46
+
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, 20], 1, Concat, [1]], # cat head P4
49
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50
+
51
+ [-1, 1, Conv, [512, 3, 2]],
52
+ [[-1, 16], 1, Concat, [1]], # cat head P5
53
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54
+
55
+ [-1, 1, Conv, [768, 3, 2]],
56
+ [[-1, 12], 1, Concat, [1]], # cat head P6
57
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58
+
59
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60
+ ]
models/segment/yolov5l-seg.yaml ADDED
@@ -0,0 +1,48 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17
+ [-1, 3, C3, [128]],
18
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19
+ [-1, 6, C3, [256]],
20
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21
+ [-1, 9, C3, [512]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23
+ [-1, 3, C3, [1024]],
24
+ [-1, 1, SPPF, [1024, 5]], # 9
25
+ ]
26
+
27
+ # YOLOv5 v6.0 head
28
+ head:
29
+ [[-1, 1, Conv, [512, 1, 1]],
30
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
32
+ [-1, 3, C3, [512, False]], # 13
33
+
34
+ [-1, 1, Conv, [256, 1, 1]],
35
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
37
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
+
39
+ [-1, 1, Conv, [256, 3, 2]],
40
+ [[-1, 14], 1, Concat, [1]], # cat head P4
41
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
+
43
+ [-1, 1, Conv, [512, 3, 2]],
44
+ [[-1, 10], 1, Concat, [1]], # cat head P5
45
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
+
47
+ [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48
+ ]
models/segment/yolov5m-seg.yaml ADDED
@@ -0,0 +1,48 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.67 # model depth multiple
6
+ width_multiple: 0.75 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17
+ [-1, 3, C3, [128]],
18
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19
+ [-1, 6, C3, [256]],
20
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21
+ [-1, 9, C3, [512]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23
+ [-1, 3, C3, [1024]],
24
+ [-1, 1, SPPF, [1024, 5]], # 9
25
+ ]
26
+
27
+ # YOLOv5 v6.0 head
28
+ head:
29
+ [[-1, 1, Conv, [512, 1, 1]],
30
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
32
+ [-1, 3, C3, [512, False]], # 13
33
+
34
+ [-1, 1, Conv, [256, 1, 1]],
35
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
37
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
+
39
+ [-1, 1, Conv, [256, 3, 2]],
40
+ [[-1, 14], 1, Concat, [1]], # cat head P4
41
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
+
43
+ [-1, 1, Conv, [512, 3, 2]],
44
+ [[-1, 10], 1, Concat, [1]], # cat head P5
45
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
+
47
+ [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48
+ ]
models/segment/yolov5n-seg.yaml ADDED
@@ -0,0 +1,48 @@
1
+ # YOLOv5 πŸš€ by Ultralytics, AGPL-3.0 license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.25 # layer channel multiple
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17
+ [-1, 3, C3, [128]],
18
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19
+ [-1, 6, C3, [256]],
20
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21
+ [-1, 9, C3, [512]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23
+ [-1, 3, C3, [1024]],
24
+ [-1, 1, SPPF, [1024, 5]], # 9
25
+ ]
26
+
27
+ # YOLOv5 v6.0 head
28
+ head:
29
+ [[-1, 1, Conv, [512, 1, 1]],
30
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
32
+ [-1, 3, C3, [512, False]], # 13
33
+
34
+ [-1, 1, Conv, [256, 1, 1]],
35
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
37
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38
+
39
+ [-1, 1, Conv, [256, 3, 2]],
40
+ [[-1, 14], 1, Concat, [1]], # cat head P4
41
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42
+
43
+ [-1, 1, Conv, [512, 3, 2]],
44
+ [[-1, 10], 1, Concat, [1]], # cat head P5
45
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46
+
47
+ [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48
+ ]
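In the Segment rows of these segmentation configs, the extra args 32 and 256 read as the number of mask coefficients per detection and the proto channel width, which matches the Proto(c1, c_=256, c2=32) defaults added to models/common.py earlier in this diff. The sketch below paraphrases the mask-assembly shape arithmetic (the process_mask step in the segmentation utils); the tensors are random stand-ins, so this is illustrative only.

import torch

nm = 32                                  # mask coefficients per box (from the Segment args)
protos = torch.randn(nm, 160, 160)       # Proto output for a 640x640 image (stride-4 grid)
coeffs = torch.randn(10, nm)             # 10 detections, one nm-length coefficient vector each
masks = (coeffs @ protos.view(nm, -1)).sigmoid().view(-1, 160, 160)
print(masks.shape)                       # torch.Size([10, 160, 160])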