Aditya9790 committed on
Commit
8c91daa
1 Parent(s): 226eb53

Upload 103 files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. LICENSE.md +674 -0
  3. Tensor go assignment .mp4 +3 -0
  4. cfg/baseline/r50-csp.yaml +49 -0
  5. cfg/baseline/x50-csp.yaml +49 -0
  6. cfg/baseline/yolor-csp-x.yaml +52 -0
  7. cfg/baseline/yolor-csp.yaml +52 -0
  8. cfg/baseline/yolor-d6.yaml +63 -0
  9. cfg/baseline/yolor-e6.yaml +63 -0
  10. cfg/baseline/yolor-p6.yaml +63 -0
  11. cfg/baseline/yolor-w6.yaml +63 -0
  12. cfg/baseline/yolov3-spp.yaml +51 -0
  13. cfg/baseline/yolov3.yaml +51 -0
  14. cfg/baseline/yolov4-csp.yaml +52 -0
  15. cfg/deploy/yolov7-d6.yaml +202 -0
  16. cfg/deploy/yolov7-e6.yaml +180 -0
  17. cfg/deploy/yolov7-e6e.yaml +301 -0
  18. cfg/deploy/yolov7-tiny-silu.yaml +112 -0
  19. cfg/deploy/yolov7-tiny.yaml +112 -0
  20. cfg/deploy/yolov7-w6.yaml +158 -0
  21. cfg/deploy/yolov7.yaml +140 -0
  22. cfg/deploy/yolov7x.yaml +156 -0
  23. cfg/training/yolov7-d6.yaml +207 -0
  24. cfg/training/yolov7-e6.yaml +185 -0
  25. cfg/training/yolov7-e6e.yaml +306 -0
  26. cfg/training/yolov7-tiny.yaml +112 -0
  27. cfg/training/yolov7-w6.yaml +163 -0
  28. cfg/training/yolov7.yaml +140 -0
  29. cfg/training/yolov7x.yaml +156 -0
  30. data/coco.yaml +23 -0
  31. data/hyp.scratch.custom.yaml +31 -0
  32. data/hyp.scratch.p5.yaml +31 -0
  33. data/hyp.scratch.p6.yaml +31 -0
  34. data/hyp.scratch.tiny.yaml +31 -0
  35. deploy/triton-inference-server/README.md +164 -0
  36. deploy/triton-inference-server/boundingbox.py +33 -0
  37. deploy/triton-inference-server/client.py +334 -0
  38. deploy/triton-inference-server/data/dog.jpg +0 -0
  39. deploy/triton-inference-server/data/dog_result.jpg +0 -0
  40. deploy/triton-inference-server/labels.py +83 -0
  41. deploy/triton-inference-server/processing.py +51 -0
  42. deploy/triton-inference-server/render.py +110 -0
  43. detect.py +196 -0
  44. detect_or_track.py +285 -0
  45. export.py +205 -0
  46. figure/horses_prediction.jpg +0 -0
  47. figure/mask.png +0 -0
  48. figure/performance.png +0 -0
  49. figure/pose.png +0 -0
  50. figure/tennis.jpg +0 -0
.gitattributes CHANGED
@@ -32,3 +32,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ paper/yolov7.pdf filter=lfs diff=lfs merge=lfs -text
36
+ street.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ Tensor[[:space:]]go[[:space:]]assignment[[:space:]].mp4 filter=lfs diff=lfs merge=lfs -text
38
+ tools/YOLOv7-Dynamic-Batch-TENSORRT.ipynb filter=lfs diff=lfs merge=lfs -text
LICENSE.md ADDED
@@ -0,0 +1,674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU General Public License is a free, copyleft license for
11
+ software and other kinds of works.
12
+
13
+ The licenses for most software and other practical works are designed
14
+ to take away your freedom to share and change the works. By contrast,
15
+ the GNU General Public License is intended to guarantee your freedom to
16
+ share and change all versions of a program--to make sure it remains free
17
+ software for all its users. We, the Free Software Foundation, use the
18
+ GNU General Public License for most of our software; it applies also to
19
+ any other work released this way by its authors. You can apply it to
20
+ your programs, too.
21
+
22
+ When we speak of free software, we are referring to freedom, not
23
+ price. Our General Public Licenses are designed to make sure that you
24
+ have the freedom to distribute copies of free software (and charge for
25
+ them if you wish), that you receive source code or can get it if you
26
+ want it, that you can change the software or use pieces of it in new
27
+ free programs, and that you know you can do these things.
28
+
29
+ To protect your rights, we need to prevent others from denying you
30
+ these rights or asking you to surrender the rights. Therefore, you have
31
+ certain responsibilities if you distribute copies of the software, or if
32
+ you modify it: responsibilities to respect the freedom of others.
33
+
34
+ For example, if you distribute copies of such a program, whether
35
+ gratis or for a fee, you must pass on to the recipients the same
36
+ freedoms that you received. You must make sure that they, too, receive
37
+ or can get the source code. And you must show them these terms so they
38
+ know their rights.
39
+
40
+ Developers that use the GNU GPL protect your rights with two steps:
41
+ (1) assert copyright on the software, and (2) offer you this License
42
+ giving you legal permission to copy, distribute and/or modify it.
43
+
44
+ For the developers' and authors' protection, the GPL clearly explains
45
+ that there is no warranty for this free software. For both users' and
46
+ authors' sake, the GPL requires that modified versions be marked as
47
+ changed, so that their problems will not be attributed erroneously to
48
+ authors of previous versions.
49
+
50
+ Some devices are designed to deny users access to install or run
51
+ modified versions of the software inside them, although the manufacturer
52
+ can do so. This is fundamentally incompatible with the aim of
53
+ protecting users' freedom to change the software. The systematic
54
+ pattern of such abuse occurs in the area of products for individuals to
55
+ use, which is precisely where it is most unacceptable. Therefore, we
56
+ have designed this version of the GPL to prohibit the practice for those
57
+ products. If such problems arise substantially in other domains, we
58
+ stand ready to extend this provision to those domains in future versions
59
+ of the GPL, as needed to protect the freedom of users.
60
+
61
+ Finally, every program is threatened constantly by software patents.
62
+ States should not allow patents to restrict development and use of
63
+ software on general-purpose computers, but in those that do, we wish to
64
+ avoid the special danger that patents applied to a free program could
65
+ make it effectively proprietary. To prevent this, the GPL assures that
66
+ patents cannot be used to render the program non-free.
67
+
68
+ The precise terms and conditions for copying, distribution and
69
+ modification follow.
70
+
71
+ TERMS AND CONDITIONS
72
+
73
+ 0. Definitions.
74
+
75
+ "This License" refers to version 3 of the GNU General Public License.
76
+
77
+ "Copyright" also means copyright-like laws that apply to other kinds of
78
+ works, such as semiconductor masks.
79
+
80
+ "The Program" refers to any copyrightable work licensed under this
81
+ License. Each licensee is addressed as "you". "Licensees" and
82
+ "recipients" may be individuals or organizations.
83
+
84
+ To "modify" a work means to copy from or adapt all or part of the work
85
+ in a fashion requiring copyright permission, other than the making of an
86
+ exact copy. The resulting work is called a "modified version" of the
87
+ earlier work or a work "based on" the earlier work.
88
+
89
+ A "covered work" means either the unmodified Program or a work based
90
+ on the Program.
91
+
92
+ To "propagate" a work means to do anything with it that, without
93
+ permission, would make you directly or secondarily liable for
94
+ infringement under applicable copyright law, except executing it on a
95
+ computer or modifying a private copy. Propagation includes copying,
96
+ distribution (with or without modification), making available to the
97
+ public, and in some countries other activities as well.
98
+
99
+ To "convey" a work means any kind of propagation that enables other
100
+ parties to make or receive copies. Mere interaction with a user through
101
+ a computer network, with no transfer of a copy, is not conveying.
102
+
103
+ An interactive user interface displays "Appropriate Legal Notices"
104
+ to the extent that it includes a convenient and prominently visible
105
+ feature that (1) displays an appropriate copyright notice, and (2)
106
+ tells the user that there is no warranty for the work (except to the
107
+ extent that warranties are provided), that licensees may convey the
108
+ work under this License, and how to view a copy of this License. If
109
+ the interface presents a list of user commands or options, such as a
110
+ menu, a prominent item in the list meets this criterion.
111
+
112
+ 1. Source Code.
113
+
114
+ The "source code" for a work means the preferred form of the work
115
+ for making modifications to it. "Object code" means any non-source
116
+ form of a work.
117
+
118
+ A "Standard Interface" means an interface that either is an official
119
+ standard defined by a recognized standards body, or, in the case of
120
+ interfaces specified for a particular programming language, one that
121
+ is widely used among developers working in that language.
122
+
123
+ The "System Libraries" of an executable work include anything, other
124
+ than the work as a whole, that (a) is included in the normal form of
125
+ packaging a Major Component, but which is not part of that Major
126
+ Component, and (b) serves only to enable use of the work with that
127
+ Major Component, or to implement a Standard Interface for which an
128
+ implementation is available to the public in source code form. A
129
+ "Major Component", in this context, means a major essential component
130
+ (kernel, window system, and so on) of the specific operating system
131
+ (if any) on which the executable work runs, or a compiler used to
132
+ produce the work, or an object code interpreter used to run it.
133
+
134
+ The "Corresponding Source" for a work in object code form means all
135
+ the source code needed to generate, install, and (for an executable
136
+ work) run the object code and to modify the work, including scripts to
137
+ control those activities. However, it does not include the work's
138
+ System Libraries, or general-purpose tools or generally available free
139
+ programs which are used unmodified in performing those activities but
140
+ which are not part of the work. For example, Corresponding Source
141
+ includes interface definition files associated with source files for
142
+ the work, and the source code for shared libraries and dynamically
143
+ linked subprograms that the work is specifically designed to require,
144
+ such as by intimate data communication or control flow between those
145
+ subprograms and other parts of the work.
146
+
147
+ The Corresponding Source need not include anything that users
148
+ can regenerate automatically from other parts of the Corresponding
149
+ Source.
150
+
151
+ The Corresponding Source for a work in source code form is that
152
+ same work.
153
+
154
+ 2. Basic Permissions.
155
+
156
+ All rights granted under this License are granted for the term of
157
+ copyright on the Program, and are irrevocable provided the stated
158
+ conditions are met. This License explicitly affirms your unlimited
159
+ permission to run the unmodified Program. The output from running a
160
+ covered work is covered by this License only if the output, given its
161
+ content, constitutes a covered work. This License acknowledges your
162
+ rights of fair use or other equivalent, as provided by copyright law.
163
+
164
+ You may make, run and propagate covered works that you do not
165
+ convey, without conditions so long as your license otherwise remains
166
+ in force. You may convey covered works to others for the sole purpose
167
+ of having them make modifications exclusively for you, or provide you
168
+ with facilities for running those works, provided that you comply with
169
+ the terms of this License in conveying all material for which you do
170
+ not control copyright. Those thus making or running the covered works
171
+ for you must do so exclusively on your behalf, under your direction
172
+ and control, on terms that prohibit them from making any copies of
173
+ your copyrighted material outside their relationship with you.
174
+
175
+ Conveying under any other circumstances is permitted solely under
176
+ the conditions stated below. Sublicensing is not allowed; section 10
177
+ makes it unnecessary.
178
+
179
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180
+
181
+ No covered work shall be deemed part of an effective technological
182
+ measure under any applicable law fulfilling obligations under article
183
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184
+ similar laws prohibiting or restricting circumvention of such
185
+ measures.
186
+
187
+ When you convey a covered work, you waive any legal power to forbid
188
+ circumvention of technological measures to the extent such circumvention
189
+ is effected by exercising rights under this License with respect to
190
+ the covered work, and you disclaim any intention to limit operation or
191
+ modification of the work as a means of enforcing, against the work's
192
+ users, your or third parties' legal rights to forbid circumvention of
193
+ technological measures.
194
+
195
+ 4. Conveying Verbatim Copies.
196
+
197
+ You may convey verbatim copies of the Program's source code as you
198
+ receive it, in any medium, provided that you conspicuously and
199
+ appropriately publish on each copy an appropriate copyright notice;
200
+ keep intact all notices stating that this License and any
201
+ non-permissive terms added in accord with section 7 apply to the code;
202
+ keep intact all notices of the absence of any warranty; and give all
203
+ recipients a copy of this License along with the Program.
204
+
205
+ You may charge any price or no price for each copy that you convey,
206
+ and you may offer support or warranty protection for a fee.
207
+
208
+ 5. Conveying Modified Source Versions.
209
+
210
+ You may convey a work based on the Program, or the modifications to
211
+ produce it from the Program, in the form of source code under the
212
+ terms of section 4, provided that you also meet all of these conditions:
213
+
214
+ a) The work must carry prominent notices stating that you modified
215
+ it, and giving a relevant date.
216
+
217
+ b) The work must carry prominent notices stating that it is
218
+ released under this License and any conditions added under section
219
+ 7. This requirement modifies the requirement in section 4 to
220
+ "keep intact all notices".
221
+
222
+ c) You must license the entire work, as a whole, under this
223
+ License to anyone who comes into possession of a copy. This
224
+ License will therefore apply, along with any applicable section 7
225
+ additional terms, to the whole of the work, and all its parts,
226
+ regardless of how they are packaged. This License gives no
227
+ permission to license the work in any other way, but it does not
228
+ invalidate such permission if you have separately received it.
229
+
230
+ d) If the work has interactive user interfaces, each must display
231
+ Appropriate Legal Notices; however, if the Program has interactive
232
+ interfaces that do not display Appropriate Legal Notices, your
233
+ work need not make them do so.
234
+
235
+ A compilation of a covered work with other separate and independent
236
+ works, which are not by their nature extensions of the covered work,
237
+ and which are not combined with it such as to form a larger program,
238
+ in or on a volume of a storage or distribution medium, is called an
239
+ "aggregate" if the compilation and its resulting copyright are not
240
+ used to limit the access or legal rights of the compilation's users
241
+ beyond what the individual works permit. Inclusion of a covered work
242
+ in an aggregate does not cause this License to apply to the other
243
+ parts of the aggregate.
244
+
245
+ 6. Conveying Non-Source Forms.
246
+
247
+ You may convey a covered work in object code form under the terms
248
+ of sections 4 and 5, provided that you also convey the
249
+ machine-readable Corresponding Source under the terms of this License,
250
+ in one of these ways:
251
+
252
+ a) Convey the object code in, or embodied in, a physical product
253
+ (including a physical distribution medium), accompanied by the
254
+ Corresponding Source fixed on a durable physical medium
255
+ customarily used for software interchange.
256
+
257
+ b) Convey the object code in, or embodied in, a physical product
258
+ (including a physical distribution medium), accompanied by a
259
+ written offer, valid for at least three years and valid for as
260
+ long as you offer spare parts or customer support for that product
261
+ model, to give anyone who possesses the object code either (1) a
262
+ copy of the Corresponding Source for all the software in the
263
+ product that is covered by this License, on a durable physical
264
+ medium customarily used for software interchange, for a price no
265
+ more than your reasonable cost of physically performing this
266
+ conveying of source, or (2) access to copy the
267
+ Corresponding Source from a network server at no charge.
268
+
269
+ c) Convey individual copies of the object code with a copy of the
270
+ written offer to provide the Corresponding Source. This
271
+ alternative is allowed only occasionally and noncommercially, and
272
+ only if you received the object code with such an offer, in accord
273
+ with subsection 6b.
274
+
275
+ d) Convey the object code by offering access from a designated
276
+ place (gratis or for a charge), and offer equivalent access to the
277
+ Corresponding Source in the same way through the same place at no
278
+ further charge. You need not require recipients to copy the
279
+ Corresponding Source along with the object code. If the place to
280
+ copy the object code is a network server, the Corresponding Source
281
+ may be on a different server (operated by you or a third party)
282
+ that supports equivalent copying facilities, provided you maintain
283
+ clear directions next to the object code saying where to find the
284
+ Corresponding Source. Regardless of what server hosts the
285
+ Corresponding Source, you remain obligated to ensure that it is
286
+ available for as long as needed to satisfy these requirements.
287
+
288
+ e) Convey the object code using peer-to-peer transmission, provided
289
+ you inform other peers where the object code and Corresponding
290
+ Source of the work are being offered to the general public at no
291
+ charge under subsection 6d.
292
+
293
+ A separable portion of the object code, whose source code is excluded
294
+ from the Corresponding Source as a System Library, need not be
295
+ included in conveying the object code work.
296
+
297
+ A "User Product" is either (1) a "consumer product", which means any
298
+ tangible personal property which is normally used for personal, family,
299
+ or household purposes, or (2) anything designed or sold for incorporation
300
+ into a dwelling. In determining whether a product is a consumer product,
301
+ doubtful cases shall be resolved in favor of coverage. For a particular
302
+ product received by a particular user, "normally used" refers to a
303
+ typical or common use of that class of product, regardless of the status
304
+ of the particular user or of the way in which the particular user
305
+ actually uses, or expects or is expected to use, the product. A product
306
+ is a consumer product regardless of whether the product has substantial
307
+ commercial, industrial or non-consumer uses, unless such uses represent
308
+ the only significant mode of use of the product.
309
+
310
+ "Installation Information" for a User Product means any methods,
311
+ procedures, authorization keys, or other information required to install
312
+ and execute modified versions of a covered work in that User Product from
313
+ a modified version of its Corresponding Source. The information must
314
+ suffice to ensure that the continued functioning of the modified object
315
+ code is in no case prevented or interfered with solely because
316
+ modification has been made.
317
+
318
+ If you convey an object code work under this section in, or with, or
319
+ specifically for use in, a User Product, and the conveying occurs as
320
+ part of a transaction in which the right of possession and use of the
321
+ User Product is transferred to the recipient in perpetuity or for a
322
+ fixed term (regardless of how the transaction is characterized), the
323
+ Corresponding Source conveyed under this section must be accompanied
324
+ by the Installation Information. But this requirement does not apply
325
+ if neither you nor any third party retains the ability to install
326
+ modified object code on the User Product (for example, the work has
327
+ been installed in ROM).
328
+
329
+ The requirement to provide Installation Information does not include a
330
+ requirement to continue to provide support service, warranty, or updates
331
+ for a work that has been modified or installed by the recipient, or for
332
+ the User Product in which it has been modified or installed. Access to a
333
+ network may be denied when the modification itself materially and
334
+ adversely affects the operation of the network or violates the rules and
335
+ protocols for communication across the network.
336
+
337
+ Corresponding Source conveyed, and Installation Information provided,
338
+ in accord with this section must be in a format that is publicly
339
+ documented (and with an implementation available to the public in
340
+ source code form), and must require no special password or key for
341
+ unpacking, reading or copying.
342
+
343
+ 7. Additional Terms.
344
+
345
+ "Additional permissions" are terms that supplement the terms of this
346
+ License by making exceptions from one or more of its conditions.
347
+ Additional permissions that are applicable to the entire Program shall
348
+ be treated as though they were included in this License, to the extent
349
+ that they are valid under applicable law. If additional permissions
350
+ apply only to part of the Program, that part may be used separately
351
+ under those permissions, but the entire Program remains governed by
352
+ this License without regard to the additional permissions.
353
+
354
+ When you convey a copy of a covered work, you may at your option
355
+ remove any additional permissions from that copy, or from any part of
356
+ it. (Additional permissions may be written to require their own
357
+ removal in certain cases when you modify the work.) You may place
358
+ additional permissions on material, added by you to a covered work,
359
+ for which you have or can give appropriate copyright permission.
360
+
361
+ Notwithstanding any other provision of this License, for material you
362
+ add to a covered work, you may (if authorized by the copyright holders of
363
+ that material) supplement the terms of this License with terms:
364
+
365
+ a) Disclaiming warranty or limiting liability differently from the
366
+ terms of sections 15 and 16 of this License; or
367
+
368
+ b) Requiring preservation of specified reasonable legal notices or
369
+ author attributions in that material or in the Appropriate Legal
370
+ Notices displayed by works containing it; or
371
+
372
+ c) Prohibiting misrepresentation of the origin of that material, or
373
+ requiring that modified versions of such material be marked in
374
+ reasonable ways as different from the original version; or
375
+
376
+ d) Limiting the use for publicity purposes of names of licensors or
377
+ authors of the material; or
378
+
379
+ e) Declining to grant rights under trademark law for use of some
380
+ trade names, trademarks, or service marks; or
381
+
382
+ f) Requiring indemnification of licensors and authors of that
383
+ material by anyone who conveys the material (or modified versions of
384
+ it) with contractual assumptions of liability to the recipient, for
385
+ any liability that these contractual assumptions directly impose on
386
+ those licensors and authors.
387
+
388
+ All other non-permissive additional terms are considered "further
389
+ restrictions" within the meaning of section 10. If the Program as you
390
+ received it, or any part of it, contains a notice stating that it is
391
+ governed by this License along with a term that is a further
392
+ restriction, you may remove that term. If a license document contains
393
+ a further restriction but permits relicensing or conveying under this
394
+ License, you may add to a covered work material governed by the terms
395
+ of that license document, provided that the further restriction does
396
+ not survive such relicensing or conveying.
397
+
398
+ If you add terms to a covered work in accord with this section, you
399
+ must place, in the relevant source files, a statement of the
400
+ additional terms that apply to those files, or a notice indicating
401
+ where to find the applicable terms.
402
+
403
+ Additional terms, permissive or non-permissive, may be stated in the
404
+ form of a separately written license, or stated as exceptions;
405
+ the above requirements apply either way.
406
+
407
+ 8. Termination.
408
+
409
+ You may not propagate or modify a covered work except as expressly
410
+ provided under this License. Any attempt otherwise to propagate or
411
+ modify it is void, and will automatically terminate your rights under
412
+ this License (including any patent licenses granted under the third
413
+ paragraph of section 11).
414
+
415
+ However, if you cease all violation of this License, then your
416
+ license from a particular copyright holder is reinstated (a)
417
+ provisionally, unless and until the copyright holder explicitly and
418
+ finally terminates your license, and (b) permanently, if the copyright
419
+ holder fails to notify you of the violation by some reasonable means
420
+ prior to 60 days after the cessation.
421
+
422
+ Moreover, your license from a particular copyright holder is
423
+ reinstated permanently if the copyright holder notifies you of the
424
+ violation by some reasonable means, this is the first time you have
425
+ received notice of violation of this License (for any work) from that
426
+ copyright holder, and you cure the violation prior to 30 days after
427
+ your receipt of the notice.
428
+
429
+ Termination of your rights under this section does not terminate the
430
+ licenses of parties who have received copies or rights from you under
431
+ this License. If your rights have been terminated and not permanently
432
+ reinstated, you do not qualify to receive new licenses for the same
433
+ material under section 10.
434
+
435
+ 9. Acceptance Not Required for Having Copies.
436
+
437
+ You are not required to accept this License in order to receive or
438
+ run a copy of the Program. Ancillary propagation of a covered work
439
+ occurring solely as a consequence of using peer-to-peer transmission
440
+ to receive a copy likewise does not require acceptance. However,
441
+ nothing other than this License grants you permission to propagate or
442
+ modify any covered work. These actions infringe copyright if you do
443
+ not accept this License. Therefore, by modifying or propagating a
444
+ covered work, you indicate your acceptance of this License to do so.
445
+
446
+ 10. Automatic Licensing of Downstream Recipients.
447
+
448
+ Each time you convey a covered work, the recipient automatically
449
+ receives a license from the original licensors, to run, modify and
450
+ propagate that work, subject to this License. You are not responsible
451
+ for enforcing compliance by third parties with this License.
452
+
453
+ An "entity transaction" is a transaction transferring control of an
454
+ organization, or substantially all assets of one, or subdividing an
455
+ organization, or merging organizations. If propagation of a covered
456
+ work results from an entity transaction, each party to that
457
+ transaction who receives a copy of the work also receives whatever
458
+ licenses to the work the party's predecessor in interest had or could
459
+ give under the previous paragraph, plus a right to possession of the
460
+ Corresponding Source of the work from the predecessor in interest, if
461
+ the predecessor has it or can get it with reasonable efforts.
462
+
463
+ You may not impose any further restrictions on the exercise of the
464
+ rights granted or affirmed under this License. For example, you may
465
+ not impose a license fee, royalty, or other charge for exercise of
466
+ rights granted under this License, and you may not initiate litigation
467
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
468
+ any patent claim is infringed by making, using, selling, offering for
469
+ sale, or importing the Program or any portion of it.
470
+
471
+ 11. Patents.
472
+
473
+ A "contributor" is a copyright holder who authorizes use under this
474
+ License of the Program or a work on which the Program is based. The
475
+ work thus licensed is called the contributor's "contributor version".
476
+
477
+ A contributor's "essential patent claims" are all patent claims
478
+ owned or controlled by the contributor, whether already acquired or
479
+ hereafter acquired, that would be infringed by some manner, permitted
480
+ by this License, of making, using, or selling its contributor version,
481
+ but do not include claims that would be infringed only as a
482
+ consequence of further modification of the contributor version. For
483
+ purposes of this definition, "control" includes the right to grant
484
+ patent sublicenses in a manner consistent with the requirements of
485
+ this License.
486
+
487
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
488
+ patent license under the contributor's essential patent claims, to
489
+ make, use, sell, offer for sale, import and otherwise run, modify and
490
+ propagate the contents of its contributor version.
491
+
492
+ In the following three paragraphs, a "patent license" is any express
493
+ agreement or commitment, however denominated, not to enforce a patent
494
+ (such as an express permission to practice a patent or covenant not to
495
+ sue for patent infringement). To "grant" such a patent license to a
496
+ party means to make such an agreement or commitment not to enforce a
497
+ patent against the party.
498
+
499
+ If you convey a covered work, knowingly relying on a patent license,
500
+ and the Corresponding Source of the work is not available for anyone
501
+ to copy, free of charge and under the terms of this License, through a
502
+ publicly available network server or other readily accessible means,
503
+ then you must either (1) cause the Corresponding Source to be so
504
+ available, or (2) arrange to deprive yourself of the benefit of the
505
+ patent license for this particular work, or (3) arrange, in a manner
506
+ consistent with the requirements of this License, to extend the patent
507
+ license to downstream recipients. "Knowingly relying" means you have
508
+ actual knowledge that, but for the patent license, your conveying the
509
+ covered work in a country, or your recipient's use of the covered work
510
+ in a country, would infringe one or more identifiable patents in that
511
+ country that you have reason to believe are valid.
512
+
513
+ If, pursuant to or in connection with a single transaction or
514
+ arrangement, you convey, or propagate by procuring conveyance of, a
515
+ covered work, and grant a patent license to some of the parties
516
+ receiving the covered work authorizing them to use, propagate, modify
517
+ or convey a specific copy of the covered work, then the patent license
518
+ you grant is automatically extended to all recipients of the covered
519
+ work and works based on it.
520
+
521
+ A patent license is "discriminatory" if it does not include within
522
+ the scope of its coverage, prohibits the exercise of, or is
523
+ conditioned on the non-exercise of one or more of the rights that are
524
+ specifically granted under this License. You may not convey a covered
525
+ work if you are a party to an arrangement with a third party that is
526
+ in the business of distributing software, under which you make payment
527
+ to the third party based on the extent of your activity of conveying
528
+ the work, and under which the third party grants, to any of the
529
+ parties who would receive the covered work from you, a discriminatory
530
+ patent license (a) in connection with copies of the covered work
531
+ conveyed by you (or copies made from those copies), or (b) primarily
532
+ for and in connection with specific products or compilations that
533
+ contain the covered work, unless you entered into that arrangement,
534
+ or that patent license was granted, prior to 28 March 2007.
535
+
536
+ Nothing in this License shall be construed as excluding or limiting
537
+ any implied license or other defenses to infringement that may
538
+ otherwise be available to you under applicable patent law.
539
+
540
+ 12. No Surrender of Others' Freedom.
541
+
542
+ If conditions are imposed on you (whether by court order, agreement or
543
+ otherwise) that contradict the conditions of this License, they do not
544
+ excuse you from the conditions of this License. If you cannot convey a
545
+ covered work so as to satisfy simultaneously your obligations under this
546
+ License and any other pertinent obligations, then as a consequence you may
547
+ not convey it at all. For example, if you agree to terms that obligate you
548
+ to collect a royalty for further conveying from those to whom you convey
549
+ the Program, the only way you could satisfy both those terms and this
550
+ License would be to refrain entirely from conveying the Program.
551
+
552
+ 13. Use with the GNU Affero General Public License.
553
+
554
+ Notwithstanding any other provision of this License, you have
555
+ permission to link or combine any covered work with a work licensed
556
+ under version 3 of the GNU Affero General Public License into a single
557
+ combined work, and to convey the resulting work. The terms of this
558
+ License will continue to apply to the part which is the covered work,
559
+ but the special requirements of the GNU Affero General Public License,
560
+ section 13, concerning interaction through a network will apply to the
561
+ combination as such.
562
+
563
+ 14. Revised Versions of this License.
564
+
565
+ The Free Software Foundation may publish revised and/or new versions of
566
+ the GNU General Public License from time to time. Such new versions will
567
+ be similar in spirit to the present version, but may differ in detail to
568
+ address new problems or concerns.
569
+
570
+ Each version is given a distinguishing version number. If the
571
+ Program specifies that a certain numbered version of the GNU General
572
+ Public License "or any later version" applies to it, you have the
573
+ option of following the terms and conditions either of that numbered
574
+ version or of any later version published by the Free Software
575
+ Foundation. If the Program does not specify a version number of the
576
+ GNU General Public License, you may choose any version ever published
577
+ by the Free Software Foundation.
578
+
579
+ If the Program specifies that a proxy can decide which future
580
+ versions of the GNU General Public License can be used, that proxy's
581
+ public statement of acceptance of a version permanently authorizes you
582
+ to choose that version for the Program.
583
+
584
+ Later license versions may give you additional or different
585
+ permissions. However, no additional obligations are imposed on any
586
+ author or copyright holder as a result of your choosing to follow a
587
+ later version.
588
+
589
+ 15. Disclaimer of Warranty.
590
+
591
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599
+
600
+ 16. Limitation of Liability.
601
+
602
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610
+ SUCH DAMAGES.
611
+
612
+ 17. Interpretation of Sections 15 and 16.
613
+
614
+ If the disclaimer of warranty and limitation of liability provided
615
+ above cannot be given local legal effect according to their terms,
616
+ reviewing courts shall apply local law that most closely approximates
617
+ an absolute waiver of all civil liability in connection with the
618
+ Program, unless a warranty or assumption of liability accompanies a
619
+ copy of the Program in return for a fee.
620
+
621
+ END OF TERMS AND CONDITIONS
622
+
623
+ How to Apply These Terms to Your New Programs
624
+
625
+ If you develop a new program, and you want it to be of the greatest
626
+ possible use to the public, the best way to achieve this is to make it
627
+ free software which everyone can redistribute and change under these terms.
628
+
629
+ To do so, attach the following notices to the program. It is safest
630
+ to attach them to the start of each source file to most effectively
631
+ state the exclusion of warranty; and each file should have at least
632
+ the "copyright" line and a pointer to where the full notice is found.
633
+
634
+ <one line to give the program's name and a brief idea of what it does.>
635
+ Copyright (C) <year> <name of author>
636
+
637
+ This program is free software: you can redistribute it and/or modify
638
+ it under the terms of the GNU General Public License as published by
639
+ the Free Software Foundation, either version 3 of the License, or
640
+ (at your option) any later version.
641
+
642
+ This program is distributed in the hope that it will be useful,
643
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
644
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645
+ GNU General Public License for more details.
646
+
647
+ You should have received a copy of the GNU General Public License
648
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
649
+
650
+ Also add information on how to contact you by electronic and paper mail.
651
+
652
+ If the program does terminal interaction, make it output a short
653
+ notice like this when it starts in an interactive mode:
654
+
655
+ <program> Copyright (C) <year> <name of author>
656
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657
+ This is free software, and you are welcome to redistribute it
658
+ under certain conditions; type `show c' for details.
659
+
660
+ The hypothetical commands `show w' and `show c' should show the appropriate
661
+ parts of the General Public License. Of course, your program's commands
662
+ might be different; for a GUI interface, you would use an "about box".
663
+
664
+ You should also get your employer (if you work as a programmer) or school,
665
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
666
+ For more information on this, and how to apply and follow the GNU GPL, see
667
+ <https://www.gnu.org/licenses/>.
668
+
669
+ The GNU General Public License does not permit incorporating your program
670
+ into proprietary programs. If your program is a subroutine library, you
671
+ may consider it more useful to permit linking proprietary applications with
672
+ the library. If this is what you want to do, use the GNU Lesser General
673
+ Public License instead of this License. But first, please read
674
+ <https://www.gnu.org/licenses/why-not-lgpl.html>.
Tensor go assignment .mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ceb2e4fae8a029004115a6e9e2f21a6f9d13541589c3347bfeb86591ea207a8
3
+ size 22437942
cfg/baseline/r50-csp.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-ResNet backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Stem, [128]], # 0-P2/4
16
+ [-1, 3, ResCSPC, [128]],
17
+ [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
18
+ [-1, 4, ResCSPC, [256]],
19
+ [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
20
+ [-1, 6, ResCSPC, [512]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
22
+ [-1, 3, ResCSPC, [1024]], # 7
23
+ ]
24
+
25
+ # CSP-Res-PAN head
26
+ head:
27
+ [[-1, 1, SPPCSPC, [512]], # 8
28
+ [-1, 1, Conv, [256, 1, 1]],
29
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30
+ [5, 1, Conv, [256, 1, 1]], # route backbone P4
31
+ [[-1, -2], 1, Concat, [1]],
32
+ [-1, 2, ResCSPB, [256]], # 13
33
+ [-1, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [3, 1, Conv, [128, 1, 1]], # route backbone P3
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 2, ResCSPB, [128]], # 18
38
+ [-1, 1, Conv, [256, 3, 1]],
39
+ [-2, 1, Conv, [256, 3, 2]],
40
+ [[-1, 13], 1, Concat, [1]], # cat
41
+ [-1, 2, ResCSPB, [256]], # 22
42
+ [-1, 1, Conv, [512, 3, 1]],
43
+ [-2, 1, Conv, [512, 3, 2]],
44
+ [[-1, 8], 1, Concat, [1]], # cat
45
+ [-1, 2, ResCSPB, [512]], # 26
46
+ [-1, 1, Conv, [1024, 3, 1]],
47
+
48
+ [[19,23,27], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
cfg/baseline/x50-csp.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-ResNeXt backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Stem, [128]], # 0-P2/4
16
+ [-1, 3, ResXCSPC, [128]],
17
+ [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
18
+ [-1, 4, ResXCSPC, [256]],
19
+ [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
20
+ [-1, 6, ResXCSPC, [512]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
22
+ [-1, 3, ResXCSPC, [1024]], # 7
23
+ ]
24
+
25
+ # CSP-ResX-PAN head
26
+ head:
27
+ [[-1, 1, SPPCSPC, [512]], # 8
28
+ [-1, 1, Conv, [256, 1, 1]],
29
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30
+ [5, 1, Conv, [256, 1, 1]], # route backbone P4
31
+ [[-1, -2], 1, Concat, [1]],
32
+ [-1, 2, ResXCSPB, [256]], # 13
33
+ [-1, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [3, 1, Conv, [128, 1, 1]], # route backbone P3
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 2, ResXCSPB, [128]], # 18
38
+ [-1, 1, Conv, [256, 3, 1]],
39
+ [-2, 1, Conv, [256, 3, 2]],
40
+ [[-1, 13], 1, Concat, [1]], # cat
41
+ [-1, 2, ResXCSPB, [256]], # 22
42
+ [-1, 1, Conv, [512, 3, 1]],
43
+ [-2, 1, Conv, [512, 3, 2]],
44
+ [[-1, 8], 1, Concat, [1]], # cat
45
+ [-1, 2, ResXCSPB, [512]], # 26
46
+ [-1, 1, Conv, [1024, 3, 1]],
47
+
48
+ [[19,23,27], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
cfg/baseline/yolor-csp-x.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.33 # model depth multiple
4
+ width_multiple: 1.25 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-Darknet backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, BottleneckCSPC, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, BottleneckCSPC, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, BottleneckCSPC, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, BottleneckCSPC, [1024]], # 10
26
+ ]
27
+
28
+ # CSP-Dark-PAN head
29
+ head:
30
+ [[-1, 1, SPPCSPC, [512]], # 11
31
+ [-1, 1, Conv, [256, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [8, 1, Conv, [256, 1, 1]], # route backbone P4
34
+ [[-1, -2], 1, Concat, [1]],
35
+ [-1, 2, BottleneckCSPB, [256]], # 16
36
+ [-1, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [6, 1, Conv, [128, 1, 1]], # route backbone P3
39
+ [[-1, -2], 1, Concat, [1]],
40
+ [-1, 2, BottleneckCSPB, [128]], # 21
41
+ [-1, 1, Conv, [256, 3, 1]],
42
+ [-2, 1, Conv, [256, 3, 2]],
43
+ [[-1, 16], 1, Concat, [1]], # cat
44
+ [-1, 2, BottleneckCSPB, [256]], # 25
45
+ [-1, 1, Conv, [512, 3, 1]],
46
+ [-2, 1, Conv, [512, 3, 2]],
47
+ [[-1, 11], 1, Concat, [1]], # cat
48
+ [-1, 2, BottleneckCSPB, [512]], # 29
49
+ [-1, 1, Conv, [1024, 3, 1]],
50
+
51
+ [[22,26,30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
52
+ ]
cfg/baseline/yolor-csp.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-Darknet backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, BottleneckCSPC, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, BottleneckCSPC, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, BottleneckCSPC, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, BottleneckCSPC, [1024]], # 10
26
+ ]
27
+
28
+ # CSP-Dark-PAN head
29
+ head:
30
+ [[-1, 1, SPPCSPC, [512]], # 11
31
+ [-1, 1, Conv, [256, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [8, 1, Conv, [256, 1, 1]], # route backbone P4
34
+ [[-1, -2], 1, Concat, [1]],
35
+ [-1, 2, BottleneckCSPB, [256]], # 16
36
+ [-1, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [6, 1, Conv, [128, 1, 1]], # route backbone P3
39
+ [[-1, -2], 1, Concat, [1]],
40
+ [-1, 2, BottleneckCSPB, [128]], # 21
41
+ [-1, 1, Conv, [256, 3, 1]],
42
+ [-2, 1, Conv, [256, 3, 2]],
43
+ [[-1, 16], 1, Concat, [1]], # cat
44
+ [-1, 2, BottleneckCSPB, [256]], # 25
45
+ [-1, 1, Conv, [512, 3, 1]],
46
+ [-2, 1, Conv, [512, 3, 2]],
47
+ [[-1, 11], 1, Concat, [1]], # cat
48
+ [-1, 2, BottleneckCSPB, [512]], # 29
49
+ [-1, 1, Conv, [1024, 3, 1]],
50
+
51
+ [[22,26,30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
52
+ ]
cfg/baseline/yolor-d6.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # expand model depth
4
+ width_multiple: 1.25 # expand layer channels
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # CSP-Darknet backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+ [-1, 1, DownC, [128]], # 2-P2/4
19
+ [-1, 3, BottleneckCSPA, [128]],
20
+ [-1, 1, DownC, [256]], # 4-P3/8
21
+ [-1, 15, BottleneckCSPA, [256]],
22
+ [-1, 1, DownC, [512]], # 6-P4/16
23
+ [-1, 15, BottleneckCSPA, [512]],
24
+ [-1, 1, DownC, [768]], # 8-P5/32
25
+ [-1, 7, BottleneckCSPA, [768]],
26
+ [-1, 1, DownC, [1024]], # 10-P6/64
27
+ [-1, 7, BottleneckCSPA, [1024]], # 11
28
+ ]
29
+
30
+ # CSP-Dark-PAN head
31
+ head:
32
+ [[-1, 1, SPPCSPC, [512]], # 12
33
+ [-1, 1, Conv, [384, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [-6, 1, Conv, [384, 1, 1]], # route backbone P5
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 3, BottleneckCSPB, [384]], # 17
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [-13, 1, Conv, [256, 1, 1]], # route backbone P4
41
+ [[-1, -2], 1, Concat, [1]],
42
+ [-1, 3, BottleneckCSPB, [256]], # 22
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46
+ [[-1, -2], 1, Concat, [1]],
47
+ [-1, 3, BottleneckCSPB, [128]], # 27
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-2, 1, DownC, [256]],
50
+ [[-1, 22], 1, Concat, [1]], # cat
51
+ [-1, 3, BottleneckCSPB, [256]], # 31
52
+ [-1, 1, Conv, [512, 3, 1]],
53
+ [-2, 1, DownC, [384]],
54
+ [[-1, 17], 1, Concat, [1]], # cat
55
+ [-1, 3, BottleneckCSPB, [384]], # 35
56
+ [-1, 1, Conv, [768, 3, 1]],
57
+ [-2, 1, DownC, [512]],
58
+ [[-1, 12], 1, Concat, [1]], # cat
59
+ [-1, 3, BottleneckCSPB, [512]], # 39
60
+ [-1, 1, Conv, [1024, 3, 1]],
61
+
62
+ [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63
+ ]
cfg/baseline/yolor-e6.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # expand model depth
4
+ width_multiple: 1.25 # expand layer channels
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # CSP-Darknet backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+ [-1, 1, DownC, [128]], # 2-P2/4
19
+ [-1, 3, BottleneckCSPA, [128]],
20
+ [-1, 1, DownC, [256]], # 4-P3/8
21
+ [-1, 7, BottleneckCSPA, [256]],
22
+ [-1, 1, DownC, [512]], # 6-P4/16
23
+ [-1, 7, BottleneckCSPA, [512]],
24
+ [-1, 1, DownC, [768]], # 8-P5/32
25
+ [-1, 3, BottleneckCSPA, [768]],
26
+ [-1, 1, DownC, [1024]], # 10-P6/64
27
+ [-1, 3, BottleneckCSPA, [1024]], # 11
28
+ ]
29
+
30
+ # CSP-Dark-PAN head
31
+ head:
32
+ [[-1, 1, SPPCSPC, [512]], # 12
33
+ [-1, 1, Conv, [384, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [-6, 1, Conv, [384, 1, 1]], # route backbone P5
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 3, BottleneckCSPB, [384]], # 17
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [-13, 1, Conv, [256, 1, 1]], # route backbone P4
41
+ [[-1, -2], 1, Concat, [1]],
42
+ [-1, 3, BottleneckCSPB, [256]], # 22
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46
+ [[-1, -2], 1, Concat, [1]],
47
+ [-1, 3, BottleneckCSPB, [128]], # 27
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-2, 1, DownC, [256]],
50
+ [[-1, 22], 1, Concat, [1]], # cat
51
+ [-1, 3, BottleneckCSPB, [256]], # 31
52
+ [-1, 1, Conv, [512, 3, 1]],
53
+ [-2, 1, DownC, [384]],
54
+ [[-1, 17], 1, Concat, [1]], # cat
55
+ [-1, 3, BottleneckCSPB, [384]], # 35
56
+ [-1, 1, Conv, [768, 3, 1]],
57
+ [-2, 1, DownC, [512]],
58
+ [[-1, 12], 1, Concat, [1]], # cat
59
+ [-1, 3, BottleneckCSPB, [512]], # 39
60
+ [-1, 1, Conv, [1024, 3, 1]],
61
+
62
+ [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63
+ ]
cfg/baseline/yolor-p6.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # expand model depth
4
+ width_multiple: 1.0 # expand layer channels
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # CSP-Darknet backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
19
+ [-1, 3, BottleneckCSPA, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 4-P3/8
21
+ [-1, 7, BottleneckCSPA, [256]],
22
+ [-1, 1, Conv, [384, 3, 2]], # 6-P4/16
23
+ [-1, 7, BottleneckCSPA, [384]],
24
+ [-1, 1, Conv, [512, 3, 2]], # 8-P5/32
25
+ [-1, 3, BottleneckCSPA, [512]],
26
+ [-1, 1, Conv, [640, 3, 2]], # 10-P6/64
27
+ [-1, 3, BottleneckCSPA, [640]], # 11
28
+ ]
29
+
30
+ # CSP-Dark-PAN head
31
+ head:
32
+ [[-1, 1, SPPCSPC, [320]], # 12
33
+ [-1, 1, Conv, [256, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [-6, 1, Conv, [256, 1, 1]], # route backbone P5
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 3, BottleneckCSPB, [256]], # 17
38
+ [-1, 1, Conv, [192, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [-13, 1, Conv, [192, 1, 1]], # route backbone P4
41
+ [[-1, -2], 1, Concat, [1]],
42
+ [-1, 3, BottleneckCSPB, [192]], # 22
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46
+ [[-1, -2], 1, Concat, [1]],
47
+ [-1, 3, BottleneckCSPB, [128]], # 27
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-2, 1, Conv, [192, 3, 2]],
50
+ [[-1, 22], 1, Concat, [1]], # cat
51
+ [-1, 3, BottleneckCSPB, [192]], # 31
52
+ [-1, 1, Conv, [384, 3, 1]],
53
+ [-2, 1, Conv, [256, 3, 2]],
54
+ [[-1, 17], 1, Concat, [1]], # cat
55
+ [-1, 3, BottleneckCSPB, [256]], # 35
56
+ [-1, 1, Conv, [512, 3, 1]],
57
+ [-2, 1, Conv, [320, 3, 2]],
58
+ [[-1, 12], 1, Concat, [1]], # cat
59
+ [-1, 3, BottleneckCSPB, [320]], # 39
60
+ [-1, 1, Conv, [640, 3, 1]],
61
+
62
+ [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63
+ ]
cfg/baseline/yolor-w6.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # expand model depth
4
+ width_multiple: 1.0 # expand layer channels
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # CSP-Darknet backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
19
+ [-1, 3, BottleneckCSPA, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 4-P3/8
21
+ [-1, 7, BottleneckCSPA, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 6-P4/16
23
+ [-1, 7, BottleneckCSPA, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 8-P5/32
25
+ [-1, 3, BottleneckCSPA, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 10-P6/64
27
+ [-1, 3, BottleneckCSPA, [1024]], # 11
28
+ ]
29
+
30
+ # CSP-Dark-PAN head
31
+ head:
32
+ [[-1, 1, SPPCSPC, [512]], # 12
33
+ [-1, 1, Conv, [384, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [-6, 1, Conv, [384, 1, 1]], # route backbone P5
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 3, BottleneckCSPB, [384]], # 17
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [-13, 1, Conv, [256, 1, 1]], # route backbone P4
41
+ [[-1, -2], 1, Concat, [1]],
42
+ [-1, 3, BottleneckCSPB, [256]], # 22
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46
+ [[-1, -2], 1, Concat, [1]],
47
+ [-1, 3, BottleneckCSPB, [128]], # 27
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-2, 1, Conv, [256, 3, 2]],
50
+ [[-1, 22], 1, Concat, [1]], # cat
51
+ [-1, 3, BottleneckCSPB, [256]], # 31
52
+ [-1, 1, Conv, [512, 3, 1]],
53
+ [-2, 1, Conv, [384, 3, 2]],
54
+ [[-1, 17], 1, Concat, [1]], # cat
55
+ [-1, 3, BottleneckCSPB, [384]], # 35
56
+ [-1, 1, Conv, [768, 3, 1]],
57
+ [-2, 1, Conv, [512, 3, 2]],
58
+ [[-1, 12], 1, Concat, [1]], # cat
59
+ [-1, 3, BottleneckCSPB, [512]], # 39
60
+ [-1, 1, Conv, [1024, 3, 1]],
61
+
62
+ [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63
+ ]
cfg/baseline/yolov3-spp.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # darknet53 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, Bottleneck, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, Bottleneck, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, Bottleneck, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, Bottleneck, [1024]], # 10
26
+ ]
27
+
28
+ # YOLOv3-SPP head
29
+ head:
30
+ [[-1, 1, Bottleneck, [1024, False]],
31
+ [-1, 1, SPP, [512, [5, 9, 13]]],
32
+ [-1, 1, Conv, [1024, 3, 1]],
33
+ [-1, 1, Conv, [512, 1, 1]],
34
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35
+
36
+ [-2, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
39
+ [-1, 1, Bottleneck, [512, False]],
40
+ [-1, 1, Bottleneck, [512, False]],
41
+ [-1, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43
+
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
47
+ [-1, 1, Bottleneck, [256, False]],
48
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49
+
50
+ [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51
+ ]
cfg/baseline/yolov3.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # darknet53 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, Bottleneck, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, Bottleneck, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, Bottleneck, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, Bottleneck, [1024]], # 10
26
+ ]
27
+
28
+ # YOLOv3 head
29
+ head:
30
+ [[-1, 1, Bottleneck, [1024, False]],
31
+ [-1, 1, Conv, [512, [1, 1]]],
32
+ [-1, 1, Conv, [1024, 3, 1]],
33
+ [-1, 1, Conv, [512, 1, 1]],
34
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35
+
36
+ [-2, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
39
+ [-1, 1, Bottleneck, [512, False]],
40
+ [-1, 1, Bottleneck, [512, False]],
41
+ [-1, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43
+
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
47
+ [-1, 1, Bottleneck, [256, False]],
48
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49
+
50
+ [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51
+ ]
cfg/baseline/yolov4-csp.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-Darknet backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, BottleneckCSPC, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, BottleneckCSPC, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, BottleneckCSPC, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, BottleneckCSPC, [1024]], # 10
26
+ ]
27
+
28
+ # CSP-Dark-PAN head
29
+ head:
30
+ [[-1, 1, SPPCSPC, [512]], # 11
31
+ [-1, 1, Conv, [256, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [8, 1, Conv, [256, 1, 1]], # route backbone P4
34
+ [[-1, -2], 1, Concat, [1]],
35
+ [-1, 2, BottleneckCSPB, [256]], # 16
36
+ [-1, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [6, 1, Conv, [128, 1, 1]], # route backbone P3
39
+ [[-1, -2], 1, Concat, [1]],
40
+ [-1, 2, BottleneckCSPB, [128]], # 21
41
+ [-1, 1, Conv, [256, 3, 1]],
42
+ [-2, 1, Conv, [256, 3, 2]],
43
+ [[-1, 16], 1, Concat, [1]], # cat
44
+ [-1, 2, BottleneckCSPB, [256]], # 25
45
+ [-1, 1, Conv, [512, 3, 1]],
46
+ [-2, 1, Conv, [512, 3, 2]],
47
+ [[-1, 11], 1, Concat, [1]], # cat
48
+ [-1, 2, BottleneckCSPB, [512]], # 29
49
+ [-1, 1, Conv, [1024, 3, 1]],
50
+
51
+ [[22,26,30], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
52
+ ]
cfg/deploy/yolov7-d6.yaml ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-d6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [96, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [192]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [-1, 1, Conv, [64, 3, 1]],
29
+ [-1, 1, Conv, [64, 3, 1]],
30
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
31
+ [-1, 1, Conv, [192, 1, 1]], # 14
32
+
33
+ [-1, 1, DownC, [384]], # 15-P3/8
34
+ [-1, 1, Conv, [128, 1, 1]],
35
+ [-2, 1, Conv, [128, 1, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [-1, 1, Conv, [128, 3, 1]],
42
+ [-1, 1, Conv, [128, 3, 1]],
43
+ [-1, 1, Conv, [128, 3, 1]],
44
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
45
+ [-1, 1, Conv, [384, 1, 1]], # 27
46
+
47
+ [-1, 1, DownC, [768]], # 28-P4/16
48
+ [-1, 1, Conv, [256, 1, 1]],
49
+ [-2, 1, Conv, [256, 1, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [-1, 1, Conv, [256, 3, 1]],
53
+ [-1, 1, Conv, [256, 3, 1]],
54
+ [-1, 1, Conv, [256, 3, 1]],
55
+ [-1, 1, Conv, [256, 3, 1]],
56
+ [-1, 1, Conv, [256, 3, 1]],
57
+ [-1, 1, Conv, [256, 3, 1]],
58
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
59
+ [-1, 1, Conv, [768, 1, 1]], # 40
60
+
61
+ [-1, 1, DownC, [1152]], # 41-P5/32
62
+ [-1, 1, Conv, [384, 1, 1]],
63
+ [-2, 1, Conv, [384, 1, 1]],
64
+ [-1, 1, Conv, [384, 3, 1]],
65
+ [-1, 1, Conv, [384, 3, 1]],
66
+ [-1, 1, Conv, [384, 3, 1]],
67
+ [-1, 1, Conv, [384, 3, 1]],
68
+ [-1, 1, Conv, [384, 3, 1]],
69
+ [-1, 1, Conv, [384, 3, 1]],
70
+ [-1, 1, Conv, [384, 3, 1]],
71
+ [-1, 1, Conv, [384, 3, 1]],
72
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
73
+ [-1, 1, Conv, [1152, 1, 1]], # 53
74
+
75
+ [-1, 1, DownC, [1536]], # 54-P6/64
76
+ [-1, 1, Conv, [512, 1, 1]],
77
+ [-2, 1, Conv, [512, 1, 1]],
78
+ [-1, 1, Conv, [512, 3, 1]],
79
+ [-1, 1, Conv, [512, 3, 1]],
80
+ [-1, 1, Conv, [512, 3, 1]],
81
+ [-1, 1, Conv, [512, 3, 1]],
82
+ [-1, 1, Conv, [512, 3, 1]],
83
+ [-1, 1, Conv, [512, 3, 1]],
84
+ [-1, 1, Conv, [512, 3, 1]],
85
+ [-1, 1, Conv, [512, 3, 1]],
86
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
87
+ [-1, 1, Conv, [1536, 1, 1]], # 66
88
+ ]
89
+
90
+ # yolov7-d6 head
91
+ head:
92
+ [[-1, 1, SPPCSPC, [768]], # 67
93
+
94
+ [-1, 1, Conv, [576, 1, 1]],
95
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
96
+ [53, 1, Conv, [576, 1, 1]], # route backbone P5
97
+ [[-1, -2], 1, Concat, [1]],
98
+
99
+ [-1, 1, Conv, [384, 1, 1]],
100
+ [-2, 1, Conv, [384, 1, 1]],
101
+ [-1, 1, Conv, [192, 3, 1]],
102
+ [-1, 1, Conv, [192, 3, 1]],
103
+ [-1, 1, Conv, [192, 3, 1]],
104
+ [-1, 1, Conv, [192, 3, 1]],
105
+ [-1, 1, Conv, [192, 3, 1]],
106
+ [-1, 1, Conv, [192, 3, 1]],
107
+ [-1, 1, Conv, [192, 3, 1]],
108
+ [-1, 1, Conv, [192, 3, 1]],
109
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
110
+ [-1, 1, Conv, [576, 1, 1]], # 83
111
+
112
+ [-1, 1, Conv, [384, 1, 1]],
113
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
114
+ [40, 1, Conv, [384, 1, 1]], # route backbone P4
115
+ [[-1, -2], 1, Concat, [1]],
116
+
117
+ [-1, 1, Conv, [256, 1, 1]],
118
+ [-2, 1, Conv, [256, 1, 1]],
119
+ [-1, 1, Conv, [128, 3, 1]],
120
+ [-1, 1, Conv, [128, 3, 1]],
121
+ [-1, 1, Conv, [128, 3, 1]],
122
+ [-1, 1, Conv, [128, 3, 1]],
123
+ [-1, 1, Conv, [128, 3, 1]],
124
+ [-1, 1, Conv, [128, 3, 1]],
125
+ [-1, 1, Conv, [128, 3, 1]],
126
+ [-1, 1, Conv, [128, 3, 1]],
127
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
128
+ [-1, 1, Conv, [384, 1, 1]], # 99
129
+
130
+ [-1, 1, Conv, [192, 1, 1]],
131
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
132
+ [27, 1, Conv, [192, 1, 1]], # route backbone P3
133
+ [[-1, -2], 1, Concat, [1]],
134
+
135
+ [-1, 1, Conv, [128, 1, 1]],
136
+ [-2, 1, Conv, [128, 1, 1]],
137
+ [-1, 1, Conv, [64, 3, 1]],
138
+ [-1, 1, Conv, [64, 3, 1]],
139
+ [-1, 1, Conv, [64, 3, 1]],
140
+ [-1, 1, Conv, [64, 3, 1]],
141
+ [-1, 1, Conv, [64, 3, 1]],
142
+ [-1, 1, Conv, [64, 3, 1]],
143
+ [-1, 1, Conv, [64, 3, 1]],
144
+ [-1, 1, Conv, [64, 3, 1]],
145
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
146
+ [-1, 1, Conv, [192, 1, 1]], # 115
147
+
148
+ [-1, 1, DownC, [384]],
149
+ [[-1, 99], 1, Concat, [1]],
150
+
151
+ [-1, 1, Conv, [256, 1, 1]],
152
+ [-2, 1, Conv, [256, 1, 1]],
153
+ [-1, 1, Conv, [128, 3, 1]],
154
+ [-1, 1, Conv, [128, 3, 1]],
155
+ [-1, 1, Conv, [128, 3, 1]],
156
+ [-1, 1, Conv, [128, 3, 1]],
157
+ [-1, 1, Conv, [128, 3, 1]],
158
+ [-1, 1, Conv, [128, 3, 1]],
159
+ [-1, 1, Conv, [128, 3, 1]],
160
+ [-1, 1, Conv, [128, 3, 1]],
161
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
162
+ [-1, 1, Conv, [384, 1, 1]], # 129
163
+
164
+ [-1, 1, DownC, [576]],
165
+ [[-1, 83], 1, Concat, [1]],
166
+
167
+ [-1, 1, Conv, [384, 1, 1]],
168
+ [-2, 1, Conv, [384, 1, 1]],
169
+ [-1, 1, Conv, [192, 3, 1]],
170
+ [-1, 1, Conv, [192, 3, 1]],
171
+ [-1, 1, Conv, [192, 3, 1]],
172
+ [-1, 1, Conv, [192, 3, 1]],
173
+ [-1, 1, Conv, [192, 3, 1]],
174
+ [-1, 1, Conv, [192, 3, 1]],
175
+ [-1, 1, Conv, [192, 3, 1]],
176
+ [-1, 1, Conv, [192, 3, 1]],
177
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
178
+ [-1, 1, Conv, [576, 1, 1]], # 143
179
+
180
+ [-1, 1, DownC, [768]],
181
+ [[-1, 67], 1, Concat, [1]],
182
+
183
+ [-1, 1, Conv, [512, 1, 1]],
184
+ [-2, 1, Conv, [512, 1, 1]],
185
+ [-1, 1, Conv, [256, 3, 1]],
186
+ [-1, 1, Conv, [256, 3, 1]],
187
+ [-1, 1, Conv, [256, 3, 1]],
188
+ [-1, 1, Conv, [256, 3, 1]],
189
+ [-1, 1, Conv, [256, 3, 1]],
190
+ [-1, 1, Conv, [256, 3, 1]],
191
+ [-1, 1, Conv, [256, 3, 1]],
192
+ [-1, 1, Conv, [256, 3, 1]],
193
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
194
+ [-1, 1, Conv, [768, 1, 1]], # 157
195
+
196
+ [115, 1, Conv, [384, 3, 1]],
197
+ [129, 1, Conv, [768, 3, 1]],
198
+ [143, 1, Conv, [1152, 3, 1]],
199
+ [157, 1, Conv, [1536, 3, 1]],
200
+
201
+ [[158,159,160,161], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
202
+ ]
cfg/deploy/yolov7-e6.yaml ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-e6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [80, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [160]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
29
+ [-1, 1, Conv, [160, 1, 1]], # 12
30
+
31
+ [-1, 1, DownC, [320]], # 13-P3/8
32
+ [-1, 1, Conv, [128, 1, 1]],
33
+ [-2, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
41
+ [-1, 1, Conv, [320, 1, 1]], # 23
42
+
43
+ [-1, 1, DownC, [640]], # 24-P4/16
44
+ [-1, 1, Conv, [256, 1, 1]],
45
+ [-2, 1, Conv, [256, 1, 1]],
46
+ [-1, 1, Conv, [256, 3, 1]],
47
+ [-1, 1, Conv, [256, 3, 1]],
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-1, 1, Conv, [256, 3, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
53
+ [-1, 1, Conv, [640, 1, 1]], # 34
54
+
55
+ [-1, 1, DownC, [960]], # 35-P5/32
56
+ [-1, 1, Conv, [384, 1, 1]],
57
+ [-2, 1, Conv, [384, 1, 1]],
58
+ [-1, 1, Conv, [384, 3, 1]],
59
+ [-1, 1, Conv, [384, 3, 1]],
60
+ [-1, 1, Conv, [384, 3, 1]],
61
+ [-1, 1, Conv, [384, 3, 1]],
62
+ [-1, 1, Conv, [384, 3, 1]],
63
+ [-1, 1, Conv, [384, 3, 1]],
64
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
65
+ [-1, 1, Conv, [960, 1, 1]], # 45
66
+
67
+ [-1, 1, DownC, [1280]], # 46-P6/64
68
+ [-1, 1, Conv, [512, 1, 1]],
69
+ [-2, 1, Conv, [512, 1, 1]],
70
+ [-1, 1, Conv, [512, 3, 1]],
71
+ [-1, 1, Conv, [512, 3, 1]],
72
+ [-1, 1, Conv, [512, 3, 1]],
73
+ [-1, 1, Conv, [512, 3, 1]],
74
+ [-1, 1, Conv, [512, 3, 1]],
75
+ [-1, 1, Conv, [512, 3, 1]],
76
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
77
+ [-1, 1, Conv, [1280, 1, 1]], # 56
78
+ ]
79
+
80
+ # yolov7-e6 head
81
+ head:
82
+ [[-1, 1, SPPCSPC, [640]], # 57
83
+
84
+ [-1, 1, Conv, [480, 1, 1]],
85
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
86
+ [45, 1, Conv, [480, 1, 1]], # route backbone P5
87
+ [[-1, -2], 1, Concat, [1]],
88
+
89
+ [-1, 1, Conv, [384, 1, 1]],
90
+ [-2, 1, Conv, [384, 1, 1]],
91
+ [-1, 1, Conv, [192, 3, 1]],
92
+ [-1, 1, Conv, [192, 3, 1]],
93
+ [-1, 1, Conv, [192, 3, 1]],
94
+ [-1, 1, Conv, [192, 3, 1]],
95
+ [-1, 1, Conv, [192, 3, 1]],
96
+ [-1, 1, Conv, [192, 3, 1]],
97
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
98
+ [-1, 1, Conv, [480, 1, 1]], # 71
99
+
100
+ [-1, 1, Conv, [320, 1, 1]],
101
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
102
+ [34, 1, Conv, [320, 1, 1]], # route backbone P4
103
+ [[-1, -2], 1, Concat, [1]],
104
+
105
+ [-1, 1, Conv, [256, 1, 1]],
106
+ [-2, 1, Conv, [256, 1, 1]],
107
+ [-1, 1, Conv, [128, 3, 1]],
108
+ [-1, 1, Conv, [128, 3, 1]],
109
+ [-1, 1, Conv, [128, 3, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
114
+ [-1, 1, Conv, [320, 1, 1]], # 85
115
+
116
+ [-1, 1, Conv, [160, 1, 1]],
117
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
118
+ [23, 1, Conv, [160, 1, 1]], # route backbone P3
119
+ [[-1, -2], 1, Concat, [1]],
120
+
121
+ [-1, 1, Conv, [128, 1, 1]],
122
+ [-2, 1, Conv, [128, 1, 1]],
123
+ [-1, 1, Conv, [64, 3, 1]],
124
+ [-1, 1, Conv, [64, 3, 1]],
125
+ [-1, 1, Conv, [64, 3, 1]],
126
+ [-1, 1, Conv, [64, 3, 1]],
127
+ [-1, 1, Conv, [64, 3, 1]],
128
+ [-1, 1, Conv, [64, 3, 1]],
129
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
130
+ [-1, 1, Conv, [160, 1, 1]], # 99
131
+
132
+ [-1, 1, DownC, [320]],
133
+ [[-1, 85], 1, Concat, [1]],
134
+
135
+ [-1, 1, Conv, [256, 1, 1]],
136
+ [-2, 1, Conv, [256, 1, 1]],
137
+ [-1, 1, Conv, [128, 3, 1]],
138
+ [-1, 1, Conv, [128, 3, 1]],
139
+ [-1, 1, Conv, [128, 3, 1]],
140
+ [-1, 1, Conv, [128, 3, 1]],
141
+ [-1, 1, Conv, [128, 3, 1]],
142
+ [-1, 1, Conv, [128, 3, 1]],
143
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
144
+ [-1, 1, Conv, [320, 1, 1]], # 111
145
+
146
+ [-1, 1, DownC, [480]],
147
+ [[-1, 71], 1, Concat, [1]],
148
+
149
+ [-1, 1, Conv, [384, 1, 1]],
150
+ [-2, 1, Conv, [384, 1, 1]],
151
+ [-1, 1, Conv, [192, 3, 1]],
152
+ [-1, 1, Conv, [192, 3, 1]],
153
+ [-1, 1, Conv, [192, 3, 1]],
154
+ [-1, 1, Conv, [192, 3, 1]],
155
+ [-1, 1, Conv, [192, 3, 1]],
156
+ [-1, 1, Conv, [192, 3, 1]],
157
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
158
+ [-1, 1, Conv, [480, 1, 1]], # 123
159
+
160
+ [-1, 1, DownC, [640]],
161
+ [[-1, 57], 1, Concat, [1]],
162
+
163
+ [-1, 1, Conv, [512, 1, 1]],
164
+ [-2, 1, Conv, [512, 1, 1]],
165
+ [-1, 1, Conv, [256, 3, 1]],
166
+ [-1, 1, Conv, [256, 3, 1]],
167
+ [-1, 1, Conv, [256, 3, 1]],
168
+ [-1, 1, Conv, [256, 3, 1]],
169
+ [-1, 1, Conv, [256, 3, 1]],
170
+ [-1, 1, Conv, [256, 3, 1]],
171
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
172
+ [-1, 1, Conv, [640, 1, 1]], # 135
173
+
174
+ [99, 1, Conv, [320, 3, 1]],
175
+ [111, 1, Conv, [640, 3, 1]],
176
+ [123, 1, Conv, [960, 3, 1]],
177
+ [135, 1, Conv, [1280, 3, 1]],
178
+
179
+ [[136,137,138,139], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
180
+ ]
cfg/deploy/yolov7-e6e.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-e6e backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [80, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [160]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
29
+ [-1, 1, Conv, [160, 1, 1]], # 12
30
+ [-11, 1, Conv, [64, 1, 1]],
31
+ [-12, 1, Conv, [64, 1, 1]],
32
+ [-1, 1, Conv, [64, 3, 1]],
33
+ [-1, 1, Conv, [64, 3, 1]],
34
+ [-1, 1, Conv, [64, 3, 1]],
35
+ [-1, 1, Conv, [64, 3, 1]],
36
+ [-1, 1, Conv, [64, 3, 1]],
37
+ [-1, 1, Conv, [64, 3, 1]],
38
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
39
+ [-1, 1, Conv, [160, 1, 1]], # 22
40
+ [[-1, -11], 1, Shortcut, [1]], # 23
41
+
42
+ [-1, 1, DownC, [320]], # 24-P3/8
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, Conv, [128, 3, 1]],
46
+ [-1, 1, Conv, [128, 3, 1]],
47
+ [-1, 1, Conv, [128, 3, 1]],
48
+ [-1, 1, Conv, [128, 3, 1]],
49
+ [-1, 1, Conv, [128, 3, 1]],
50
+ [-1, 1, Conv, [128, 3, 1]],
51
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
52
+ [-1, 1, Conv, [320, 1, 1]], # 34
53
+ [-11, 1, Conv, [128, 1, 1]],
54
+ [-12, 1, Conv, [128, 1, 1]],
55
+ [-1, 1, Conv, [128, 3, 1]],
56
+ [-1, 1, Conv, [128, 3, 1]],
57
+ [-1, 1, Conv, [128, 3, 1]],
58
+ [-1, 1, Conv, [128, 3, 1]],
59
+ [-1, 1, Conv, [128, 3, 1]],
60
+ [-1, 1, Conv, [128, 3, 1]],
61
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
62
+ [-1, 1, Conv, [320, 1, 1]], # 44
63
+ [[-1, -11], 1, Shortcut, [1]], # 45
64
+
65
+ [-1, 1, DownC, [640]], # 46-P4/16
66
+ [-1, 1, Conv, [256, 1, 1]],
67
+ [-2, 1, Conv, [256, 1, 1]],
68
+ [-1, 1, Conv, [256, 3, 1]],
69
+ [-1, 1, Conv, [256, 3, 1]],
70
+ [-1, 1, Conv, [256, 3, 1]],
71
+ [-1, 1, Conv, [256, 3, 1]],
72
+ [-1, 1, Conv, [256, 3, 1]],
73
+ [-1, 1, Conv, [256, 3, 1]],
74
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
75
+ [-1, 1, Conv, [640, 1, 1]], # 56
76
+ [-11, 1, Conv, [256, 1, 1]],
77
+ [-12, 1, Conv, [256, 1, 1]],
78
+ [-1, 1, Conv, [256, 3, 1]],
79
+ [-1, 1, Conv, [256, 3, 1]],
80
+ [-1, 1, Conv, [256, 3, 1]],
81
+ [-1, 1, Conv, [256, 3, 1]],
82
+ [-1, 1, Conv, [256, 3, 1]],
83
+ [-1, 1, Conv, [256, 3, 1]],
84
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
85
+ [-1, 1, Conv, [640, 1, 1]], # 66
86
+ [[-1, -11], 1, Shortcut, [1]], # 67
87
+
88
+ [-1, 1, DownC, [960]], # 68-P5/32
89
+ [-1, 1, Conv, [384, 1, 1]],
90
+ [-2, 1, Conv, [384, 1, 1]],
91
+ [-1, 1, Conv, [384, 3, 1]],
92
+ [-1, 1, Conv, [384, 3, 1]],
93
+ [-1, 1, Conv, [384, 3, 1]],
94
+ [-1, 1, Conv, [384, 3, 1]],
95
+ [-1, 1, Conv, [384, 3, 1]],
96
+ [-1, 1, Conv, [384, 3, 1]],
97
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
98
+ [-1, 1, Conv, [960, 1, 1]], # 78
99
+ [-11, 1, Conv, [384, 1, 1]],
100
+ [-12, 1, Conv, [384, 1, 1]],
101
+ [-1, 1, Conv, [384, 3, 1]],
102
+ [-1, 1, Conv, [384, 3, 1]],
103
+ [-1, 1, Conv, [384, 3, 1]],
104
+ [-1, 1, Conv, [384, 3, 1]],
105
+ [-1, 1, Conv, [384, 3, 1]],
106
+ [-1, 1, Conv, [384, 3, 1]],
107
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
108
+ [-1, 1, Conv, [960, 1, 1]], # 88
109
+ [[-1, -11], 1, Shortcut, [1]], # 89
110
+
111
+ [-1, 1, DownC, [1280]], # 90-P6/64
112
+ [-1, 1, Conv, [512, 1, 1]],
113
+ [-2, 1, Conv, [512, 1, 1]],
114
+ [-1, 1, Conv, [512, 3, 1]],
115
+ [-1, 1, Conv, [512, 3, 1]],
116
+ [-1, 1, Conv, [512, 3, 1]],
117
+ [-1, 1, Conv, [512, 3, 1]],
118
+ [-1, 1, Conv, [512, 3, 1]],
119
+ [-1, 1, Conv, [512, 3, 1]],
120
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
121
+ [-1, 1, Conv, [1280, 1, 1]], # 100
122
+ [-11, 1, Conv, [512, 1, 1]],
123
+ [-12, 1, Conv, [512, 1, 1]],
124
+ [-1, 1, Conv, [512, 3, 1]],
125
+ [-1, 1, Conv, [512, 3, 1]],
126
+ [-1, 1, Conv, [512, 3, 1]],
127
+ [-1, 1, Conv, [512, 3, 1]],
128
+ [-1, 1, Conv, [512, 3, 1]],
129
+ [-1, 1, Conv, [512, 3, 1]],
130
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
131
+ [-1, 1, Conv, [1280, 1, 1]], # 110
132
+ [[-1, -11], 1, Shortcut, [1]], # 111
133
+ ]
134
+
135
+ # yolov7-e6e head
136
+ head:
137
+ [[-1, 1, SPPCSPC, [640]], # 112
138
+
139
+ [-1, 1, Conv, [480, 1, 1]],
140
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
141
+ [89, 1, Conv, [480, 1, 1]], # route backbone P5
142
+ [[-1, -2], 1, Concat, [1]],
143
+
144
+ [-1, 1, Conv, [384, 1, 1]],
145
+ [-2, 1, Conv, [384, 1, 1]],
146
+ [-1, 1, Conv, [192, 3, 1]],
147
+ [-1, 1, Conv, [192, 3, 1]],
148
+ [-1, 1, Conv, [192, 3, 1]],
149
+ [-1, 1, Conv, [192, 3, 1]],
150
+ [-1, 1, Conv, [192, 3, 1]],
151
+ [-1, 1, Conv, [192, 3, 1]],
152
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
153
+ [-1, 1, Conv, [480, 1, 1]], # 126
154
+ [-11, 1, Conv, [384, 1, 1]],
155
+ [-12, 1, Conv, [384, 1, 1]],
156
+ [-1, 1, Conv, [192, 3, 1]],
157
+ [-1, 1, Conv, [192, 3, 1]],
158
+ [-1, 1, Conv, [192, 3, 1]],
159
+ [-1, 1, Conv, [192, 3, 1]],
160
+ [-1, 1, Conv, [192, 3, 1]],
161
+ [-1, 1, Conv, [192, 3, 1]],
162
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
163
+ [-1, 1, Conv, [480, 1, 1]], # 136
164
+ [[-1, -11], 1, Shortcut, [1]], # 137
165
+
166
+ [-1, 1, Conv, [320, 1, 1]],
167
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
168
+ [67, 1, Conv, [320, 1, 1]], # route backbone P4
169
+ [[-1, -2], 1, Concat, [1]],
170
+
171
+ [-1, 1, Conv, [256, 1, 1]],
172
+ [-2, 1, Conv, [256, 1, 1]],
173
+ [-1, 1, Conv, [128, 3, 1]],
174
+ [-1, 1, Conv, [128, 3, 1]],
175
+ [-1, 1, Conv, [128, 3, 1]],
176
+ [-1, 1, Conv, [128, 3, 1]],
177
+ [-1, 1, Conv, [128, 3, 1]],
178
+ [-1, 1, Conv, [128, 3, 1]],
179
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
180
+ [-1, 1, Conv, [320, 1, 1]], # 151
181
+ [-11, 1, Conv, [256, 1, 1]],
182
+ [-12, 1, Conv, [256, 1, 1]],
183
+ [-1, 1, Conv, [128, 3, 1]],
184
+ [-1, 1, Conv, [128, 3, 1]],
185
+ [-1, 1, Conv, [128, 3, 1]],
186
+ [-1, 1, Conv, [128, 3, 1]],
187
+ [-1, 1, Conv, [128, 3, 1]],
188
+ [-1, 1, Conv, [128, 3, 1]],
189
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
190
+ [-1, 1, Conv, [320, 1, 1]], # 161
191
+ [[-1, -11], 1, Shortcut, [1]], # 162
192
+
193
+ [-1, 1, Conv, [160, 1, 1]],
194
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
195
+ [45, 1, Conv, [160, 1, 1]], # route backbone P3
196
+ [[-1, -2], 1, Concat, [1]],
197
+
198
+ [-1, 1, Conv, [128, 1, 1]],
199
+ [-2, 1, Conv, [128, 1, 1]],
200
+ [-1, 1, Conv, [64, 3, 1]],
201
+ [-1, 1, Conv, [64, 3, 1]],
202
+ [-1, 1, Conv, [64, 3, 1]],
203
+ [-1, 1, Conv, [64, 3, 1]],
204
+ [-1, 1, Conv, [64, 3, 1]],
205
+ [-1, 1, Conv, [64, 3, 1]],
206
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
207
+ [-1, 1, Conv, [160, 1, 1]], # 176
208
+ [-11, 1, Conv, [128, 1, 1]],
209
+ [-12, 1, Conv, [128, 1, 1]],
210
+ [-1, 1, Conv, [64, 3, 1]],
211
+ [-1, 1, Conv, [64, 3, 1]],
212
+ [-1, 1, Conv, [64, 3, 1]],
213
+ [-1, 1, Conv, [64, 3, 1]],
214
+ [-1, 1, Conv, [64, 3, 1]],
215
+ [-1, 1, Conv, [64, 3, 1]],
216
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
217
+ [-1, 1, Conv, [160, 1, 1]], # 186
218
+ [[-1, -11], 1, Shortcut, [1]], # 187
219
+
220
+ [-1, 1, DownC, [320]],
221
+ [[-1, 162], 1, Concat, [1]],
222
+
223
+ [-1, 1, Conv, [256, 1, 1]],
224
+ [-2, 1, Conv, [256, 1, 1]],
225
+ [-1, 1, Conv, [128, 3, 1]],
226
+ [-1, 1, Conv, [128, 3, 1]],
227
+ [-1, 1, Conv, [128, 3, 1]],
228
+ [-1, 1, Conv, [128, 3, 1]],
229
+ [-1, 1, Conv, [128, 3, 1]],
230
+ [-1, 1, Conv, [128, 3, 1]],
231
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
232
+ [-1, 1, Conv, [320, 1, 1]], # 199
233
+ [-11, 1, Conv, [256, 1, 1]],
234
+ [-12, 1, Conv, [256, 1, 1]],
235
+ [-1, 1, Conv, [128, 3, 1]],
236
+ [-1, 1, Conv, [128, 3, 1]],
237
+ [-1, 1, Conv, [128, 3, 1]],
238
+ [-1, 1, Conv, [128, 3, 1]],
239
+ [-1, 1, Conv, [128, 3, 1]],
240
+ [-1, 1, Conv, [128, 3, 1]],
241
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
242
+ [-1, 1, Conv, [320, 1, 1]], # 209
243
+ [[-1, -11], 1, Shortcut, [1]], # 210
244
+
245
+ [-1, 1, DownC, [480]],
246
+ [[-1, 137], 1, Concat, [1]],
247
+
248
+ [-1, 1, Conv, [384, 1, 1]],
249
+ [-2, 1, Conv, [384, 1, 1]],
250
+ [-1, 1, Conv, [192, 3, 1]],
251
+ [-1, 1, Conv, [192, 3, 1]],
252
+ [-1, 1, Conv, [192, 3, 1]],
253
+ [-1, 1, Conv, [192, 3, 1]],
254
+ [-1, 1, Conv, [192, 3, 1]],
255
+ [-1, 1, Conv, [192, 3, 1]],
256
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
257
+ [-1, 1, Conv, [480, 1, 1]], # 222
258
+ [-11, 1, Conv, [384, 1, 1]],
259
+ [-12, 1, Conv, [384, 1, 1]],
260
+ [-1, 1, Conv, [192, 3, 1]],
261
+ [-1, 1, Conv, [192, 3, 1]],
262
+ [-1, 1, Conv, [192, 3, 1]],
263
+ [-1, 1, Conv, [192, 3, 1]],
264
+ [-1, 1, Conv, [192, 3, 1]],
265
+ [-1, 1, Conv, [192, 3, 1]],
266
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
267
+ [-1, 1, Conv, [480, 1, 1]], # 232
268
+ [[-1, -11], 1, Shortcut, [1]], # 233
269
+
270
+ [-1, 1, DownC, [640]],
271
+ [[-1, 112], 1, Concat, [1]],
272
+
273
+ [-1, 1, Conv, [512, 1, 1]],
274
+ [-2, 1, Conv, [512, 1, 1]],
275
+ [-1, 1, Conv, [256, 3, 1]],
276
+ [-1, 1, Conv, [256, 3, 1]],
277
+ [-1, 1, Conv, [256, 3, 1]],
278
+ [-1, 1, Conv, [256, 3, 1]],
279
+ [-1, 1, Conv, [256, 3, 1]],
280
+ [-1, 1, Conv, [256, 3, 1]],
281
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
282
+ [-1, 1, Conv, [640, 1, 1]], # 245
283
+ [-11, 1, Conv, [512, 1, 1]],
284
+ [-12, 1, Conv, [512, 1, 1]],
285
+ [-1, 1, Conv, [256, 3, 1]],
286
+ [-1, 1, Conv, [256, 3, 1]],
287
+ [-1, 1, Conv, [256, 3, 1]],
288
+ [-1, 1, Conv, [256, 3, 1]],
289
+ [-1, 1, Conv, [256, 3, 1]],
290
+ [-1, 1, Conv, [256, 3, 1]],
291
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
292
+ [-1, 1, Conv, [640, 1, 1]], # 255
293
+ [[-1, -11], 1, Shortcut, [1]], # 256
294
+
295
+ [187, 1, Conv, [320, 3, 1]],
296
+ [210, 1, Conv, [640, 3, 1]],
297
+ [233, 1, Conv, [960, 3, 1]],
298
+ [256, 1, Conv, [1280, 3, 1]],
299
+
300
+ [[257,258,259,260], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
301
+ ]
cfg/deploy/yolov7-tiny-silu.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv7-tiny backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 2]], # 0-P1/2
16
+
17
+ [-1, 1, Conv, [64, 3, 2]], # 1-P2/4
18
+
19
+ [-1, 1, Conv, [32, 1, 1]],
20
+ [-2, 1, Conv, [32, 1, 1]],
21
+ [-1, 1, Conv, [32, 3, 1]],
22
+ [-1, 1, Conv, [32, 3, 1]],
23
+ [[-1, -2, -3, -4], 1, Concat, [1]],
24
+ [-1, 1, Conv, [64, 1, 1]], # 7
25
+
26
+ [-1, 1, MP, []], # 8-P3/8
27
+ [-1, 1, Conv, [64, 1, 1]],
28
+ [-2, 1, Conv, [64, 1, 1]],
29
+ [-1, 1, Conv, [64, 3, 1]],
30
+ [-1, 1, Conv, [64, 3, 1]],
31
+ [[-1, -2, -3, -4], 1, Concat, [1]],
32
+ [-1, 1, Conv, [128, 1, 1]], # 14
33
+
34
+ [-1, 1, MP, []], # 15-P4/16
35
+ [-1, 1, Conv, [128, 1, 1]],
36
+ [-2, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [[-1, -2, -3, -4], 1, Concat, [1]],
40
+ [-1, 1, Conv, [256, 1, 1]], # 21
41
+
42
+ [-1, 1, MP, []], # 22-P5/32
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-2, 1, Conv, [256, 1, 1]],
45
+ [-1, 1, Conv, [256, 3, 1]],
46
+ [-1, 1, Conv, [256, 3, 1]],
47
+ [[-1, -2, -3, -4], 1, Concat, [1]],
48
+ [-1, 1, Conv, [512, 1, 1]], # 28
49
+ ]
50
+
51
+ # YOLOv7-tiny head
52
+ head:
53
+ [[-1, 1, Conv, [256, 1, 1]],
54
+ [-2, 1, Conv, [256, 1, 1]],
55
+ [-1, 1, SP, [5]],
56
+ [-2, 1, SP, [9]],
57
+ [-3, 1, SP, [13]],
58
+ [[-1, -2, -3, -4], 1, Concat, [1]],
59
+ [-1, 1, Conv, [256, 1, 1]],
60
+ [[-1, -7], 1, Concat, [1]],
61
+ [-1, 1, Conv, [256, 1, 1]], # 37
62
+
63
+ [-1, 1, Conv, [128, 1, 1]],
64
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
65
+ [21, 1, Conv, [128, 1, 1]], # route backbone P4
66
+ [[-1, -2], 1, Concat, [1]],
67
+
68
+ [-1, 1, Conv, [64, 1, 1]],
69
+ [-2, 1, Conv, [64, 1, 1]],
70
+ [-1, 1, Conv, [64, 3, 1]],
71
+ [-1, 1, Conv, [64, 3, 1]],
72
+ [[-1, -2, -3, -4], 1, Concat, [1]],
73
+ [-1, 1, Conv, [128, 1, 1]], # 47
74
+
75
+ [-1, 1, Conv, [64, 1, 1]],
76
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
77
+ [14, 1, Conv, [64, 1, 1]], # route backbone P3
78
+ [[-1, -2], 1, Concat, [1]],
79
+
80
+ [-1, 1, Conv, [32, 1, 1]],
81
+ [-2, 1, Conv, [32, 1, 1]],
82
+ [-1, 1, Conv, [32, 3, 1]],
83
+ [-1, 1, Conv, [32, 3, 1]],
84
+ [[-1, -2, -3, -4], 1, Concat, [1]],
85
+ [-1, 1, Conv, [64, 1, 1]], # 57
86
+
87
+ [-1, 1, Conv, [128, 3, 2]],
88
+ [[-1, 47], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [64, 1, 1]],
91
+ [-2, 1, Conv, [64, 1, 1]],
92
+ [-1, 1, Conv, [64, 3, 1]],
93
+ [-1, 1, Conv, [64, 3, 1]],
94
+ [[-1, -2, -3, -4], 1, Concat, [1]],
95
+ [-1, 1, Conv, [128, 1, 1]], # 65
96
+
97
+ [-1, 1, Conv, [256, 3, 2]],
98
+ [[-1, 37], 1, Concat, [1]],
99
+
100
+ [-1, 1, Conv, [128, 1, 1]],
101
+ [-2, 1, Conv, [128, 1, 1]],
102
+ [-1, 1, Conv, [128, 3, 1]],
103
+ [-1, 1, Conv, [128, 3, 1]],
104
+ [[-1, -2, -3, -4], 1, Concat, [1]],
105
+ [-1, 1, Conv, [256, 1, 1]], # 73
106
+
107
+ [57, 1, Conv, [128, 3, 1]],
108
+ [65, 1, Conv, [256, 3, 1]],
109
+ [73, 1, Conv, [512, 3, 1]],
110
+
111
+ [[74,75,76], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
112
+ ]
cfg/deploy/yolov7-tiny.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # yolov7-tiny backbone
13
+ backbone:
14
+ # [from, number, module, args]; Conv args are [c2, k=1, s=1, p=None, g=1, act=True]
15
+ [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2
16
+
17
+ [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4
18
+
19
+ [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
20
+ [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
21
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
22
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
23
+ [[-1, -2, -3, -4], 1, Concat, [1]],
24
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 7
25
+
26
+ [-1, 1, MP, []], # 8-P3/8
27
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
28
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
29
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
30
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
31
+ [[-1, -2, -3, -4], 1, Concat, [1]],
32
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 14
33
+
34
+ [-1, 1, MP, []], # 15-P4/16
35
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
36
+ [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
37
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
38
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
39
+ [[-1, -2, -3, -4], 1, Concat, [1]],
40
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 21
41
+
42
+ [-1, 1, MP, []], # 22-P5/32
43
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
44
+ [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
45
+ [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
46
+ [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
47
+ [[-1, -2, -3, -4], 1, Concat, [1]],
48
+ [-1, 1, Conv, [512, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 28
49
+ ]
50
+
51
+ # yolov7-tiny head
52
+ head:
53
+ [[-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
54
+ [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
55
+ [-1, 1, SP, [5]],
56
+ [-2, 1, SP, [9]],
57
+ [-3, 1, SP, [13]],
58
+ [[-1, -2, -3, -4], 1, Concat, [1]],
59
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
60
+ [[-1, -7], 1, Concat, [1]],
61
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 37
62
+
63
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
64
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
65
+ [21, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4
66
+ [[-1, -2], 1, Concat, [1]],
67
+
68
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
69
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
70
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
71
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
72
+ [[-1, -2, -3, -4], 1, Concat, [1]],
73
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 47
74
+
75
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
76
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
77
+ [14, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3
78
+ [[-1, -2], 1, Concat, [1]],
79
+
80
+ [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
81
+ [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
82
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
83
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
84
+ [[-1, -2, -3, -4], 1, Concat, [1]],
85
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 57
86
+
87
+ [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]],
88
+ [[-1, 47], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
91
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
92
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
93
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
94
+ [[-1, -2, -3, -4], 1, Concat, [1]],
95
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 65
96
+
97
+ [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]],
98
+ [[-1, 37], 1, Concat, [1]],
99
+
100
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
101
+ [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
102
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
103
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
104
+ [[-1, -2, -3, -4], 1, Concat, [1]],
105
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 73
106
+
107
+ [57, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
108
+ [65, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
109
+ [73, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
110
+
111
+ [[74,75,76], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
112
+ ]
cfg/deploy/yolov7-w6.yaml ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-w6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [[-1, -3, -5, -6], 1, Concat, [1]],
27
+ [-1, 1, Conv, [128, 1, 1]], # 10
28
+
29
+ [-1, 1, Conv, [256, 3, 2]], # 11-P3/8
30
+ [-1, 1, Conv, [128, 1, 1]],
31
+ [-2, 1, Conv, [128, 1, 1]],
32
+ [-1, 1, Conv, [128, 3, 1]],
33
+ [-1, 1, Conv, [128, 3, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [[-1, -3, -5, -6], 1, Concat, [1]],
37
+ [-1, 1, Conv, [256, 1, 1]], # 19
38
+
39
+ [-1, 1, Conv, [512, 3, 2]], # 20-P4/16
40
+ [-1, 1, Conv, [256, 1, 1]],
41
+ [-2, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [256, 3, 1]],
43
+ [-1, 1, Conv, [256, 3, 1]],
44
+ [-1, 1, Conv, [256, 3, 1]],
45
+ [-1, 1, Conv, [256, 3, 1]],
46
+ [[-1, -3, -5, -6], 1, Concat, [1]],
47
+ [-1, 1, Conv, [512, 1, 1]], # 28
48
+
49
+ [-1, 1, Conv, [768, 3, 2]], # 29-P5/32
50
+ [-1, 1, Conv, [384, 1, 1]],
51
+ [-2, 1, Conv, [384, 1, 1]],
52
+ [-1, 1, Conv, [384, 3, 1]],
53
+ [-1, 1, Conv, [384, 3, 1]],
54
+ [-1, 1, Conv, [384, 3, 1]],
55
+ [-1, 1, Conv, [384, 3, 1]],
56
+ [[-1, -3, -5, -6], 1, Concat, [1]],
57
+ [-1, 1, Conv, [768, 1, 1]], # 37
58
+
59
+ [-1, 1, Conv, [1024, 3, 2]], # 38-P6/64
60
+ [-1, 1, Conv, [512, 1, 1]],
61
+ [-2, 1, Conv, [512, 1, 1]],
62
+ [-1, 1, Conv, [512, 3, 1]],
63
+ [-1, 1, Conv, [512, 3, 1]],
64
+ [-1, 1, Conv, [512, 3, 1]],
65
+ [-1, 1, Conv, [512, 3, 1]],
66
+ [[-1, -3, -5, -6], 1, Concat, [1]],
67
+ [-1, 1, Conv, [1024, 1, 1]], # 46
68
+ ]
69
+
70
+ # yolov7-w6 head
71
+ head:
72
+ [[-1, 1, SPPCSPC, [512]], # 47
73
+
74
+ [-1, 1, Conv, [384, 1, 1]],
75
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
76
+ [37, 1, Conv, [384, 1, 1]], # route backbone P5
77
+ [[-1, -2], 1, Concat, [1]],
78
+
79
+ [-1, 1, Conv, [384, 1, 1]],
80
+ [-2, 1, Conv, [384, 1, 1]],
81
+ [-1, 1, Conv, [192, 3, 1]],
82
+ [-1, 1, Conv, [192, 3, 1]],
83
+ [-1, 1, Conv, [192, 3, 1]],
84
+ [-1, 1, Conv, [192, 3, 1]],
85
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
86
+ [-1, 1, Conv, [384, 1, 1]], # 59
87
+
88
+ [-1, 1, Conv, [256, 1, 1]],
89
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
90
+ [28, 1, Conv, [256, 1, 1]], # route backbone P4
91
+ [[-1, -2], 1, Concat, [1]],
92
+
93
+ [-1, 1, Conv, [256, 1, 1]],
94
+ [-2, 1, Conv, [256, 1, 1]],
95
+ [-1, 1, Conv, [128, 3, 1]],
96
+ [-1, 1, Conv, [128, 3, 1]],
97
+ [-1, 1, Conv, [128, 3, 1]],
98
+ [-1, 1, Conv, [128, 3, 1]],
99
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
100
+ [-1, 1, Conv, [256, 1, 1]], # 71
101
+
102
+ [-1, 1, Conv, [128, 1, 1]],
103
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
104
+ [19, 1, Conv, [128, 1, 1]], # route backbone P3
105
+ [[-1, -2], 1, Concat, [1]],
106
+
107
+ [-1, 1, Conv, [128, 1, 1]],
108
+ [-2, 1, Conv, [128, 1, 1]],
109
+ [-1, 1, Conv, [64, 3, 1]],
110
+ [-1, 1, Conv, [64, 3, 1]],
111
+ [-1, 1, Conv, [64, 3, 1]],
112
+ [-1, 1, Conv, [64, 3, 1]],
113
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
114
+ [-1, 1, Conv, [128, 1, 1]], # 83
115
+
116
+ [-1, 1, Conv, [256, 3, 2]],
117
+ [[-1, 71], 1, Concat, [1]], # cat
118
+
119
+ [-1, 1, Conv, [256, 1, 1]],
120
+ [-2, 1, Conv, [256, 1, 1]],
121
+ [-1, 1, Conv, [128, 3, 1]],
122
+ [-1, 1, Conv, [128, 3, 1]],
123
+ [-1, 1, Conv, [128, 3, 1]],
124
+ [-1, 1, Conv, [128, 3, 1]],
125
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
126
+ [-1, 1, Conv, [256, 1, 1]], # 93
127
+
128
+ [-1, 1, Conv, [384, 3, 2]],
129
+ [[-1, 59], 1, Concat, [1]], # cat
130
+
131
+ [-1, 1, Conv, [384, 1, 1]],
132
+ [-2, 1, Conv, [384, 1, 1]],
133
+ [-1, 1, Conv, [192, 3, 1]],
134
+ [-1, 1, Conv, [192, 3, 1]],
135
+ [-1, 1, Conv, [192, 3, 1]],
136
+ [-1, 1, Conv, [192, 3, 1]],
137
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
138
+ [-1, 1, Conv, [384, 1, 1]], # 103
139
+
140
+ [-1, 1, Conv, [512, 3, 2]],
141
+ [[-1, 47], 1, Concat, [1]], # cat
142
+
143
+ [-1, 1, Conv, [512, 1, 1]],
144
+ [-2, 1, Conv, [512, 1, 1]],
145
+ [-1, 1, Conv, [256, 3, 1]],
146
+ [-1, 1, Conv, [256, 3, 1]],
147
+ [-1, 1, Conv, [256, 3, 1]],
148
+ [-1, 1, Conv, [256, 3, 1]],
149
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
150
+ [-1, 1, Conv, [512, 1, 1]], # 113
151
+
152
+ [83, 1, Conv, [256, 3, 1]],
153
+ [93, 1, Conv, [512, 3, 1]],
154
+ [103, 1, Conv, [768, 3, 1]],
155
+ [113, 1, Conv, [1024, 3, 1]],
156
+
157
+ [[114,115,116,117], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
158
+ ]
cfg/deploy/yolov7.yaml ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # yolov7 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+
17
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
18
+ [-1, 1, Conv, [64, 3, 1]],
19
+
20
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
21
+ [-1, 1, Conv, [64, 1, 1]],
22
+ [-2, 1, Conv, [64, 1, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [[-1, -3, -5, -6], 1, Concat, [1]],
28
+ [-1, 1, Conv, [256, 1, 1]], # 11
29
+
30
+ [-1, 1, MP, []],
31
+ [-1, 1, Conv, [128, 1, 1]],
32
+ [-3, 1, Conv, [128, 1, 1]],
33
+ [-1, 1, Conv, [128, 3, 2]],
34
+ [[-1, -3], 1, Concat, [1]], # 16-P3/8
35
+ [-1, 1, Conv, [128, 1, 1]],
36
+ [-2, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [[-1, -3, -5, -6], 1, Concat, [1]],
42
+ [-1, 1, Conv, [512, 1, 1]], # 24
43
+
44
+ [-1, 1, MP, []],
45
+ [-1, 1, Conv, [256, 1, 1]],
46
+ [-3, 1, Conv, [256, 1, 1]],
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, -3], 1, Concat, [1]], # 29-P4/16
49
+ [-1, 1, Conv, [256, 1, 1]],
50
+ [-2, 1, Conv, [256, 1, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [-1, 1, Conv, [256, 3, 1]],
53
+ [-1, 1, Conv, [256, 3, 1]],
54
+ [-1, 1, Conv, [256, 3, 1]],
55
+ [[-1, -3, -5, -6], 1, Concat, [1]],
56
+ [-1, 1, Conv, [1024, 1, 1]], # 37
57
+
58
+ [-1, 1, MP, []],
59
+ [-1, 1, Conv, [512, 1, 1]],
60
+ [-3, 1, Conv, [512, 1, 1]],
61
+ [-1, 1, Conv, [512, 3, 2]],
62
+ [[-1, -3], 1, Concat, [1]], # 42-P5/32
63
+ [-1, 1, Conv, [256, 1, 1]],
64
+ [-2, 1, Conv, [256, 1, 1]],
65
+ [-1, 1, Conv, [256, 3, 1]],
66
+ [-1, 1, Conv, [256, 3, 1]],
67
+ [-1, 1, Conv, [256, 3, 1]],
68
+ [-1, 1, Conv, [256, 3, 1]],
69
+ [[-1, -3, -5, -6], 1, Concat, [1]],
70
+ [-1, 1, Conv, [1024, 1, 1]], # 50
71
+ ]
72
+
73
+ # yolov7 head
74
+ head:
75
+ [[-1, 1, SPPCSPC, [512]], # 51
76
+
77
+ [-1, 1, Conv, [256, 1, 1]],
78
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
79
+ [37, 1, Conv, [256, 1, 1]], # route backbone P4
80
+ [[-1, -2], 1, Concat, [1]],
81
+
82
+ [-1, 1, Conv, [256, 1, 1]],
83
+ [-2, 1, Conv, [256, 1, 1]],
84
+ [-1, 1, Conv, [128, 3, 1]],
85
+ [-1, 1, Conv, [128, 3, 1]],
86
+ [-1, 1, Conv, [128, 3, 1]],
87
+ [-1, 1, Conv, [128, 3, 1]],
88
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
89
+ [-1, 1, Conv, [256, 1, 1]], # 63
90
+
91
+ [-1, 1, Conv, [128, 1, 1]],
92
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
93
+ [24, 1, Conv, [128, 1, 1]], # route backbone P3
94
+ [[-1, -2], 1, Concat, [1]],
95
+
96
+ [-1, 1, Conv, [128, 1, 1]],
97
+ [-2, 1, Conv, [128, 1, 1]],
98
+ [-1, 1, Conv, [64, 3, 1]],
99
+ [-1, 1, Conv, [64, 3, 1]],
100
+ [-1, 1, Conv, [64, 3, 1]],
101
+ [-1, 1, Conv, [64, 3, 1]],
102
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
103
+ [-1, 1, Conv, [128, 1, 1]], # 75
104
+
105
+ [-1, 1, MP, []],
106
+ [-1, 1, Conv, [128, 1, 1]],
107
+ [-3, 1, Conv, [128, 1, 1]],
108
+ [-1, 1, Conv, [128, 3, 2]],
109
+ [[-1, -3, 63], 1, Concat, [1]],
110
+
111
+ [-1, 1, Conv, [256, 1, 1]],
112
+ [-2, 1, Conv, [256, 1, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [-1, 1, Conv, [128, 3, 1]],
115
+ [-1, 1, Conv, [128, 3, 1]],
116
+ [-1, 1, Conv, [128, 3, 1]],
117
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
118
+ [-1, 1, Conv, [256, 1, 1]], # 88
119
+
120
+ [-1, 1, MP, []],
121
+ [-1, 1, Conv, [256, 1, 1]],
122
+ [-3, 1, Conv, [256, 1, 1]],
123
+ [-1, 1, Conv, [256, 3, 2]],
124
+ [[-1, -3, 51], 1, Concat, [1]],
125
+
126
+ [-1, 1, Conv, [512, 1, 1]],
127
+ [-2, 1, Conv, [512, 1, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [-1, 1, Conv, [256, 3, 1]],
130
+ [-1, 1, Conv, [256, 3, 1]],
131
+ [-1, 1, Conv, [256, 3, 1]],
132
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
133
+ [-1, 1, Conv, [512, 1, 1]], # 101
134
+
135
+ [75, 1, RepConv, [256, 3, 1]],
136
+ [88, 1, RepConv, [512, 3, 1]],
137
+ [101, 1, RepConv, [1024, 3, 1]],
138
+
139
+ [[102,103,104], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
140
+ ]
cfg/deploy/yolov7x.yaml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # yolov7x backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [40, 3, 1]], # 0
16
+
17
+ [-1, 1, Conv, [80, 3, 2]], # 1-P1/2
18
+ [-1, 1, Conv, [80, 3, 1]],
19
+
20
+ [-1, 1, Conv, [160, 3, 2]], # 3-P2/4
21
+ [-1, 1, Conv, [64, 1, 1]],
22
+ [-2, 1, Conv, [64, 1, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [-1, 1, Conv, [64, 3, 1]],
29
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
30
+ [-1, 1, Conv, [320, 1, 1]], # 13
31
+
32
+ [-1, 1, MP, []],
33
+ [-1, 1, Conv, [160, 1, 1]],
34
+ [-3, 1, Conv, [160, 1, 1]],
35
+ [-1, 1, Conv, [160, 3, 2]],
36
+ [[-1, -3], 1, Concat, [1]], # 18-P3/8
37
+ [-1, 1, Conv, [128, 1, 1]],
38
+ [-2, 1, Conv, [128, 1, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [-1, 1, Conv, [128, 3, 1]],
42
+ [-1, 1, Conv, [128, 3, 1]],
43
+ [-1, 1, Conv, [128, 3, 1]],
44
+ [-1, 1, Conv, [128, 3, 1]],
45
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
46
+ [-1, 1, Conv, [640, 1, 1]], # 28
47
+
48
+ [-1, 1, MP, []],
49
+ [-1, 1, Conv, [320, 1, 1]],
50
+ [-3, 1, Conv, [320, 1, 1]],
51
+ [-1, 1, Conv, [320, 3, 2]],
52
+ [[-1, -3], 1, Concat, [1]], # 33-P4/16
53
+ [-1, 1, Conv, [256, 1, 1]],
54
+ [-2, 1, Conv, [256, 1, 1]],
55
+ [-1, 1, Conv, [256, 3, 1]],
56
+ [-1, 1, Conv, [256, 3, 1]],
57
+ [-1, 1, Conv, [256, 3, 1]],
58
+ [-1, 1, Conv, [256, 3, 1]],
59
+ [-1, 1, Conv, [256, 3, 1]],
60
+ [-1, 1, Conv, [256, 3, 1]],
61
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
62
+ [-1, 1, Conv, [1280, 1, 1]], # 43
63
+
64
+ [-1, 1, MP, []],
65
+ [-1, 1, Conv, [640, 1, 1]],
66
+ [-3, 1, Conv, [640, 1, 1]],
67
+ [-1, 1, Conv, [640, 3, 2]],
68
+ [[-1, -3], 1, Concat, [1]], # 48-P5/32
69
+ [-1, 1, Conv, [256, 1, 1]],
70
+ [-2, 1, Conv, [256, 1, 1]],
71
+ [-1, 1, Conv, [256, 3, 1]],
72
+ [-1, 1, Conv, [256, 3, 1]],
73
+ [-1, 1, Conv, [256, 3, 1]],
74
+ [-1, 1, Conv, [256, 3, 1]],
75
+ [-1, 1, Conv, [256, 3, 1]],
76
+ [-1, 1, Conv, [256, 3, 1]],
77
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
78
+ [-1, 1, Conv, [1280, 1, 1]], # 58
79
+ ]
80
+
81
+ # yolov7x head
82
+ head:
83
+ [[-1, 1, SPPCSPC, [640]], # 59
84
+
85
+ [-1, 1, Conv, [320, 1, 1]],
86
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
87
+ [43, 1, Conv, [320, 1, 1]], # route backbone P4
88
+ [[-1, -2], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [256, 1, 1]],
91
+ [-2, 1, Conv, [256, 1, 1]],
92
+ [-1, 1, Conv, [256, 3, 1]],
93
+ [-1, 1, Conv, [256, 3, 1]],
94
+ [-1, 1, Conv, [256, 3, 1]],
95
+ [-1, 1, Conv, [256, 3, 1]],
96
+ [-1, 1, Conv, [256, 3, 1]],
97
+ [-1, 1, Conv, [256, 3, 1]],
98
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
99
+ [-1, 1, Conv, [320, 1, 1]], # 73
100
+
101
+ [-1, 1, Conv, [160, 1, 1]],
102
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
103
+ [28, 1, Conv, [160, 1, 1]], # route backbone P3
104
+ [[-1, -2], 1, Concat, [1]],
105
+
106
+ [-1, 1, Conv, [128, 1, 1]],
107
+ [-2, 1, Conv, [128, 1, 1]],
108
+ [-1, 1, Conv, [128, 3, 1]],
109
+ [-1, 1, Conv, [128, 3, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
115
+ [-1, 1, Conv, [160, 1, 1]], # 87
116
+
117
+ [-1, 1, MP, []],
118
+ [-1, 1, Conv, [160, 1, 1]],
119
+ [-3, 1, Conv, [160, 1, 1]],
120
+ [-1, 1, Conv, [160, 3, 2]],
121
+ [[-1, -3, 73], 1, Concat, [1]],
122
+
123
+ [-1, 1, Conv, [256, 1, 1]],
124
+ [-2, 1, Conv, [256, 1, 1]],
125
+ [-1, 1, Conv, [256, 3, 1]],
126
+ [-1, 1, Conv, [256, 3, 1]],
127
+ [-1, 1, Conv, [256, 3, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [-1, 1, Conv, [256, 3, 1]],
130
+ [-1, 1, Conv, [256, 3, 1]],
131
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
132
+ [-1, 1, Conv, [320, 1, 1]], # 102
133
+
134
+ [-1, 1, MP, []],
135
+ [-1, 1, Conv, [320, 1, 1]],
136
+ [-3, 1, Conv, [320, 1, 1]],
137
+ [-1, 1, Conv, [320, 3, 2]],
138
+ [[-1, -3, 59], 1, Concat, [1]],
139
+
140
+ [-1, 1, Conv, [512, 1, 1]],
141
+ [-2, 1, Conv, [512, 1, 1]],
142
+ [-1, 1, Conv, [512, 3, 1]],
143
+ [-1, 1, Conv, [512, 3, 1]],
144
+ [-1, 1, Conv, [512, 3, 1]],
145
+ [-1, 1, Conv, [512, 3, 1]],
146
+ [-1, 1, Conv, [512, 3, 1]],
147
+ [-1, 1, Conv, [512, 3, 1]],
148
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
149
+ [-1, 1, Conv, [640, 1, 1]], # 117
150
+
151
+ [87, 1, Conv, [320, 3, 1]],
152
+ [102, 1, Conv, [640, 3, 1]],
153
+ [117, 1, Conv, [1280, 3, 1]],
154
+
155
+ [[118,119,120], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
156
+ ]
cfg/training/yolov7-d6.yaml ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-d6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [96, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [192]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [-1, 1, Conv, [64, 3, 1]],
29
+ [-1, 1, Conv, [64, 3, 1]],
30
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
31
+ [-1, 1, Conv, [192, 1, 1]], # 14
32
+
33
+ [-1, 1, DownC, [384]], # 15-P3/8
34
+ [-1, 1, Conv, [128, 1, 1]],
35
+ [-2, 1, Conv, [128, 1, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [-1, 1, Conv, [128, 3, 1]],
42
+ [-1, 1, Conv, [128, 3, 1]],
43
+ [-1, 1, Conv, [128, 3, 1]],
44
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
45
+ [-1, 1, Conv, [384, 1, 1]], # 27
46
+
47
+ [-1, 1, DownC, [768]], # 28-P4/16
48
+ [-1, 1, Conv, [256, 1, 1]],
49
+ [-2, 1, Conv, [256, 1, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [-1, 1, Conv, [256, 3, 1]],
53
+ [-1, 1, Conv, [256, 3, 1]],
54
+ [-1, 1, Conv, [256, 3, 1]],
55
+ [-1, 1, Conv, [256, 3, 1]],
56
+ [-1, 1, Conv, [256, 3, 1]],
57
+ [-1, 1, Conv, [256, 3, 1]],
58
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
59
+ [-1, 1, Conv, [768, 1, 1]], # 40
60
+
61
+ [-1, 1, DownC, [1152]], # 41-P5/32
62
+ [-1, 1, Conv, [384, 1, 1]],
63
+ [-2, 1, Conv, [384, 1, 1]],
64
+ [-1, 1, Conv, [384, 3, 1]],
65
+ [-1, 1, Conv, [384, 3, 1]],
66
+ [-1, 1, Conv, [384, 3, 1]],
67
+ [-1, 1, Conv, [384, 3, 1]],
68
+ [-1, 1, Conv, [384, 3, 1]],
69
+ [-1, 1, Conv, [384, 3, 1]],
70
+ [-1, 1, Conv, [384, 3, 1]],
71
+ [-1, 1, Conv, [384, 3, 1]],
72
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
73
+ [-1, 1, Conv, [1152, 1, 1]], # 53
74
+
75
+ [-1, 1, DownC, [1536]], # 54-P6/64
76
+ [-1, 1, Conv, [512, 1, 1]],
77
+ [-2, 1, Conv, [512, 1, 1]],
78
+ [-1, 1, Conv, [512, 3, 1]],
79
+ [-1, 1, Conv, [512, 3, 1]],
80
+ [-1, 1, Conv, [512, 3, 1]],
81
+ [-1, 1, Conv, [512, 3, 1]],
82
+ [-1, 1, Conv, [512, 3, 1]],
83
+ [-1, 1, Conv, [512, 3, 1]],
84
+ [-1, 1, Conv, [512, 3, 1]],
85
+ [-1, 1, Conv, [512, 3, 1]],
86
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
87
+ [-1, 1, Conv, [1536, 1, 1]], # 66
88
+ ]
89
+
90
+ # yolov7-d6 head
91
+ head:
92
+ [[-1, 1, SPPCSPC, [768]], # 67
93
+
94
+ [-1, 1, Conv, [576, 1, 1]],
95
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
96
+ [53, 1, Conv, [576, 1, 1]], # route backbone P5
97
+ [[-1, -2], 1, Concat, [1]],
98
+
99
+ [-1, 1, Conv, [384, 1, 1]],
100
+ [-2, 1, Conv, [384, 1, 1]],
101
+ [-1, 1, Conv, [192, 3, 1]],
102
+ [-1, 1, Conv, [192, 3, 1]],
103
+ [-1, 1, Conv, [192, 3, 1]],
104
+ [-1, 1, Conv, [192, 3, 1]],
105
+ [-1, 1, Conv, [192, 3, 1]],
106
+ [-1, 1, Conv, [192, 3, 1]],
107
+ [-1, 1, Conv, [192, 3, 1]],
108
+ [-1, 1, Conv, [192, 3, 1]],
109
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
110
+ [-1, 1, Conv, [576, 1, 1]], # 83
111
+
112
+ [-1, 1, Conv, [384, 1, 1]],
113
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
114
+ [40, 1, Conv, [384, 1, 1]], # route backbone P4
115
+ [[-1, -2], 1, Concat, [1]],
116
+
117
+ [-1, 1, Conv, [256, 1, 1]],
118
+ [-2, 1, Conv, [256, 1, 1]],
119
+ [-1, 1, Conv, [128, 3, 1]],
120
+ [-1, 1, Conv, [128, 3, 1]],
121
+ [-1, 1, Conv, [128, 3, 1]],
122
+ [-1, 1, Conv, [128, 3, 1]],
123
+ [-1, 1, Conv, [128, 3, 1]],
124
+ [-1, 1, Conv, [128, 3, 1]],
125
+ [-1, 1, Conv, [128, 3, 1]],
126
+ [-1, 1, Conv, [128, 3, 1]],
127
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
128
+ [-1, 1, Conv, [384, 1, 1]], # 99
129
+
130
+ [-1, 1, Conv, [192, 1, 1]],
131
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
132
+ [27, 1, Conv, [192, 1, 1]], # route backbone P3
133
+ [[-1, -2], 1, Concat, [1]],
134
+
135
+ [-1, 1, Conv, [128, 1, 1]],
136
+ [-2, 1, Conv, [128, 1, 1]],
137
+ [-1, 1, Conv, [64, 3, 1]],
138
+ [-1, 1, Conv, [64, 3, 1]],
139
+ [-1, 1, Conv, [64, 3, 1]],
140
+ [-1, 1, Conv, [64, 3, 1]],
141
+ [-1, 1, Conv, [64, 3, 1]],
142
+ [-1, 1, Conv, [64, 3, 1]],
143
+ [-1, 1, Conv, [64, 3, 1]],
144
+ [-1, 1, Conv, [64, 3, 1]],
145
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
146
+ [-1, 1, Conv, [192, 1, 1]], # 115
147
+
148
+ [-1, 1, DownC, [384]],
149
+ [[-1, 99], 1, Concat, [1]],
150
+
151
+ [-1, 1, Conv, [256, 1, 1]],
152
+ [-2, 1, Conv, [256, 1, 1]],
153
+ [-1, 1, Conv, [128, 3, 1]],
154
+ [-1, 1, Conv, [128, 3, 1]],
155
+ [-1, 1, Conv, [128, 3, 1]],
156
+ [-1, 1, Conv, [128, 3, 1]],
157
+ [-1, 1, Conv, [128, 3, 1]],
158
+ [-1, 1, Conv, [128, 3, 1]],
159
+ [-1, 1, Conv, [128, 3, 1]],
160
+ [-1, 1, Conv, [128, 3, 1]],
161
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
162
+ [-1, 1, Conv, [384, 1, 1]], # 129
163
+
164
+ [-1, 1, DownC, [576]],
165
+ [[-1, 83], 1, Concat, [1]],
166
+
167
+ [-1, 1, Conv, [384, 1, 1]],
168
+ [-2, 1, Conv, [384, 1, 1]],
169
+ [-1, 1, Conv, [192, 3, 1]],
170
+ [-1, 1, Conv, [192, 3, 1]],
171
+ [-1, 1, Conv, [192, 3, 1]],
172
+ [-1, 1, Conv, [192, 3, 1]],
173
+ [-1, 1, Conv, [192, 3, 1]],
174
+ [-1, 1, Conv, [192, 3, 1]],
175
+ [-1, 1, Conv, [192, 3, 1]],
176
+ [-1, 1, Conv, [192, 3, 1]],
177
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
178
+ [-1, 1, Conv, [576, 1, 1]], # 143
179
+
180
+ [-1, 1, DownC, [768]],
181
+ [[-1, 67], 1, Concat, [1]],
182
+
183
+ [-1, 1, Conv, [512, 1, 1]],
184
+ [-2, 1, Conv, [512, 1, 1]],
185
+ [-1, 1, Conv, [256, 3, 1]],
186
+ [-1, 1, Conv, [256, 3, 1]],
187
+ [-1, 1, Conv, [256, 3, 1]],
188
+ [-1, 1, Conv, [256, 3, 1]],
189
+ [-1, 1, Conv, [256, 3, 1]],
190
+ [-1, 1, Conv, [256, 3, 1]],
191
+ [-1, 1, Conv, [256, 3, 1]],
192
+ [-1, 1, Conv, [256, 3, 1]],
193
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
194
+ [-1, 1, Conv, [768, 1, 1]], # 157
195
+
196
+ [115, 1, Conv, [384, 3, 1]],
197
+ [129, 1, Conv, [768, 3, 1]],
198
+ [143, 1, Conv, [1152, 3, 1]],
199
+ [157, 1, Conv, [1536, 3, 1]],
200
+
201
+ [115, 1, Conv, [384, 3, 1]],
202
+ [99, 1, Conv, [768, 3, 1]],
203
+ [83, 1, Conv, [1152, 3, 1]],
204
+ [67, 1, Conv, [1536, 3, 1]],
205
+
206
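+ [[158,159,160,161,162,163,164,165], 1, IAuxDetect, [nc, anchors]], # IAuxDetect(P3, P4, P5, P6): inputs 158-161 come from the final head outputs (115/129/143/157), inputs 162-165 from the earlier features (115/99/83/67) - presumably lead heads followed by auxiliary heads; confirm against IAuxDetect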
207
+ ]
cfg/training/yolov7-e6.yaml ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-e6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [80, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [160]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
29
+ [-1, 1, Conv, [160, 1, 1]], # 12
30
+
31
+ [-1, 1, DownC, [320]], # 13-P3/8
32
+ [-1, 1, Conv, [128, 1, 1]],
33
+ [-2, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
41
+ [-1, 1, Conv, [320, 1, 1]], # 23
42
+
43
+ [-1, 1, DownC, [640]], # 24-P4/16
44
+ [-1, 1, Conv, [256, 1, 1]],
45
+ [-2, 1, Conv, [256, 1, 1]],
46
+ [-1, 1, Conv, [256, 3, 1]],
47
+ [-1, 1, Conv, [256, 3, 1]],
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-1, 1, Conv, [256, 3, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
53
+ [-1, 1, Conv, [640, 1, 1]], # 34
54
+
55
+ [-1, 1, DownC, [960]], # 35-P5/32
56
+ [-1, 1, Conv, [384, 1, 1]],
57
+ [-2, 1, Conv, [384, 1, 1]],
58
+ [-1, 1, Conv, [384, 3, 1]],
59
+ [-1, 1, Conv, [384, 3, 1]],
60
+ [-1, 1, Conv, [384, 3, 1]],
61
+ [-1, 1, Conv, [384, 3, 1]],
62
+ [-1, 1, Conv, [384, 3, 1]],
63
+ [-1, 1, Conv, [384, 3, 1]],
64
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
65
+ [-1, 1, Conv, [960, 1, 1]], # 45
66
+
67
+ [-1, 1, DownC, [1280]], # 46-P6/64
68
+ [-1, 1, Conv, [512, 1, 1]],
69
+ [-2, 1, Conv, [512, 1, 1]],
70
+ [-1, 1, Conv, [512, 3, 1]],
71
+ [-1, 1, Conv, [512, 3, 1]],
72
+ [-1, 1, Conv, [512, 3, 1]],
73
+ [-1, 1, Conv, [512, 3, 1]],
74
+ [-1, 1, Conv, [512, 3, 1]],
75
+ [-1, 1, Conv, [512, 3, 1]],
76
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
77
+ [-1, 1, Conv, [1280, 1, 1]], # 56
78
+ ]
79
+
80
+ # yolov7-e6 head
81
+ head:
82
+ [[-1, 1, SPPCSPC, [640]], # 57
83
+
84
+ [-1, 1, Conv, [480, 1, 1]],
85
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
86
+ [45, 1, Conv, [480, 1, 1]], # route backbone P5
87
+ [[-1, -2], 1, Concat, [1]],
88
+
89
+ [-1, 1, Conv, [384, 1, 1]],
90
+ [-2, 1, Conv, [384, 1, 1]],
91
+ [-1, 1, Conv, [192, 3, 1]],
92
+ [-1, 1, Conv, [192, 3, 1]],
93
+ [-1, 1, Conv, [192, 3, 1]],
94
+ [-1, 1, Conv, [192, 3, 1]],
95
+ [-1, 1, Conv, [192, 3, 1]],
96
+ [-1, 1, Conv, [192, 3, 1]],
97
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
98
+ [-1, 1, Conv, [480, 1, 1]], # 71
99
+
100
+ [-1, 1, Conv, [320, 1, 1]],
101
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
102
+ [34, 1, Conv, [320, 1, 1]], # route backbone P4
103
+ [[-1, -2], 1, Concat, [1]],
104
+
105
+ [-1, 1, Conv, [256, 1, 1]],
106
+ [-2, 1, Conv, [256, 1, 1]],
107
+ [-1, 1, Conv, [128, 3, 1]],
108
+ [-1, 1, Conv, [128, 3, 1]],
109
+ [-1, 1, Conv, [128, 3, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
114
+ [-1, 1, Conv, [320, 1, 1]], # 85
115
+
116
+ [-1, 1, Conv, [160, 1, 1]],
117
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
118
+ [23, 1, Conv, [160, 1, 1]], # route backbone P3
119
+ [[-1, -2], 1, Concat, [1]],
120
+
121
+ [-1, 1, Conv, [128, 1, 1]],
122
+ [-2, 1, Conv, [128, 1, 1]],
123
+ [-1, 1, Conv, [64, 3, 1]],
124
+ [-1, 1, Conv, [64, 3, 1]],
125
+ [-1, 1, Conv, [64, 3, 1]],
126
+ [-1, 1, Conv, [64, 3, 1]],
127
+ [-1, 1, Conv, [64, 3, 1]],
128
+ [-1, 1, Conv, [64, 3, 1]],
129
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
130
+ [-1, 1, Conv, [160, 1, 1]], # 99
131
+
132
+ [-1, 1, DownC, [320]],
133
+ [[-1, 85], 1, Concat, [1]],
134
+
135
+ [-1, 1, Conv, [256, 1, 1]],
136
+ [-2, 1, Conv, [256, 1, 1]],
137
+ [-1, 1, Conv, [128, 3, 1]],
138
+ [-1, 1, Conv, [128, 3, 1]],
139
+ [-1, 1, Conv, [128, 3, 1]],
140
+ [-1, 1, Conv, [128, 3, 1]],
141
+ [-1, 1, Conv, [128, 3, 1]],
142
+ [-1, 1, Conv, [128, 3, 1]],
143
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
144
+ [-1, 1, Conv, [320, 1, 1]], # 111
145
+
146
+ [-1, 1, DownC, [480]],
147
+ [[-1, 71], 1, Concat, [1]],
148
+
149
+ [-1, 1, Conv, [384, 1, 1]],
150
+ [-2, 1, Conv, [384, 1, 1]],
151
+ [-1, 1, Conv, [192, 3, 1]],
152
+ [-1, 1, Conv, [192, 3, 1]],
153
+ [-1, 1, Conv, [192, 3, 1]],
154
+ [-1, 1, Conv, [192, 3, 1]],
155
+ [-1, 1, Conv, [192, 3, 1]],
156
+ [-1, 1, Conv, [192, 3, 1]],
157
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
158
+ [-1, 1, Conv, [480, 1, 1]], # 123
159
+
160
+ [-1, 1, DownC, [640]],
161
+ [[-1, 57], 1, Concat, [1]],
162
+
163
+ [-1, 1, Conv, [512, 1, 1]],
164
+ [-2, 1, Conv, [512, 1, 1]],
165
+ [-1, 1, Conv, [256, 3, 1]],
166
+ [-1, 1, Conv, [256, 3, 1]],
167
+ [-1, 1, Conv, [256, 3, 1]],
168
+ [-1, 1, Conv, [256, 3, 1]],
169
+ [-1, 1, Conv, [256, 3, 1]],
170
+ [-1, 1, Conv, [256, 3, 1]],
171
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
172
+ [-1, 1, Conv, [640, 1, 1]], # 135
173
+
174
+ [99, 1, Conv, [320, 3, 1]],
175
+ [111, 1, Conv, [640, 3, 1]],
176
+ [123, 1, Conv, [960, 3, 1]],
177
+ [135, 1, Conv, [1280, 3, 1]],
178
+
179
+ [99, 1, Conv, [320, 3, 1]],
180
+ [85, 1, Conv, [640, 3, 1]],
181
+ [71, 1, Conv, [960, 3, 1]],
182
+ [57, 1, Conv, [1280, 3, 1]],
183
+
184
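+ [[136,137,138,139,140,141,142,143], 1, IAuxDetect, [nc, anchors]], # IAuxDetect(P3, P4, P5, P6): inputs 136-139 come from the final head outputs (99/111/123/135), inputs 140-143 from the earlier features (99/85/71/57) - presumably lead heads followed by auxiliary heads; confirm against IAuxDetect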
185
+ ]
cfg/training/yolov7-e6e.yaml ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-e6e backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [80, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [160]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
29
+ [-1, 1, Conv, [160, 1, 1]], # 12
30
+ [-11, 1, Conv, [64, 1, 1]],
31
+ [-12, 1, Conv, [64, 1, 1]],
32
+ [-1, 1, Conv, [64, 3, 1]],
33
+ [-1, 1, Conv, [64, 3, 1]],
34
+ [-1, 1, Conv, [64, 3, 1]],
35
+ [-1, 1, Conv, [64, 3, 1]],
36
+ [-1, 1, Conv, [64, 3, 1]],
37
+ [-1, 1, Conv, [64, 3, 1]],
38
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
39
+ [-1, 1, Conv, [160, 1, 1]], # 22
40
+ [[-1, -11], 1, Shortcut, [1]], # 23
41
+
42
+ [-1, 1, DownC, [320]], # 24-P3/8
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, Conv, [128, 3, 1]],
46
+ [-1, 1, Conv, [128, 3, 1]],
47
+ [-1, 1, Conv, [128, 3, 1]],
48
+ [-1, 1, Conv, [128, 3, 1]],
49
+ [-1, 1, Conv, [128, 3, 1]],
50
+ [-1, 1, Conv, [128, 3, 1]],
51
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
52
+ [-1, 1, Conv, [320, 1, 1]], # 34
53
+ [-11, 1, Conv, [128, 1, 1]],
54
+ [-12, 1, Conv, [128, 1, 1]],
55
+ [-1, 1, Conv, [128, 3, 1]],
56
+ [-1, 1, Conv, [128, 3, 1]],
57
+ [-1, 1, Conv, [128, 3, 1]],
58
+ [-1, 1, Conv, [128, 3, 1]],
59
+ [-1, 1, Conv, [128, 3, 1]],
60
+ [-1, 1, Conv, [128, 3, 1]],
61
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
62
+ [-1, 1, Conv, [320, 1, 1]], # 44
63
+ [[-1, -11], 1, Shortcut, [1]], # 45
64
+
65
+ [-1, 1, DownC, [640]], # 46-P4/16
66
+ [-1, 1, Conv, [256, 1, 1]],
67
+ [-2, 1, Conv, [256, 1, 1]],
68
+ [-1, 1, Conv, [256, 3, 1]],
69
+ [-1, 1, Conv, [256, 3, 1]],
70
+ [-1, 1, Conv, [256, 3, 1]],
71
+ [-1, 1, Conv, [256, 3, 1]],
72
+ [-1, 1, Conv, [256, 3, 1]],
73
+ [-1, 1, Conv, [256, 3, 1]],
74
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
75
+ [-1, 1, Conv, [640, 1, 1]], # 56
76
+ [-11, 1, Conv, [256, 1, 1]],
77
+ [-12, 1, Conv, [256, 1, 1]],
78
+ [-1, 1, Conv, [256, 3, 1]],
79
+ [-1, 1, Conv, [256, 3, 1]],
80
+ [-1, 1, Conv, [256, 3, 1]],
81
+ [-1, 1, Conv, [256, 3, 1]],
82
+ [-1, 1, Conv, [256, 3, 1]],
83
+ [-1, 1, Conv, [256, 3, 1]],
84
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
85
+ [-1, 1, Conv, [640, 1, 1]], # 66
86
+ [[-1, -11], 1, Shortcut, [1]], # 67
87
+
88
+ [-1, 1, DownC, [960]], # 68-P5/32
89
+ [-1, 1, Conv, [384, 1, 1]],
90
+ [-2, 1, Conv, [384, 1, 1]],
91
+ [-1, 1, Conv, [384, 3, 1]],
92
+ [-1, 1, Conv, [384, 3, 1]],
93
+ [-1, 1, Conv, [384, 3, 1]],
94
+ [-1, 1, Conv, [384, 3, 1]],
95
+ [-1, 1, Conv, [384, 3, 1]],
96
+ [-1, 1, Conv, [384, 3, 1]],
97
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
98
+ [-1, 1, Conv, [960, 1, 1]], # 78
99
+ [-11, 1, Conv, [384, 1, 1]],
100
+ [-12, 1, Conv, [384, 1, 1]],
101
+ [-1, 1, Conv, [384, 3, 1]],
102
+ [-1, 1, Conv, [384, 3, 1]],
103
+ [-1, 1, Conv, [384, 3, 1]],
104
+ [-1, 1, Conv, [384, 3, 1]],
105
+ [-1, 1, Conv, [384, 3, 1]],
106
+ [-1, 1, Conv, [384, 3, 1]],
107
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
108
+ [-1, 1, Conv, [960, 1, 1]], # 88
109
+ [[-1, -11], 1, Shortcut, [1]], # 89
110
+
111
+ [-1, 1, DownC, [1280]], # 90-P6/64
112
+ [-1, 1, Conv, [512, 1, 1]],
113
+ [-2, 1, Conv, [512, 1, 1]],
114
+ [-1, 1, Conv, [512, 3, 1]],
115
+ [-1, 1, Conv, [512, 3, 1]],
116
+ [-1, 1, Conv, [512, 3, 1]],
117
+ [-1, 1, Conv, [512, 3, 1]],
118
+ [-1, 1, Conv, [512, 3, 1]],
119
+ [-1, 1, Conv, [512, 3, 1]],
120
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
121
+ [-1, 1, Conv, [1280, 1, 1]], # 100
122
+ [-11, 1, Conv, [512, 1, 1]],
123
+ [-12, 1, Conv, [512, 1, 1]],
124
+ [-1, 1, Conv, [512, 3, 1]],
125
+ [-1, 1, Conv, [512, 3, 1]],
126
+ [-1, 1, Conv, [512, 3, 1]],
127
+ [-1, 1, Conv, [512, 3, 1]],
128
+ [-1, 1, Conv, [512, 3, 1]],
129
+ [-1, 1, Conv, [512, 3, 1]],
130
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
131
+ [-1, 1, Conv, [1280, 1, 1]], # 110
132
+ [[-1, -11], 1, Shortcut, [1]], # 111
133
+ ]
134
+
135
+ # yolov7 head
136
+ head:
137
+ [[-1, 1, SPPCSPC, [640]], # 112
138
+
139
+ [-1, 1, Conv, [480, 1, 1]],
140
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
141
+ [89, 1, Conv, [480, 1, 1]], # route backbone P5
142
+ [[-1, -2], 1, Concat, [1]],
143
+
144
+ [-1, 1, Conv, [384, 1, 1]],
145
+ [-2, 1, Conv, [384, 1, 1]],
146
+ [-1, 1, Conv, [192, 3, 1]],
147
+ [-1, 1, Conv, [192, 3, 1]],
148
+ [-1, 1, Conv, [192, 3, 1]],
149
+ [-1, 1, Conv, [192, 3, 1]],
150
+ [-1, 1, Conv, [192, 3, 1]],
151
+ [-1, 1, Conv, [192, 3, 1]],
152
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
153
+ [-1, 1, Conv, [480, 1, 1]], # 126
154
+ [-11, 1, Conv, [384, 1, 1]],
155
+ [-12, 1, Conv, [384, 1, 1]],
156
+ [-1, 1, Conv, [192, 3, 1]],
157
+ [-1, 1, Conv, [192, 3, 1]],
158
+ [-1, 1, Conv, [192, 3, 1]],
159
+ [-1, 1, Conv, [192, 3, 1]],
160
+ [-1, 1, Conv, [192, 3, 1]],
161
+ [-1, 1, Conv, [192, 3, 1]],
162
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
163
+ [-1, 1, Conv, [480, 1, 1]], # 136
164
+ [[-1, -11], 1, Shortcut, [1]], # 137
165
+
166
+ [-1, 1, Conv, [320, 1, 1]],
167
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
168
+ [67, 1, Conv, [320, 1, 1]], # route backbone P4
169
+ [[-1, -2], 1, Concat, [1]],
170
+
171
+ [-1, 1, Conv, [256, 1, 1]],
172
+ [-2, 1, Conv, [256, 1, 1]],
173
+ [-1, 1, Conv, [128, 3, 1]],
174
+ [-1, 1, Conv, [128, 3, 1]],
175
+ [-1, 1, Conv, [128, 3, 1]],
176
+ [-1, 1, Conv, [128, 3, 1]],
177
+ [-1, 1, Conv, [128, 3, 1]],
178
+ [-1, 1, Conv, [128, 3, 1]],
179
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
180
+ [-1, 1, Conv, [320, 1, 1]], # 151
181
+ [-11, 1, Conv, [256, 1, 1]],
182
+ [-12, 1, Conv, [256, 1, 1]],
183
+ [-1, 1, Conv, [128, 3, 1]],
184
+ [-1, 1, Conv, [128, 3, 1]],
185
+ [-1, 1, Conv, [128, 3, 1]],
186
+ [-1, 1, Conv, [128, 3, 1]],
187
+ [-1, 1, Conv, [128, 3, 1]],
188
+ [-1, 1, Conv, [128, 3, 1]],
189
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
190
+ [-1, 1, Conv, [320, 1, 1]], # 161
191
+ [[-1, -11], 1, Shortcut, [1]], # 162
192
+
193
+ [-1, 1, Conv, [160, 1, 1]],
194
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
195
+ [45, 1, Conv, [160, 1, 1]], # route backbone P3
196
+ [[-1, -2], 1, Concat, [1]],
197
+
198
+ [-1, 1, Conv, [128, 1, 1]],
199
+ [-2, 1, Conv, [128, 1, 1]],
200
+ [-1, 1, Conv, [64, 3, 1]],
201
+ [-1, 1, Conv, [64, 3, 1]],
202
+ [-1, 1, Conv, [64, 3, 1]],
203
+ [-1, 1, Conv, [64, 3, 1]],
204
+ [-1, 1, Conv, [64, 3, 1]],
205
+ [-1, 1, Conv, [64, 3, 1]],
206
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
207
+ [-1, 1, Conv, [160, 1, 1]], # 176
208
+ [-11, 1, Conv, [128, 1, 1]],
209
+ [-12, 1, Conv, [128, 1, 1]],
210
+ [-1, 1, Conv, [64, 3, 1]],
211
+ [-1, 1, Conv, [64, 3, 1]],
212
+ [-1, 1, Conv, [64, 3, 1]],
213
+ [-1, 1, Conv, [64, 3, 1]],
214
+ [-1, 1, Conv, [64, 3, 1]],
215
+ [-1, 1, Conv, [64, 3, 1]],
216
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
217
+ [-1, 1, Conv, [160, 1, 1]], # 186
218
+ [[-1, -11], 1, Shortcut, [1]], # 187
219
+
220
+ [-1, 1, DownC, [320]],
221
+ [[-1, 162], 1, Concat, [1]],
222
+
223
+ [-1, 1, Conv, [256, 1, 1]],
224
+ [-2, 1, Conv, [256, 1, 1]],
225
+ [-1, 1, Conv, [128, 3, 1]],
226
+ [-1, 1, Conv, [128, 3, 1]],
227
+ [-1, 1, Conv, [128, 3, 1]],
228
+ [-1, 1, Conv, [128, 3, 1]],
229
+ [-1, 1, Conv, [128, 3, 1]],
230
+ [-1, 1, Conv, [128, 3, 1]],
231
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
232
+ [-1, 1, Conv, [320, 1, 1]], # 199
233
+ [-11, 1, Conv, [256, 1, 1]],
234
+ [-12, 1, Conv, [256, 1, 1]],
235
+ [-1, 1, Conv, [128, 3, 1]],
236
+ [-1, 1, Conv, [128, 3, 1]],
237
+ [-1, 1, Conv, [128, 3, 1]],
238
+ [-1, 1, Conv, [128, 3, 1]],
239
+ [-1, 1, Conv, [128, 3, 1]],
240
+ [-1, 1, Conv, [128, 3, 1]],
241
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
242
+ [-1, 1, Conv, [320, 1, 1]], # 209
243
+ [[-1, -11], 1, Shortcut, [1]], # 210
244
+
245
+ [-1, 1, DownC, [480]],
246
+ [[-1, 137], 1, Concat, [1]],
247
+
248
+ [-1, 1, Conv, [384, 1, 1]],
249
+ [-2, 1, Conv, [384, 1, 1]],
250
+ [-1, 1, Conv, [192, 3, 1]],
251
+ [-1, 1, Conv, [192, 3, 1]],
252
+ [-1, 1, Conv, [192, 3, 1]],
253
+ [-1, 1, Conv, [192, 3, 1]],
254
+ [-1, 1, Conv, [192, 3, 1]],
255
+ [-1, 1, Conv, [192, 3, 1]],
256
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
257
+ [-1, 1, Conv, [480, 1, 1]], # 222
258
+ [-11, 1, Conv, [384, 1, 1]],
259
+ [-12, 1, Conv, [384, 1, 1]],
260
+ [-1, 1, Conv, [192, 3, 1]],
261
+ [-1, 1, Conv, [192, 3, 1]],
262
+ [-1, 1, Conv, [192, 3, 1]],
263
+ [-1, 1, Conv, [192, 3, 1]],
264
+ [-1, 1, Conv, [192, 3, 1]],
265
+ [-1, 1, Conv, [192, 3, 1]],
266
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
267
+ [-1, 1, Conv, [480, 1, 1]], # 232
268
+ [[-1, -11], 1, Shortcut, [1]], # 233
269
+
270
+ [-1, 1, DownC, [640]],
271
+ [[-1, 112], 1, Concat, [1]],
272
+
273
+ [-1, 1, Conv, [512, 1, 1]],
274
+ [-2, 1, Conv, [512, 1, 1]],
275
+ [-1, 1, Conv, [256, 3, 1]],
276
+ [-1, 1, Conv, [256, 3, 1]],
277
+ [-1, 1, Conv, [256, 3, 1]],
278
+ [-1, 1, Conv, [256, 3, 1]],
279
+ [-1, 1, Conv, [256, 3, 1]],
280
+ [-1, 1, Conv, [256, 3, 1]],
281
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
282
+ [-1, 1, Conv, [640, 1, 1]], # 245
283
+ [-11, 1, Conv, [512, 1, 1]],
284
+ [-12, 1, Conv, [512, 1, 1]],
285
+ [-1, 1, Conv, [256, 3, 1]],
286
+ [-1, 1, Conv, [256, 3, 1]],
287
+ [-1, 1, Conv, [256, 3, 1]],
288
+ [-1, 1, Conv, [256, 3, 1]],
289
+ [-1, 1, Conv, [256, 3, 1]],
290
+ [-1, 1, Conv, [256, 3, 1]],
291
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
292
+ [-1, 1, Conv, [640, 1, 1]], # 255
293
+ [[-1, -11], 1, Shortcut, [1]], # 256
294
+
295
+ [187, 1, Conv, [320, 3, 1]],
296
+ [210, 1, Conv, [640, 3, 1]],
297
+ [233, 1, Conv, [960, 3, 1]],
298
+ [256, 1, Conv, [1280, 3, 1]],
299
+
300
+ [186, 1, Conv, [320, 3, 1]],
301
+ [161, 1, Conv, [640, 3, 1]],
302
+ [136, 1, Conv, [960, 3, 1]],
303
+ [112, 1, Conv, [1280, 3, 1]],
304
+
305
+ [[257,258,259,260,261,262,263,264], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
306
+ ]
cfg/training/yolov7-tiny.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # yolov7-tiny backbone
13
+ backbone:
14
+ # [from, number, module, args]; Conv args are [c2, k=1, s=1, p=None, g=1, act=True]
15
+ [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2
16
+
17
+ [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4
18
+
19
+ [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
20
+ [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
21
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
22
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
23
+ [[-1, -2, -3, -4], 1, Concat, [1]],
24
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 7
25
+
26
+ [-1, 1, MP, []], # 8-P3/8
27
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
28
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
29
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
30
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
31
+ [[-1, -2, -3, -4], 1, Concat, [1]],
32
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 14
33
+
34
+ [-1, 1, MP, []], # 15-P4/16
35
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
36
+ [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
37
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
38
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
39
+ [[-1, -2, -3, -4], 1, Concat, [1]],
40
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 21
41
+
42
+ [-1, 1, MP, []], # 22-P5/32
43
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
44
+ [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
45
+ [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
46
+ [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
47
+ [[-1, -2, -3, -4], 1, Concat, [1]],
48
+ [-1, 1, Conv, [512, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 28
49
+ ]
50
+
51
+ # yolov7-tiny head
52
+ head:
53
+ [[-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
54
+ [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
55
+ [-1, 1, SP, [5]],
56
+ [-2, 1, SP, [9]],
57
+ [-3, 1, SP, [13]],
58
+ [[-1, -2, -3, -4], 1, Concat, [1]],
59
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
60
+ [[-1, -7], 1, Concat, [1]],
61
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 37
62
+
63
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
64
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
65
+ [21, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4
66
+ [[-1, -2], 1, Concat, [1]],
67
+
68
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
69
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
70
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
71
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
72
+ [[-1, -2, -3, -4], 1, Concat, [1]],
73
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 47
74
+
75
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
76
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
77
+ [14, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3
78
+ [[-1, -2], 1, Concat, [1]],
79
+
80
+ [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
81
+ [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
82
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
83
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
84
+ [[-1, -2, -3, -4], 1, Concat, [1]],
85
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 57
86
+
87
+ [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]],
88
+ [[-1, 47], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
91
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
92
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
93
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
94
+ [[-1, -2, -3, -4], 1, Concat, [1]],
95
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 65
96
+
97
+ [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]],
98
+ [[-1, 37], 1, Concat, [1]],
99
+
100
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
101
+ [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
102
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
103
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
104
+ [[-1, -2, -3, -4], 1, Concat, [1]],
105
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 73
106
+
107
+ [57, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
108
+ [65, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
109
+ [73, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
110
+
111
+ [[74,75,76], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
112
+ ]
cfg/training/yolov7-w6.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [[-1, -3, -5, -6], 1, Concat, [1]],
27
+ [-1, 1, Conv, [128, 1, 1]], # 10
28
+
29
+ [-1, 1, Conv, [256, 3, 2]], # 11-P3/8
30
+ [-1, 1, Conv, [128, 1, 1]],
31
+ [-2, 1, Conv, [128, 1, 1]],
32
+ [-1, 1, Conv, [128, 3, 1]],
33
+ [-1, 1, Conv, [128, 3, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [[-1, -3, -5, -6], 1, Concat, [1]],
37
+ [-1, 1, Conv, [256, 1, 1]], # 19
38
+
39
+ [-1, 1, Conv, [512, 3, 2]], # 20-P4/16
40
+ [-1, 1, Conv, [256, 1, 1]],
41
+ [-2, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [256, 3, 1]],
43
+ [-1, 1, Conv, [256, 3, 1]],
44
+ [-1, 1, Conv, [256, 3, 1]],
45
+ [-1, 1, Conv, [256, 3, 1]],
46
+ [[-1, -3, -5, -6], 1, Concat, [1]],
47
+ [-1, 1, Conv, [512, 1, 1]], # 28
48
+
49
+ [-1, 1, Conv, [768, 3, 2]], # 29-P5/32
50
+ [-1, 1, Conv, [384, 1, 1]],
51
+ [-2, 1, Conv, [384, 1, 1]],
52
+ [-1, 1, Conv, [384, 3, 1]],
53
+ [-1, 1, Conv, [384, 3, 1]],
54
+ [-1, 1, Conv, [384, 3, 1]],
55
+ [-1, 1, Conv, [384, 3, 1]],
56
+ [[-1, -3, -5, -6], 1, Concat, [1]],
57
+ [-1, 1, Conv, [768, 1, 1]], # 37
58
+
59
+ [-1, 1, Conv, [1024, 3, 2]], # 38-P6/64
60
+ [-1, 1, Conv, [512, 1, 1]],
61
+ [-2, 1, Conv, [512, 1, 1]],
62
+ [-1, 1, Conv, [512, 3, 1]],
63
+ [-1, 1, Conv, [512, 3, 1]],
64
+ [-1, 1, Conv, [512, 3, 1]],
65
+ [-1, 1, Conv, [512, 3, 1]],
66
+ [[-1, -3, -5, -6], 1, Concat, [1]],
67
+ [-1, 1, Conv, [1024, 1, 1]], # 46
68
+ ]
69
+
70
+ # yolov7 head
71
+ head:
72
+ [[-1, 1, SPPCSPC, [512]], # 47
73
+
74
+ [-1, 1, Conv, [384, 1, 1]],
75
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
76
+ [37, 1, Conv, [384, 1, 1]], # route backbone P5
77
+ [[-1, -2], 1, Concat, [1]],
78
+
79
+ [-1, 1, Conv, [384, 1, 1]],
80
+ [-2, 1, Conv, [384, 1, 1]],
81
+ [-1, 1, Conv, [192, 3, 1]],
82
+ [-1, 1, Conv, [192, 3, 1]],
83
+ [-1, 1, Conv, [192, 3, 1]],
84
+ [-1, 1, Conv, [192, 3, 1]],
85
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
86
+ [-1, 1, Conv, [384, 1, 1]], # 59
87
+
88
+ [-1, 1, Conv, [256, 1, 1]],
89
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
90
+ [28, 1, Conv, [256, 1, 1]], # route backbone P4
91
+ [[-1, -2], 1, Concat, [1]],
92
+
93
+ [-1, 1, Conv, [256, 1, 1]],
94
+ [-2, 1, Conv, [256, 1, 1]],
95
+ [-1, 1, Conv, [128, 3, 1]],
96
+ [-1, 1, Conv, [128, 3, 1]],
97
+ [-1, 1, Conv, [128, 3, 1]],
98
+ [-1, 1, Conv, [128, 3, 1]],
99
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
100
+ [-1, 1, Conv, [256, 1, 1]], # 71
101
+
102
+ [-1, 1, Conv, [128, 1, 1]],
103
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
104
+ [19, 1, Conv, [128, 1, 1]], # route backbone P3
105
+ [[-1, -2], 1, Concat, [1]],
106
+
107
+ [-1, 1, Conv, [128, 1, 1]],
108
+ [-2, 1, Conv, [128, 1, 1]],
109
+ [-1, 1, Conv, [64, 3, 1]],
110
+ [-1, 1, Conv, [64, 3, 1]],
111
+ [-1, 1, Conv, [64, 3, 1]],
112
+ [-1, 1, Conv, [64, 3, 1]],
113
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
114
+ [-1, 1, Conv, [128, 1, 1]], # 83
115
+
116
+ [-1, 1, Conv, [256, 3, 2]],
117
+ [[-1, 71], 1, Concat, [1]], # cat
118
+
119
+ [-1, 1, Conv, [256, 1, 1]],
120
+ [-2, 1, Conv, [256, 1, 1]],
121
+ [-1, 1, Conv, [128, 3, 1]],
122
+ [-1, 1, Conv, [128, 3, 1]],
123
+ [-1, 1, Conv, [128, 3, 1]],
124
+ [-1, 1, Conv, [128, 3, 1]],
125
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
126
+ [-1, 1, Conv, [256, 1, 1]], # 93
127
+
128
+ [-1, 1, Conv, [384, 3, 2]],
129
+ [[-1, 59], 1, Concat, [1]], # cat
130
+
131
+ [-1, 1, Conv, [384, 1, 1]],
132
+ [-2, 1, Conv, [384, 1, 1]],
133
+ [-1, 1, Conv, [192, 3, 1]],
134
+ [-1, 1, Conv, [192, 3, 1]],
135
+ [-1, 1, Conv, [192, 3, 1]],
136
+ [-1, 1, Conv, [192, 3, 1]],
137
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
138
+ [-1, 1, Conv, [384, 1, 1]], # 103
139
+
140
+ [-1, 1, Conv, [512, 3, 2]],
141
+ [[-1, 47], 1, Concat, [1]], # cat
142
+
143
+ [-1, 1, Conv, [512, 1, 1]],
144
+ [-2, 1, Conv, [512, 1, 1]],
145
+ [-1, 1, Conv, [256, 3, 1]],
146
+ [-1, 1, Conv, [256, 3, 1]],
147
+ [-1, 1, Conv, [256, 3, 1]],
148
+ [-1, 1, Conv, [256, 3, 1]],
149
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
150
+ [-1, 1, Conv, [512, 1, 1]], # 113
151
+
152
+ [83, 1, Conv, [256, 3, 1]],
153
+ [93, 1, Conv, [512, 3, 1]],
154
+ [103, 1, Conv, [768, 3, 1]],
155
+ [113, 1, Conv, [1024, 3, 1]],
156
+
157
+ [83, 1, Conv, [320, 3, 1]],
158
+ [71, 1, Conv, [640, 3, 1]],
159
+ [59, 1, Conv, [960, 3, 1]],
160
+ [47, 1, Conv, [1280, 3, 1]],
161
+
162
+ [[114,115,116,117,118,119,120,121], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
163
+ ]
cfg/training/yolov7.yaml ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # yolov7 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+
17
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
18
+ [-1, 1, Conv, [64, 3, 1]],
19
+
20
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
21
+ [-1, 1, Conv, [64, 1, 1]],
22
+ [-2, 1, Conv, [64, 1, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [[-1, -3, -5, -6], 1, Concat, [1]],
28
+ [-1, 1, Conv, [256, 1, 1]], # 11
29
+
30
+ [-1, 1, MP, []],
31
+ [-1, 1, Conv, [128, 1, 1]],
32
+ [-3, 1, Conv, [128, 1, 1]],
33
+ [-1, 1, Conv, [128, 3, 2]],
34
+ [[-1, -3], 1, Concat, [1]], # 16-P3/8
35
+ [-1, 1, Conv, [128, 1, 1]],
36
+ [-2, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [[-1, -3, -5, -6], 1, Concat, [1]],
42
+ [-1, 1, Conv, [512, 1, 1]], # 24
43
+
44
+ [-1, 1, MP, []],
45
+ [-1, 1, Conv, [256, 1, 1]],
46
+ [-3, 1, Conv, [256, 1, 1]],
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, -3], 1, Concat, [1]], # 29-P4/16
49
+ [-1, 1, Conv, [256, 1, 1]],
50
+ [-2, 1, Conv, [256, 1, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [-1, 1, Conv, [256, 3, 1]],
53
+ [-1, 1, Conv, [256, 3, 1]],
54
+ [-1, 1, Conv, [256, 3, 1]],
55
+ [[-1, -3, -5, -6], 1, Concat, [1]],
56
+ [-1, 1, Conv, [1024, 1, 1]], # 37
57
+
58
+ [-1, 1, MP, []],
59
+ [-1, 1, Conv, [512, 1, 1]],
60
+ [-3, 1, Conv, [512, 1, 1]],
61
+ [-1, 1, Conv, [512, 3, 2]],
62
+ [[-1, -3], 1, Concat, [1]], # 42-P5/32
63
+ [-1, 1, Conv, [256, 1, 1]],
64
+ [-2, 1, Conv, [256, 1, 1]],
65
+ [-1, 1, Conv, [256, 3, 1]],
66
+ [-1, 1, Conv, [256, 3, 1]],
67
+ [-1, 1, Conv, [256, 3, 1]],
68
+ [-1, 1, Conv, [256, 3, 1]],
69
+ [[-1, -3, -5, -6], 1, Concat, [1]],
70
+ [-1, 1, Conv, [1024, 1, 1]], # 50
71
+ ]
72
+
73
+ # yolov7 head
74
+ head:
75
+ [[-1, 1, SPPCSPC, [512]], # 51
76
+
77
+ [-1, 1, Conv, [256, 1, 1]],
78
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
79
+ [37, 1, Conv, [256, 1, 1]], # route backbone P4
80
+ [[-1, -2], 1, Concat, [1]],
81
+
82
+ [-1, 1, Conv, [256, 1, 1]],
83
+ [-2, 1, Conv, [256, 1, 1]],
84
+ [-1, 1, Conv, [128, 3, 1]],
85
+ [-1, 1, Conv, [128, 3, 1]],
86
+ [-1, 1, Conv, [128, 3, 1]],
87
+ [-1, 1, Conv, [128, 3, 1]],
88
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
89
+ [-1, 1, Conv, [256, 1, 1]], # 63
90
+
91
+ [-1, 1, Conv, [128, 1, 1]],
92
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
93
+ [24, 1, Conv, [128, 1, 1]], # route backbone P3
94
+ [[-1, -2], 1, Concat, [1]],
95
+
96
+ [-1, 1, Conv, [128, 1, 1]],
97
+ [-2, 1, Conv, [128, 1, 1]],
98
+ [-1, 1, Conv, [64, 3, 1]],
99
+ [-1, 1, Conv, [64, 3, 1]],
100
+ [-1, 1, Conv, [64, 3, 1]],
101
+ [-1, 1, Conv, [64, 3, 1]],
102
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
103
+ [-1, 1, Conv, [128, 1, 1]], # 75
104
+
105
+ [-1, 1, MP, []],
106
+ [-1, 1, Conv, [128, 1, 1]],
107
+ [-3, 1, Conv, [128, 1, 1]],
108
+ [-1, 1, Conv, [128, 3, 2]],
109
+ [[-1, -3, 63], 1, Concat, [1]],
110
+
111
+ [-1, 1, Conv, [256, 1, 1]],
112
+ [-2, 1, Conv, [256, 1, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [-1, 1, Conv, [128, 3, 1]],
115
+ [-1, 1, Conv, [128, 3, 1]],
116
+ [-1, 1, Conv, [128, 3, 1]],
117
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
118
+ [-1, 1, Conv, [256, 1, 1]], # 88
119
+
120
+ [-1, 1, MP, []],
121
+ [-1, 1, Conv, [256, 1, 1]],
122
+ [-3, 1, Conv, [256, 1, 1]],
123
+ [-1, 1, Conv, [256, 3, 2]],
124
+ [[-1, -3, 51], 1, Concat, [1]],
125
+
126
+ [-1, 1, Conv, [512, 1, 1]],
127
+ [-2, 1, Conv, [512, 1, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [-1, 1, Conv, [256, 3, 1]],
130
+ [-1, 1, Conv, [256, 3, 1]],
131
+ [-1, 1, Conv, [256, 3, 1]],
132
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
133
+ [-1, 1, Conv, [512, 1, 1]], # 101
134
+
135
+ [75, 1, RepConv, [256, 3, 1]],
136
+ [88, 1, RepConv, [512, 3, 1]],
137
+ [101, 1, RepConv, [1024, 3, 1]],
138
+
139
+ [[102,103,104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
140
+ ]
cfg/training/yolov7x.yaml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # yolov7 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [40, 3, 1]], # 0
16
+
17
+ [-1, 1, Conv, [80, 3, 2]], # 1-P1/2
18
+ [-1, 1, Conv, [80, 3, 1]],
19
+
20
+ [-1, 1, Conv, [160, 3, 2]], # 3-P2/4
21
+ [-1, 1, Conv, [64, 1, 1]],
22
+ [-2, 1, Conv, [64, 1, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [-1, 1, Conv, [64, 3, 1]],
29
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
30
+ [-1, 1, Conv, [320, 1, 1]], # 13
31
+
32
+ [-1, 1, MP, []],
33
+ [-1, 1, Conv, [160, 1, 1]],
34
+ [-3, 1, Conv, [160, 1, 1]],
35
+ [-1, 1, Conv, [160, 3, 2]],
36
+ [[-1, -3], 1, Concat, [1]], # 18-P3/8
37
+ [-1, 1, Conv, [128, 1, 1]],
38
+ [-2, 1, Conv, [128, 1, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [-1, 1, Conv, [128, 3, 1]],
42
+ [-1, 1, Conv, [128, 3, 1]],
43
+ [-1, 1, Conv, [128, 3, 1]],
44
+ [-1, 1, Conv, [128, 3, 1]],
45
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
46
+ [-1, 1, Conv, [640, 1, 1]], # 28
47
+
48
+ [-1, 1, MP, []],
49
+ [-1, 1, Conv, [320, 1, 1]],
50
+ [-3, 1, Conv, [320, 1, 1]],
51
+ [-1, 1, Conv, [320, 3, 2]],
52
+ [[-1, -3], 1, Concat, [1]], # 33-P4/16
53
+ [-1, 1, Conv, [256, 1, 1]],
54
+ [-2, 1, Conv, [256, 1, 1]],
55
+ [-1, 1, Conv, [256, 3, 1]],
56
+ [-1, 1, Conv, [256, 3, 1]],
57
+ [-1, 1, Conv, [256, 3, 1]],
58
+ [-1, 1, Conv, [256, 3, 1]],
59
+ [-1, 1, Conv, [256, 3, 1]],
60
+ [-1, 1, Conv, [256, 3, 1]],
61
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
62
+ [-1, 1, Conv, [1280, 1, 1]], # 43
63
+
64
+ [-1, 1, MP, []],
65
+ [-1, 1, Conv, [640, 1, 1]],
66
+ [-3, 1, Conv, [640, 1, 1]],
67
+ [-1, 1, Conv, [640, 3, 2]],
68
+ [[-1, -3], 1, Concat, [1]], # 48-P5/32
69
+ [-1, 1, Conv, [256, 1, 1]],
70
+ [-2, 1, Conv, [256, 1, 1]],
71
+ [-1, 1, Conv, [256, 3, 1]],
72
+ [-1, 1, Conv, [256, 3, 1]],
73
+ [-1, 1, Conv, [256, 3, 1]],
74
+ [-1, 1, Conv, [256, 3, 1]],
75
+ [-1, 1, Conv, [256, 3, 1]],
76
+ [-1, 1, Conv, [256, 3, 1]],
77
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
78
+ [-1, 1, Conv, [1280, 1, 1]], # 58
79
+ ]
80
+
81
+ # yolov7 head
82
+ head:
83
+ [[-1, 1, SPPCSPC, [640]], # 59
84
+
85
+ [-1, 1, Conv, [320, 1, 1]],
86
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
87
+ [43, 1, Conv, [320, 1, 1]], # route backbone P4
88
+ [[-1, -2], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [256, 1, 1]],
91
+ [-2, 1, Conv, [256, 1, 1]],
92
+ [-1, 1, Conv, [256, 3, 1]],
93
+ [-1, 1, Conv, [256, 3, 1]],
94
+ [-1, 1, Conv, [256, 3, 1]],
95
+ [-1, 1, Conv, [256, 3, 1]],
96
+ [-1, 1, Conv, [256, 3, 1]],
97
+ [-1, 1, Conv, [256, 3, 1]],
98
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
99
+ [-1, 1, Conv, [320, 1, 1]], # 73
100
+
101
+ [-1, 1, Conv, [160, 1, 1]],
102
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
103
+ [28, 1, Conv, [160, 1, 1]], # route backbone P3
104
+ [[-1, -2], 1, Concat, [1]],
105
+
106
+ [-1, 1, Conv, [128, 1, 1]],
107
+ [-2, 1, Conv, [128, 1, 1]],
108
+ [-1, 1, Conv, [128, 3, 1]],
109
+ [-1, 1, Conv, [128, 3, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
115
+ [-1, 1, Conv, [160, 1, 1]], # 87
116
+
117
+ [-1, 1, MP, []],
118
+ [-1, 1, Conv, [160, 1, 1]],
119
+ [-3, 1, Conv, [160, 1, 1]],
120
+ [-1, 1, Conv, [160, 3, 2]],
121
+ [[-1, -3, 73], 1, Concat, [1]],
122
+
123
+ [-1, 1, Conv, [256, 1, 1]],
124
+ [-2, 1, Conv, [256, 1, 1]],
125
+ [-1, 1, Conv, [256, 3, 1]],
126
+ [-1, 1, Conv, [256, 3, 1]],
127
+ [-1, 1, Conv, [256, 3, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [-1, 1, Conv, [256, 3, 1]],
130
+ [-1, 1, Conv, [256, 3, 1]],
131
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
132
+ [-1, 1, Conv, [320, 1, 1]], # 102
133
+
134
+ [-1, 1, MP, []],
135
+ [-1, 1, Conv, [320, 1, 1]],
136
+ [-3, 1, Conv, [320, 1, 1]],
137
+ [-1, 1, Conv, [320, 3, 2]],
138
+ [[-1, -3, 59], 1, Concat, [1]],
139
+
140
+ [-1, 1, Conv, [512, 1, 1]],
141
+ [-2, 1, Conv, [512, 1, 1]],
142
+ [-1, 1, Conv, [512, 3, 1]],
143
+ [-1, 1, Conv, [512, 3, 1]],
144
+ [-1, 1, Conv, [512, 3, 1]],
145
+ [-1, 1, Conv, [512, 3, 1]],
146
+ [-1, 1, Conv, [512, 3, 1]],
147
+ [-1, 1, Conv, [512, 3, 1]],
148
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
149
+ [-1, 1, Conv, [640, 1, 1]], # 117
150
+
151
+ [87, 1, Conv, [320, 3, 1]],
152
+ [102, 1, Conv, [640, 3, 1]],
153
+ [117, 1, Conv, [1280, 3, 1]],
154
+
155
+ [[118,119,120], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
156
+ ]
data/coco.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # COCO 2017 dataset http://cocodataset.org
2
+
3
+ # download command/URL (optional)
4
+ download: bash ./scripts/get_coco.sh
5
+
6
+ # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
7
+ train: ./coco/train2017.txt # 118287 images
8
+ val: ./coco/val2017.txt # 5000 images
9
+ test: ./coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
10
+
11
+ # number of classes
12
+ nc: 80
13
+
14
+ # class names
15
+ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
16
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
17
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
18
+ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
19
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
20
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
21
+ 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
22
+ 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
23
+ 'hair drier', 'toothbrush' ]
data/hyp.scratch.custom.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.3 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 0.7 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.2 # image translation (+/- fraction)
22
+ scale: 0.5 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.0 # image mixup (probability)
29
+ copy_paste: 0.0 # image copy paste (probability)
30
+ paste_in: 0.0 # image paste-in (probability), use 0 for faster training
31
+ loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
data/hyp.scratch.p5.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.3 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 0.7 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.2 # image translation (+/- fraction)
22
+ scale: 0.9 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.15 # image mixup (probability)
29
+ copy_paste: 0.0 # image copy paste (probability)
30
+ paste_in: 0.15 # image paste-in (probability), use 0 for faster training
31
+ loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
data/hyp.scratch.p6.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.3 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 0.7 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.2 # image translation (+/- fraction)
22
+ scale: 0.9 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.15 # image mixup (probability)
29
+ copy_paste: 0.0 # image copy paste (probability)
30
+ paste_in: 0.15 # image copy paste (probability), use 0 for faster training
31
+ loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
data/hyp.scratch.tiny.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.5 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 1.0 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.1 # image translation (+/- fraction)
22
+ scale: 0.5 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.05 # image mixup (probability)
29
+ copy_paste: 0.0 # image copy paste (probability)
30
+ paste_in: 0.05 # image copy paste (probability), use 0 for faster training
31
+ loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
deploy/triton-inference-server/README.md ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv7 on Triton Inference Server
2
+
3
+ Instructions to deploy YOLOv7 as a TensorRT engine to [Triton Inference Server](https://github.com/NVIDIA/triton-inference-server).
4
+
5
+ Triton Inference Server takes care of model deployment with many out-of-the-box benefits, like a GRPC and HTTP interface, automatic scheduling on multiple GPUs, shared memory (even on GPU), dynamic server-side batching, health metrics and memory resource management.
6
+
7
+ There are no additional dependencies needed to run this deployment, except a working docker daemon with GPU support.
8
+
9
+ ## Export TensorRT
10
+
11
+ See https://github.com/WongKinYiu/yolov7#export for more info.
12
+
13
+ ```bash
14
+ # Install onnx-simplifier (not listed in the general yolov7 requirements.txt)
15
+ pip3 install onnx-simplifier
16
+
17
+ # Pytorch Yolov7 -> ONNX with grid, EfficientNMS plugin and dynamic batch size
18
+ python export.py --weights ./yolov7.pt --grid --end2end --dynamic-batch --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640
19
+ # ONNX -> TensorRT with trtexec and docker
20
+ docker run -it --rm --gpus=all nvcr.io/nvidia/tensorrt:22.06-py3
21
+ # Copy onnx -> container: docker cp yolov7.onnx <container-id>:/workspace/
22
+ # Export with FP16 precision, min batch 1, opt batch 8 and max batch 8
23
+ ./tensorrt/bin/trtexec --onnx=yolov7.onnx --minShapes=images:1x3x640x640 --optShapes=images:8x3x640x640 --maxShapes=images:8x3x640x640 --fp16 --workspace=4096 --saveEngine=yolov7-fp16-1x8x8.engine --timingCacheFile=timing.cache
24
+ # Test engine
25
+ ./tensorrt/bin/trtexec --loadEngine=yolov7-fp16-1x8x8.engine
26
+ # Copy engine -> host: docker cp <container-id>:/workspace/yolov7-fp16-1x8x8.engine .
27
+ ```
28
+
29
+ Example output of test with RTX 3090.
30
+
31
+ ```
32
+ [I] === Performance summary ===
33
+ [I] Throughput: 73.4985 qps
34
+ [I] Latency: min = 14.8578 ms, max = 15.8344 ms, mean = 15.07 ms, median = 15.0422 ms, percentile(99%) = 15.7443 ms
35
+ [I] End-to-End Host Latency: min = 25.8715 ms, max = 28.4102 ms, mean = 26.672 ms, median = 26.6082 ms, percentile(99%) = 27.8314 ms
36
+ [I] Enqueue Time: min = 0.793701 ms, max = 1.47144 ms, mean = 1.2008 ms, median = 1.28644 ms, percentile(99%) = 1.38965 ms
37
+ [I] H2D Latency: min = 1.50073 ms, max = 1.52454 ms, mean = 1.51225 ms, median = 1.51404 ms, percentile(99%) = 1.51941 ms
38
+ [I] GPU Compute Time: min = 13.3386 ms, max = 14.3186 ms, mean = 13.5448 ms, median = 13.5178 ms, percentile(99%) = 14.2151 ms
39
+ [I] D2H Latency: min = 0.00878906 ms, max = 0.0172729 ms, mean = 0.0128844 ms, median = 0.0125732 ms, percentile(99%) = 0.0166016 ms
40
+ [I] Total Host Walltime: 3.04768 s
41
+ [I] Total GPU Compute Time: 3.03404 s
42
+ [I] Explanations of the performance metrics are printed in the verbose logs.
43
+ ```
44
+ Note: 73.5 qps x batch 8 = 588 fps @ ~15ms latency.
45
+
46
+ ## Model Repository
47
+
48
+ See [Triton Model Repository Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_repository.md#model-repository) for more info.
49
+
50
+ ```bash
51
+ # Create folder structure
52
+ mkdir -p triton-deploy/models/yolov7/1/
53
+ touch triton-deploy/models/yolov7/config.pbtxt
54
+ # Place model
55
+ mv yolov7-fp16-1x8x8.engine triton-deploy/models/yolov7/1/model.plan
56
+ ```
57
+
58
+ ## Model Configuration
59
+
60
+ See [Triton Model Configuration Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#model-configuration) for more info.
61
+
62
+ Minimal configuration for `triton-deploy/models/yolov7/config.pbtxt`:
63
+
64
+ ```
65
+ name: "yolov7"
66
+ platform: "tensorrt_plan"
67
+ max_batch_size: 8
68
+ dynamic_batching { }
69
+ ```
70
+
71
+ Example repository:
72
+
73
+ ```bash
74
+ $ tree triton-deploy/
75
+ triton-deploy/
76
+ └── models
77
+ └── yolov7
78
+ ├── 1
79
+ │   └── model.plan
80
+ └── config.pbtxt
81
+
82
+ 3 directories, 2 files
83
+ ```
84
+
85
+ ## Start Triton Inference Server
86
+
87
+ ```
88
+ docker run --gpus all --rm --ipc=host --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd)/triton-deploy/models:/models nvcr.io/nvidia/tritonserver:22.06-py3 tritonserver --model-repository=/models --strict-model-config=false --log-verbose 1
89
+ ```
90
+
91
+ In the log you should see:
92
+
93
+ ```
94
+ +--------+---------+--------+
95
+ | Model | Version | Status |
96
+ +--------+---------+--------+
97
+ | yolov7 | 1 | READY |
98
+ +--------+---------+--------+
99
+ ```
100
+
101
+ ## Performance with Model Analyzer
102
+
103
+ See [Triton Model Analyzer Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_analyzer.md#model-analyzer) for more info.
104
+
105
+ Performance numbers @ RTX 3090 + AMD Ryzen 9 5950X
106
+
107
+ Example test for 16 concurrent clients using shared memory, each with batch size 1 requests:
108
+
109
+ ```bash
110
+ docker run -it --ipc=host --net=host nvcr.io/nvidia/tritonserver:22.06-py3-sdk /bin/bash
111
+
112
+ ./install/bin/perf_analyzer -m yolov7 -u 127.0.0.1:8001 -i grpc --shared-memory system --concurrency-range 16
113
+
114
+ # Result (truncated)
115
+ Concurrency: 16, throughput: 590.119 infer/sec, latency 27080 usec
116
+ ```
117
+
118
+ Throughput for 16 clients with batch size 1 is the same as for a single thread running the engine at 16 batch size locally, thanks to the Triton [Dynamic Batching Strategy](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#dynamic-batcher). The result without dynamic batching (disabled in the model configuration) is considerably worse:
119
+
120
+ ```bash
121
+ # Result (truncated)
122
+ Concurrency: 16, throughput: 335.587 infer/sec, latency 47616 usec
123
+ ```
124
+
125
+ ## How to run model in your code
126
+
127
+ An example client can be found in `client.py`. It can run dummy input, images and videos.
128
+
129
+ ```bash
130
+ pip3 install tritonclient[all] opencv-python
131
+ python3 client.py image data/dog.jpg
132
+ ```
133
+
134
+ ![exemplary output result](data/dog_result.jpg)
135
+
136
+ ```
137
+ $ python3 client.py --help
138
+ usage: client.py [-h] [-m MODEL] [--width WIDTH] [--height HEIGHT] [-u URL] [-o OUT] [-f FPS] [-i] [-v] [-t CLIENT_TIMEOUT] [-s] [-r ROOT_CERTIFICATES] [-p PRIVATE_KEY] [-x CERTIFICATE_CHAIN] {dummy,image,video} [input]
139
+
140
+ positional arguments:
141
+ {dummy,image,video} Run mode. 'dummy' will send an empty buffer to the server to test if inference works. 'image' will process an image. 'video' will process a video.
142
+ input Input file to load from in image or video mode
143
+
144
+ optional arguments:
145
+ -h, --help show this help message and exit
146
+ -m MODEL, --model MODEL
147
+ Inference model name, default yolov7
148
+ --width WIDTH Inference model input width, default 640
149
+ --height HEIGHT Inference model input height, default 640
150
+ -u URL, --url URL Inference server URL, default localhost:8001
151
+ -o OUT, --out OUT Write output into file instead of displaying it
152
+ -f FPS, --fps FPS Video output fps, default 24.0 FPS
153
+ -i, --model-info Print model status, configuration and statistics
154
+ -v, --verbose Enable verbose client output
155
+ -t CLIENT_TIMEOUT, --client-timeout CLIENT_TIMEOUT
156
+ Client timeout in seconds, default no timeout
157
+ -s, --ssl Enable SSL encrypted channel to the server
158
+ -r ROOT_CERTIFICATES, --root-certificates ROOT_CERTIFICATES
159
+ File holding PEM-encoded root certificates, default none
160
+ -p PRIVATE_KEY, --private-key PRIVATE_KEY
161
+ File holding PEM-encoded private key, default is none
162
+ -x CERTIFICATE_CHAIN, --certificate-chain CERTIFICATE_CHAIN
163
+ File holding PEM-encoded certificate chain, default is none
164
+ ```
deploy/triton-inference-server/boundingbox.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class BoundingBox:
    """A single detection box in absolute and image-normalised coordinates.

    ``x1``/``y1`` and ``x2``/``y2`` are stored exactly as passed in. ``u1``,
    ``u2`` are the x coordinates divided by ``image_width`` and ``v1``, ``v2``
    are the y coordinates divided by ``image_height``.

    Note the constructor's argument order: both x coordinates come first,
    followed by both y coordinates (``x1, x2, y1, y2``).
    """

    def __init__(self, classID, confidence, x1, x2, y1, y2, image_width, image_height):
        self.classID = classID
        self.confidence = confidence
        self.x1 = x1
        self.x2 = x2
        self.y1 = y1
        self.y2 = y2
        # Normalised copies of the coordinates (fractions of the image size).
        self.u1 = x1 / image_width
        self.u2 = x2 / image_width
        self.v1 = y1 / image_height
        self.v2 = y2 / image_height

    def __repr__(self):
        return (
            f"{type(self).__name__}(classID={self.classID!r}, "
            f"confidence={self.confidence!r}, box={self.box()!r})"
        )

    def box(self):
        """Return the absolute corners as ``(x1, y1, x2, y2)``."""
        return (self.x1, self.y1, self.x2, self.y2)

    def width(self):
        """Return the absolute box width (``x2 - x1``)."""
        return self.x2 - self.x1

    def height(self):
        """Return the absolute box height (``y2 - y1``)."""
        return self.y2 - self.y1

    def center_absolute(self):
        """Return the box centre ``(x, y)`` in absolute coordinates."""
        return (0.5 * (self.x1 + self.x2), 0.5 * (self.y1 + self.y2))

    def center_normalized(self):
        """Return the box centre ``(u, v)`` in normalised coordinates."""
        return (0.5 * (self.u1 + self.u2), 0.5 * (self.v1 + self.v2))

    def size_absolute(self):
        """Return ``(width, height)`` in absolute coordinates."""
        return (self.x2 - self.x1, self.y2 - self.y1)

    def size_normalized(self):
        """Return ``(width, height)`` in normalised coordinates."""
        return (self.u2 - self.u1, self.v2 - self.v1)
deploy/triton-inference-server/client.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ import argparse
4
+ import numpy as np
5
+ import sys
6
+ import cv2
7
+
8
+ import tritonclient.grpc as grpcclient
9
+ from tritonclient.utils import InferenceServerException
10
+
11
+ from processing import preprocess, postprocess
12
+ from render import render_box, render_filled_box, get_text_size, render_text, RAND_COLORS
13
+ from labels import COCOLabels
14
+
15
INPUT_NAMES = ["images"]
OUTPUT_NAMES = ["num_dets", "det_boxes", "det_scores", "det_classes"]


def _build_parser():
    """Build the command-line parser of the example client."""
    parser = argparse.ArgumentParser()
    parser.add_argument('mode',
                        choices=['dummy', 'image', 'video'],
                        default='dummy',
                        help="Run mode. 'dummy' will send an empty buffer to the server to test if inference works. 'image' will process an image. 'video' will process a video.")
    parser.add_argument('input',
                        type=str,
                        nargs='?',
                        help='Input file to load from in image or video mode')
    parser.add_argument('-m',
                        '--model',
                        type=str,
                        required=False,
                        default='yolov7',
                        help='Inference model name, default yolov7')
    parser.add_argument('--width',
                        type=int,
                        required=False,
                        default=640,
                        help='Inference model input width, default 640')
    parser.add_argument('--height',
                        type=int,
                        required=False,
                        default=640,
                        help='Inference model input height, default 640')
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8001',
                        help='Inference server URL, default localhost:8001')
    parser.add_argument('-o',
                        '--out',
                        type=str,
                        required=False,
                        default='',
                        help='Write output into file instead of displaying it')
    parser.add_argument('-f',
                        '--fps',
                        type=float,
                        required=False,
                        default=24.0,
                        help='Video output fps, default 24.0 FPS')
    parser.add_argument('-i',
                        '--model-info',
                        action="store_true",
                        required=False,
                        default=False,
                        help='Print model status, configuration and statistics')
    parser.add_argument('-v',
                        '--verbose',
                        action="store_true",
                        required=False,
                        default=False,
                        help='Enable verbose client output')
    parser.add_argument('-t',
                        '--client-timeout',
                        type=float,
                        required=False,
                        default=None,
                        help='Client timeout in seconds, default no timeout')
    parser.add_argument('-s',
                        '--ssl',
                        action="store_true",
                        required=False,
                        default=False,
                        help='Enable SSL encrypted channel to the server')
    parser.add_argument('-r',
                        '--root-certificates',
                        type=str,
                        required=False,
                        default=None,
                        help='File holding PEM-encoded root certificates, default none')
    parser.add_argument('-p',
                        '--private-key',
                        type=str,
                        required=False,
                        default=None,
                        help='File holding PEM-encoded private key, default is none')
    parser.add_argument('-x',
                        '--certificate-chain',
                        type=str,
                        required=False,
                        default=None,
                        help='File holding PEM-encoded certificate chain, default is none')
    return parser


def _create_io(input_shape):
    """Create the request input (batch of one NCHW FP32 image) and the four requested outputs."""
    height, width = input_shape
    inputs = [grpcclient.InferInput(INPUT_NAMES[0], [1, 3, height, width], "FP32")]
    outputs = [grpcclient.InferRequestedOutput(name) for name in OUTPUT_NAMES]
    return inputs, outputs


def _infer(triton_client, flags, inputs, outputs):
    """Run one synchronous inference request against the configured model."""
    return triton_client.infer(model_name=flags.model,
                               inputs=inputs,
                               outputs=outputs,
                               client_timeout=flags.client_timeout)


def _print_model_info(triton_client, model_name):
    """Print model metadata and configuration; exit with status 1 on any failure."""
    # Model metadata
    try:
        metadata = triton_client.get_model_metadata(model_name)
        print(metadata)
    except InferenceServerException as ex:
        print("FAILED : get_model_metadata")
        if "Request for unknown model" not in ex.message():
            print("Got: {}".format(ex.message()))
        sys.exit(1)

    # Model configuration
    try:
        config = triton_client.get_model_config(model_name)
        if not (config.config.name == model_name):
            print("FAILED: get_model_config")
            sys.exit(1)
        print(config)
    except InferenceServerException as ex:
        print("FAILED : get_model_config")
        print("Got: {}".format(ex.message()))
        sys.exit(1)


def _print_statistics(triton_client, model_name):
    """Print the server-side inference statistics of the model; exit with status 1 if unavailable."""
    statistics = triton_client.get_inference_statistics(model_name=model_name)
    if len(statistics.model_stats) != 1:
        print("FAILED: get_inference_statistics")
        sys.exit(1)
    print(statistics)


def _print_result_buffers(results):
    """Print shape and a naive checksum of every requested output buffer."""
    for output in OUTPUT_NAMES:
        result = results.as_numpy(output)
        print(f"Received result buffer \"{output}\" of size {result.shape}")
        print(f"Naive buffer sum: {np.sum(result)}")


def _detect_objects(results, image, input_shape):
    """Convert the raw inference outputs into bounding boxes in ``image`` coordinates."""
    num_dets = results.as_numpy(OUTPUT_NAMES[0])
    det_boxes = results.as_numpy(OUTPUT_NAMES[1])
    det_scores = results.as_numpy(OUTPUT_NAMES[2])
    det_classes = results.as_numpy(OUTPUT_NAMES[3])
    return postprocess(num_dets, det_boxes, det_scores, det_classes, image.shape[1], image.shape[0], input_shape)


def _draw_detections(image, detected_objects):
    """Print every detection and draw its box and caption into ``image``; return the image."""
    for box in detected_objects:
        label = COCOLabels(box.classID).name
        caption = f"{label}: {box.confidence:.2f}"
        print(f"{label}: {box.confidence}")
        image = render_box(image, box.box(), color=tuple(RAND_COLORS[box.classID % 64].tolist()))
        size = get_text_size(image, caption, normalised_scaling=0.6)
        image = render_filled_box(image, (box.x1 - 3, box.y1 - 3, box.x1 + size[0], box.y1 + size[1]), color=(220, 220, 220))
        image = render_text(image, caption, (box.x1, box.y1), color=(30, 30, 30), normalised_scaling=0.5)
    return image


def _run_dummy(triton_client, flags, input_shape):
    """Send a buffer filled with ones to check that inference works end to end."""
    print("Running in 'dummy' mode")
    print("Creating empty buffer filled with ones...")
    inputs, outputs = _create_io(input_shape)
    inputs[0].set_data_from_numpy(np.ones(shape=(1, 3, input_shape[0], input_shape[1]), dtype=np.float32))

    print("Invoking inference...")
    results = _infer(triton_client, flags, inputs, outputs)
    if flags.model_info:
        _print_statistics(triton_client, flags.model)
    print("Done")

    _print_result_buffers(results)


def _run_image(triton_client, flags, input_shape):
    """Run detection on a single image file and save or display the annotated result."""
    print("Running in 'image' mode")
    if not flags.input:
        print("FAILED: no input image")
        sys.exit(1)

    inputs, outputs = _create_io(input_shape)

    print("Creating buffer from image file...")
    input_image = cv2.imread(str(flags.input))
    if input_image is None:
        print(f"FAILED: could not load input image {str(flags.input)}")
        sys.exit(1)
    input_image_buffer = preprocess(input_image, input_shape)
    input_image_buffer = np.expand_dims(input_image_buffer, axis=0)

    inputs[0].set_data_from_numpy(input_image_buffer)

    print("Invoking inference...")
    results = _infer(triton_client, flags, inputs, outputs)
    if flags.model_info:
        _print_statistics(triton_client, flags.model)
    print("Done")

    _print_result_buffers(results)

    detected_objects = _detect_objects(results, input_image, input_shape)
    print(f"Detected objects: {len(detected_objects)}")
    input_image = _draw_detections(input_image, detected_objects)

    if flags.out:
        cv2.imwrite(flags.out, input_image)
        print(f"Saved result to {flags.out}")
    else:
        cv2.imshow('image', input_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


def _run_video(triton_client, flags, input_shape):
    """Run detection frame by frame on a video and save or display the annotated frames."""
    print("Running in 'video' mode")
    if not flags.input:
        print("FAILED: no input video")
        sys.exit(1)

    inputs, outputs = _create_io(input_shape)

    print("Opening input video stream...")
    cap = cv2.VideoCapture(flags.input)
    if not cap.isOpened():
        print(f"FAILED: cannot open video {flags.input}")
        sys.exit(1)

    counter = 0
    out = None
    print("Invoking inference...")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("failed to fetch next frame")
                break

            # The writer needs the frame size, so it is created on the first frame.
            if counter == 0 and flags.out:
                print("Opening output video stream...")
                fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
                out = cv2.VideoWriter(flags.out, fourcc, flags.fps, (frame.shape[1], frame.shape[0]))

            input_image_buffer = preprocess(frame, input_shape)
            input_image_buffer = np.expand_dims(input_image_buffer, axis=0)

            inputs[0].set_data_from_numpy(input_image_buffer)

            results = _infer(triton_client, flags, inputs, outputs)

            detected_objects = _detect_objects(results, frame, input_shape)
            print(f"Frame {counter}: {len(detected_objects)} objects")
            counter += 1

            frame = _draw_detections(frame, detected_objects)

            if flags.out:
                out.write(frame)
            else:
                cv2.imshow('image', frame)
                if cv2.waitKey(1) == ord('q'):
                    break

        if flags.model_info:
            _print_statistics(triton_client, flags.model)
        print("Done")
    finally:
        cap.release()
        # `out` stays None when no frame could be read, even if --out was given.
        if out is not None:
            out.release()
        if not flags.out:
            cv2.destroyAllWindows()


def main():
    """Parse the command line, check server health and run the selected mode."""
    FLAGS = _build_parser().parse_args()

    # (height, width): the order used by the NCHW input tensor and read by
    # preprocess(), which takes input_shape[0] as the target height.
    input_shape = [FLAGS.height, FLAGS.width]

    # Create server context
    try:
        triton_client = grpcclient.InferenceServerClient(
            url=FLAGS.url,
            verbose=FLAGS.verbose,
            ssl=FLAGS.ssl,
            root_certificates=FLAGS.root_certificates,
            private_key=FLAGS.private_key,
            certificate_chain=FLAGS.certificate_chain)
    except Exception as e:
        print("context creation failed: " + str(e))
        sys.exit(1)

    # Health check
    if not triton_client.is_server_live():
        print("FAILED : is_server_live")
        sys.exit(1)

    if not triton_client.is_server_ready():
        print("FAILED : is_server_ready")
        sys.exit(1)

    if not triton_client.is_model_ready(FLAGS.model):
        print("FAILED : is_model_ready")
        sys.exit(1)

    if FLAGS.model_info:
        _print_model_info(triton_client, FLAGS.model)

    if FLAGS.mode == 'dummy':
        _run_dummy(triton_client, FLAGS, input_shape)
    elif FLAGS.mode == 'image':
        _run_image(triton_client, FLAGS, input_shape)
    elif FLAGS.mode == 'video':
        _run_video(triton_client, FLAGS, input_shape)


if __name__ == '__main__':
    main()
deploy/triton-inference-server/data/dog.jpg ADDED
deploy/triton-inference-server/data/dog_result.jpg ADDED
deploy/triton-inference-server/labels.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+
3
class COCOLabels(Enum):
    """Class index to label name mapping with 80 entries (values 0 to 79).

    Look a label up by index with ``COCOLabels(index).name``.
    """

    # People and vehicles
    PERSON = 0
    BICYCLE = 1
    CAR = 2
    MOTORBIKE = 3
    AEROPLANE = 4
    BUS = 5
    TRAIN = 6
    TRUCK = 7
    BOAT = 8

    # Street furniture
    TRAFFIC_LIGHT = 9
    FIRE_HYDRANT = 10
    STOP_SIGN = 11
    PARKING_METER = 12
    BENCH = 13

    # Animals
    BIRD = 14
    CAT = 15
    DOG = 16
    HORSE = 17
    SHEEP = 18
    COW = 19
    ELEPHANT = 20
    BEAR = 21
    ZEBRA = 22
    GIRAFFE = 23

    # Accessories
    BACKPACK = 24
    UMBRELLA = 25
    HANDBAG = 26
    TIE = 27
    SUITCASE = 28

    # Sports equipment
    FRISBEE = 29
    SKIS = 30
    SNOWBOARD = 31
    SPORTS_BALL = 32
    KITE = 33
    BASEBALL_BAT = 34
    BASEBALL_GLOVE = 35
    SKATEBOARD = 36
    SURFBOARD = 37
    TENNIS_RACKET = 38

    # Tableware
    BOTTLE = 39
    WINE_GLASS = 40
    CUP = 41
    FORK = 42
    KNIFE = 43
    SPOON = 44
    BOWL = 45

    # Food
    BANANA = 46
    APPLE = 47
    SANDWICH = 48
    ORANGE = 49
    BROCCOLI = 50
    CARROT = 51
    HOT_DOG = 52
    PIZZA = 53
    DONUT = 54
    CAKE = 55

    # Furniture
    CHAIR = 56
    SOFA = 57
    POTTEDPLANT = 58
    BED = 59
    DININGTABLE = 60
    TOILET = 61

    # Electronics and appliances
    TVMONITOR = 62
    LAPTOP = 63
    MOUSE = 64
    REMOTE = 65
    KEYBOARD = 66
    CELL_PHONE = 67
    MICROWAVE = 68
    OVEN = 69
    TOASTER = 70
    SINK = 71
    REFRIGERATOR = 72

    # Miscellaneous indoor objects
    BOOK = 73
    CLOCK = 74
    VASE = 75
    SCISSORS = 76
    TEDDY_BEAR = 77
    HAIR_DRIER = 78
    TOOTHBRUSH = 79
deploy/triton-inference-server/processing.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from boundingbox import BoundingBox
2
+
3
+ import cv2
4
+ import numpy as np
5
+
6
def preprocess(img, input_shape, letter_box=True):
    """Resize an image to the network input size and convert it to a CHW float32 buffer.

    :param img: image array with three dimensions (height, width, channels); it is
        converted with ``cv2.COLOR_BGR2RGB``, so BGR channel order is assumed
    :param input_shape: target size, read as ``(height, width)``
    :param letter_box: if True, keep the aspect ratio and centre the resized image
        on a canvas filled with the value 127; if False, stretch to the target size
    :return: float32 array in channel-first layout, divided by 255
    """
    target_h, target_w = input_shape[0], input_shape[1]

    if not letter_box:
        canvas = cv2.resize(img, (target_w, target_h))
    else:
        src_h, src_w, _ = img.shape
        fit_h, fit_w = target_h, target_w
        pad_top, pad_left = 0, 0
        if (target_w / src_w) <= (target_h / src_h):
            # Width is the limiting side: shrink the height and pad top/bottom.
            fit_h = int(src_h * target_w / src_w)
            pad_top = (target_h - fit_h) // 2
        else:
            # Height is the limiting side: shrink the width and pad left/right.
            fit_w = int(src_w * target_h / src_h)
            pad_left = (target_w - fit_w) // 2
        scaled = cv2.resize(img, (fit_w, fit_h))
        canvas = np.full((target_h, target_w, 3), 127, dtype=np.uint8)
        canvas[pad_top:(pad_top + fit_h), pad_left:(pad_left + fit_w), :] = scaled

    rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    chw = rgb.transpose((2, 0, 1)).astype(np.float32)
    chw /= 255.0
    return chw
27
+
28
def postprocess(num_dets, det_boxes, det_scores, det_classes, img_w, img_h, input_shape, letter_box=True):
    """Map raw detections from network-input coordinates back to the original image.

    Only the first batch entry (index 0) of every output is used.

    :param num_dets: array whose element ``[0][0]`` is the number of valid detections
    :param det_boxes: boxes, indexed ``[0, i]`` -> ``(x1, y1, x2, y2)`` in network-input pixels
    :param det_scores: scores, indexed ``[0, i]``
    :param det_classes: class ids, indexed ``[0, i]``
    :param img_w: width of the original image
    :param img_h: height of the original image
    :param input_shape: network input size, read as ``(height, width)`` - the same
        convention ``preprocess`` uses
    :param letter_box: True if the image was letter-boxed during preprocessing
    :return: list of ``BoundingBox`` objects in original-image pixel coordinates
    """
    count = num_dets[0][0]
    in_h, in_w = input_shape[0], input_shape[1]

    # Normalise x by the input width and y by the input height. (Identical to the
    # previous behaviour for square inputs; corrects the axis mix-up otherwise.)
    boxes = det_boxes[0, :count] / np.array([in_w, in_h, in_w, in_h], dtype=np.float32)
    scores = det_scores[0, :count]
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    classes = det_classes[0, :count].astype(int)

    old_h, old_w = img_h, img_w
    offset_h, offset_w = 0, 0
    if letter_box:
        # Reconstruct the size of the padded (letter-boxed) image at original
        # scale and the padding that was added on the limiting axis.
        if (img_w / in_w) >= (img_h / in_h):
            old_h = int(in_h * img_w / in_w)
            offset_h = (old_h - img_h) // 2
        else:
            old_w = int(in_w * img_h / in_h)
            offset_w = (old_w - img_w) // 2

    boxes = boxes * np.array([old_w, old_h, old_w, old_h], dtype=np.float32)
    if letter_box:
        boxes -= np.array([offset_w, offset_h, offset_w, offset_h], dtype=np.float32)
    boxes = boxes.astype(int)

    # BoundingBox takes both x coordinates first, then both y coordinates.
    return [
        BoundingBox(label, score, box[0], box[2], box[1], box[3], img_w, img_h)
        for box, score, label in zip(boxes, scores, classes)
    ]
deploy/triton-inference-server/render.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ import cv2
4
+
5
+ from math import sqrt
6
+
7
# Divisor used (squared) by render_box to derive the line thickness from the image area.
_LINE_THICKNESS_SCALING = 500.0

# Seed the global NumPy RNG so the palette is identical on every run.
np.random.seed(0)
# 64 colours with channel values in [50, 255), one row per colour; used for class visualisation.
RAND_COLORS = np.random.randint(low=50, high=255, size=(64, 3), dtype="int")
RAND_COLORS[0] = [220, 220, 220]
12
+
13
def render_box(img, box, color=(200, 200, 200)):
    """
    Draw a rectangle outline. The line thickness grows with the image area
    and is never less than 1.
    :param img: image to render into
    :param box: (x1, y1, x2, y2) - box coordinates
    :param color: (b, g, r) - box color
    :return: updated image
    """
    left, top, right, bottom = box
    pixel_area = img.shape[0] * img.shape[1]
    stroke = int(round(pixel_area / (_LINE_THICKNESS_SCALING * _LINE_THICKNESS_SCALING)))
    stroke = max(1, stroke)
    top_left = (int(left), int(top))
    bottom_right = (int(right), int(bottom))
    return cv2.rectangle(img, top_left, bottom_right, color, thickness=stroke)
37
+
38
def render_filled_box(img, box, color=(200, 200, 200)):
    """
    Draw a solid (filled) rectangle.
    :param img: image to render into
    :param box: (x1, y1, x2, y2) - box coordinates
    :param color: (b, g, r) - fill color
    :return: updated image
    """
    left, top, right, bottom = box
    top_left = (int(left), int(top))
    bottom_right = (int(right), int(bottom))
    return cv2.rectangle(img, top_left, bottom_right, color, thickness=cv2.FILLED)
55
+
56
_TEXT_THICKNESS_SCALING = 700.0
_TEXT_SCALING = 520.0


def _text_style(img, normalised_scaling):
    """Return the ``(font_scale, thickness)`` used for both measuring and drawing text."""
    thickness = int(
        round(
            (img.shape[0] * img.shape[1])
            / (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING)
        )
        * normalised_scaling
    )
    thickness = max(1, thickness)
    # Font scale follows the image height only.
    scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling
    return scaling, thickness


def get_text_size(img, text, normalised_scaling=1.0):
    """
    Get calculated text size (as box width and height)
    :param img: image reference, used to determine appropriate text scaling
    :param text: text to display
    :param normalised_scaling: additional normalised scaling. Default 1.0.
    :return: (width, height) - width and height of text box
    """
    scaling, thickness = _text_style(img, normalised_scaling)
    return cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scaling, thickness)[0]


def render_text(img, text, pos, color=(200, 200, 200), normalised_scaling=1.0):
    """
    Render a text into the image. Calculates scaling and thickness automatically.
    :param img: image to render into
    :param text: text to display
    :param pos: (x, y) - upper left coordinates of render position
    :param color: (b, g, r) - text color
    :param normalised_scaling: additional normalised scaling. Default 1.0.
    :return: updated image
    """
    x, y = pos
    scaling, thickness = _text_style(img, normalised_scaling)
    size = get_text_size(img, text, normalised_scaling)
    # `pos` is the upper-left corner, so the y passed to putText is shifted
    # down by the measured text height.
    cv2.putText(
        img,
        text,
        (int(x), int(y + size[1])),
        cv2.FONT_HERSHEY_SIMPLEX,
        scaling,
        color,
        thickness=thickness,
    )
    return img
detect.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import time
3
+ from pathlib import Path
4
+
5
+ import cv2
6
+ import torch
7
+ import torch.backends.cudnn as cudnn
8
+ from numpy import random
9
+
10
+ from models.experimental import attempt_load
11
+ from utils.datasets import LoadStreams, LoadImages
12
+ from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
13
+ scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
14
+ from utils.plots import plot_one_box
15
+ from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel
16
+
17
+
18
def detect(save_img=False):
    """Run detection on ``opt.source`` and optionally display and/or save the results.

    Every setting is read from the module-level ``opt`` namespace produced by the
    argument parser in this file.

    :param save_img: ignored; the value is recomputed from ``opt.nosave`` and the
        source string. Kept so existing callers keep working.
    """
    source, weights, view_img, save_txt, imgsz, trace = \
        opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if trace:
        model = TracedModel(model, device, opt.img_size)

    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so .to()/.eval() must be
        # called on the model itself rather than chained onto its result.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()
    try:
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Warmup: re-run a few forward passes whenever the input shape changes
            if device.type != 'cpu' and (
                    old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
                old_img_b = img.shape[0]
                old_img_h = img.shape[2]
                old_img_w = img.shape[3]
                with torch.no_grad():  # warmup passes never need gradients
                    for _ in range(3):
                        model(img, augment=opt.augment)[0]

            # Inference
            t1 = time_synchronized()
            with torch.no_grad():  # Calculating gradients would cause a GPU memory leak
                pred = model(img, augment=opt.augment)[0]
            t2 = time_synchronized()

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                       classes=opt.classes, agnostic=opt.agnostic_nms)
            t3 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
                else:
                    p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

                p = Path(p)  # to Path
                save_path = str(save_dir / p.name)  # img.jpg
                txt_path = str(save_dir / 'labels' / p.stem) + (
                    '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Write results
                    label_lines = []
                    for *xyxy, conf, cls in reversed(det):
                        if save_txt:  # collect label line
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                            line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                            label_lines.append(('%g ' * len(line)).rstrip() % line + '\n')

                        if save_img or view_img:  # Add bbox to image
                            label = f'{names[int(cls)]} {conf:.2f}'
                            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1)

                    # One append per image instead of reopening the file for every detection
                    if label_lines:
                        with open(txt_path + '.txt', 'a') as f:
                            f.writelines(label_lines)

                # Print time (inference + NMS)
                print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')

                # Stream results
                if view_img:
                    cv2.imshow(str(p), im0)
                    cv2.waitKey(1)  # 1 millisecond

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'image':
                        cv2.imwrite(save_path, im0)
                        print(f" The image with the result is saved in: {save_path}")
                    else:  # 'video' or 'stream'
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer
                            if vid_cap:  # video
                                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            else:  # stream
                                fps, w, h = 30, im0.shape[1], im0.shape[0]
                                save_path += '.mp4'
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                        vid_writer.write(im0)
    finally:
        # Release the last writer so the final output video is closed even if
        # the loop ends early with an exception.
        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()

    print(f'Done. ({time.time() - t0:.3f}s)')
164
+
165
+
166
if __name__ == "__main__":
    # Command-line interface; the parsed namespace is stored in the module-level
    # `opt`, which detect() reads directly.
    parser = argparse.ArgumentParser()

    # Model, input and thresholds
    parser.add_argument("--weights", nargs="+", type=str, default="yolov7.pt", help="model.pt path(s)")
    parser.add_argument("--source", type=str, default="inference/images", help="source")  # file/folder, 0 for webcam
    parser.add_argument("--img-size", type=int, default=640, help="inference size (pixels)")
    parser.add_argument("--conf-thres", type=float, default=0.25, help="object confidence threshold")
    parser.add_argument("--iou-thres", type=float, default=0.45, help="IOU threshold for NMS")
    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")

    # Display / output switches
    parser.add_argument("--view-img", action="store_true", help="display results")
    parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
    parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
    parser.add_argument("--nosave", action="store_true", help="do not save images/videos")

    # NMS / inference behaviour
    parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --class 0, or --class 0 2 3")
    parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS")
    parser.add_argument("--augment", action="store_true", help="augmented inference")
    parser.add_argument("--update", action="store_true", help="update all models")

    # Output location and tracing
    parser.add_argument("--project", default="runs/detect", help="save results to project/name")
    parser.add_argument("--name", default="exp", help="save results to project/name")
    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
    parser.add_argument("--no-trace", action="store_true", help="don`t trace model")

    opt = parser.parse_args()
    print(opt)
    # check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
        if not opt.update:
            detect()
        else:
            # update all models (to fix SourceChangeWarning); the loop target
            # rebinds opt.weights before each run
            for opt.weights in ["yolov7.pt"]:
                detect()
                strip_optimizer(opt.weights)
detect_or_track.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import time
3
+ from pathlib import Path
4
+ import cv2
5
+ import torch
6
+ import torch.backends.cudnn as cudnn
7
+ from numpy import random
8
+
9
+ from models.experimental import attempt_load
10
+ from utils.datasets import LoadStreams, LoadImages
11
+ from utils.general import check_img_size, check_requirements, \
12
+ check_imshow, non_max_suppression, apply_classifier, \
13
+ scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
14
+ increment_path
15
+ from utils.plots import plot_one_box
16
+ from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel
17
+
18
+ from sort import *
19
+
20
+
21
+ """Function to Draw Bounding boxes"""
22
def draw_boxes(img, bbox, identities=None, categories=None, confidences=None, names=None, colors=None):
    """Draw bounding boxes and their labels onto ``img``.

    Drawing options (``thickness``, ``nobbox``, ``nolabel``) are read from the
    module-level ``opt`` namespace.

    :param img: image to draw on (modified in place by the cv2 calls)
    :param bbox: iterable of boxes, each unpacked as (x1, y1, x2, y2)
    :param identities: optional per-box ids; when given, labels read "<id>:<name>"
    :param categories: optional per-box class indices; class 0 is used when omitted
    :param confidences: optional per-box scores, used in the label when no identities are given
    :param names: class names, indexed by class index
    :param colors: colors, indexed by class index
    :return: the same image, with boxes/labels drawn
    """
    # Thickness does not depend on the individual box, so compute it once.
    tl = opt.thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness

    for i, box in enumerate(bbox):
        x1, y1, x2, y2 = [int(v) for v in box]
        cat = int(categories[i]) if categories is not None else 0
        color = colors[cat]

        if not opt.nobbox:
            cv2.rectangle(img, (x1, y1), (x2, y2), color, tl)

        if not opt.nolabel:
            if identities is not None:
                label = str(int(identities[i])) + ":" + names[cat]
            elif confidences is not None:
                label = f'{names[cat]} {confidences[i]:.2f}'
            else:
                # Neither ids nor scores supplied: fall back to the class name
                # instead of failing on a None subscript.
                label = names[cat]
            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = x1 + t_size[0], y1 - t_size[1] - 3
            cv2.rectangle(img, (x1, y1), c2, color, -1, cv2.LINE_AA)  # filled label background
            cv2.putText(img, label, (x1, y1 - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

    return img
46
+
47
+
48
def detect(save_img=False):
    """Run detection (and optionally SORT tracking) on ``opt.source``.

    Every setting is read from the module-level ``opt`` namespace; tracking uses
    the module-level ``sort_tracker``.

    NOTE(review): the nesting of the tracking section was reconstructed from a
    view without indentation; it assumes the raw-detection branch is the
    ``else`` of ``if opt.track`` - confirm against the original file.

    :param save_img: ignored; the value is recomputed from ``opt.nosave`` and the
        source string. Kept so existing callers keep working.
    """
    source, weights, view_img, save_txt, imgsz, trace = \
        opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    if not opt.nosave:
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if trace:
        model = TracedModel(model, device, opt.img_size)

    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() does not return the module, so .to()/.eval() must be
        # called on the model itself rather than chained onto its result.
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()
    startTime = 0  # timestamp of the previous frame, used for the FPS overlay
    try:
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Warmup: re-run a few forward passes whenever the input shape changes
            if device.type != 'cpu' and (
                    old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
                old_img_b = img.shape[0]
                old_img_h = img.shape[2]
                old_img_w = img.shape[3]
                with torch.no_grad():
                    for _ in range(3):
                        model(img, augment=opt.augment)[0]

            # Inference
            t1 = time_synchronized()
            with torch.no_grad():  # gradients are never needed here
                pred = model(img, augment=opt.augment)[0]
            t2 = time_synchronized()

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                       classes=opt.classes, agnostic=opt.agnostic_nms)
            t3 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                p = Path(p)  # to Path
                save_path = str(save_dir / p.name)  # img.jpg
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Rows are (x1, y1, x2, y2, conf, class); the class is passed to the tracker too.
                    # Converted in one step instead of stacking row by row.
                    dets_to_sort = det.detach().cpu().numpy().astype(np.float64).reshape(-1, 6)

                    # Stays None when tracking is on but the tracker returned
                    # nothing, so no undefined or stale boxes are drawn.
                    to_draw = None
                    if opt.track:
                        tracked_dets = sort_tracker.update(dets_to_sort, opt.unique_track_color)
                        tracks = sort_tracker.getTrackers()

                        # draw boxes for visualization
                        if len(tracked_dets) > 0:
                            # (boxes, identities, categories, confidences)
                            to_draw = (tracked_dets[:, :4], tracked_dets[:, 8], tracked_dets[:, 4], None)

                            if opt.show_track:
                                # loop over tracks, joining consecutive centroids with line segments
                                for t, track in enumerate(tracks):
                                    track_color = colors[int(track.detclass)] \
                                        if not opt.unique_track_color else sort_tracker.color_list[t]
                                    centroids = track.centroidarr
                                    for k in range(len(centroids) - 1):
                                        cv2.line(im0,
                                                 (int(centroids[k][0]), int(centroids[k][1])),
                                                 (int(centroids[k + 1][0]), int(centroids[k + 1][1])),
                                                 track_color, thickness=opt.thickness)
                    else:
                        to_draw = (dets_to_sort[:, :4], None, dets_to_sort[:, 5], dets_to_sort[:, 4])

                    if to_draw is not None:
                        bbox_xyxy, identities, categories, confidences = to_draw
                        im0 = draw_boxes(im0, bbox_xyxy, identities, categories, confidences, names, colors)

                # Print time (inference + NMS)
                print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')

                # FPS overlay (videos/streams only)
                if dataset.mode != 'image' and opt.show_fps:
                    currentTime = time.time()
                    elapsed = currentTime - startTime
                    # Guard against two frames sharing the same timestamp
                    live_fps = 1 / elapsed if elapsed > 0 else 0
                    startTime = currentTime
                    cv2.putText(im0, "FPS: " + str(int(live_fps)), (20, 70),
                                cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)

                # Stream results
                if view_img:
                    cv2.imshow(str(p), im0)
                    cv2.waitKey(1)  # 1 millisecond

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'image':
                        cv2.imwrite(save_path, im0)
                        print(f" The image with the result is saved in: {save_path}")
                    else:  # 'video' or 'stream'
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer
                            if vid_cap:  # video
                                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            else:  # stream
                                fps, w, h = 30, im0.shape[1], im0.shape[0]
                                save_path += '.mp4'
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                        vid_writer.write(im0)
    finally:
        # Release the last writer so the final output video is closed even if
        # the loop ends early with an exception.
        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()

    print(f'Done. ({time.time() - t0:.3f}s)')
236
+
237
+
238
if __name__ == "__main__":
    # Command-line interface; the parsed namespace is stored in the module-level
    # `opt`, which detect() and draw_boxes() read directly.
    parser = argparse.ArgumentParser()

    # Model, input and thresholds
    parser.add_argument("--weights", nargs="+", type=str, default="yolov7.pt", help="model.pt path(s)")
    parser.add_argument("--source", type=str, default="inference/images", help="source")  # file/folder, 0 for webcam
    parser.add_argument("--img-size", type=int, default=640, help="inference size (pixels)")
    parser.add_argument("--conf-thres", type=float, default=0.25, help="object confidence threshold")
    parser.add_argument("--iou-thres", type=float, default=0.45, help="IOU threshold for NMS")
    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")

    # Display / output switches
    parser.add_argument("--view-img", action="store_true", help="display results")
    parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
    parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
    parser.add_argument("--nosave", action="store_true", help="do not save images/videos")

    # NMS / inference behaviour
    parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --class 0, or --class 0 2 3")
    parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS")
    parser.add_argument("--augment", action="store_true", help="augmented inference")
    parser.add_argument("--update", action="store_true", help="update all models")

    # Output location and tracing
    parser.add_argument("--project", default="runs/detect", help="save results to project/name")
    parser.add_argument("--name", default="exp", help="save results to project/name")
    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
    parser.add_argument("--no-trace", action="store_true", help="don`t trace model")

    # Tracking and drawing options
    parser.add_argument("--track", action="store_true", help="run tracking")
    parser.add_argument("--show-track", action="store_true", help="show tracked path")
    parser.add_argument("--show-fps", action="store_true", help="show fps")
    parser.add_argument("--thickness", type=int, default=2, help="bounding box and font size thickness")
    parser.add_argument("--seed", type=int, default=1, help="random seed to control bbox colors")
    parser.add_argument("--nobbox", action="store_true", help="don`t show bounding box")
    parser.add_argument("--nolabel", action="store_true", help="don`t show label")
    parser.add_argument("--unique-track-color", action="store_true", help="show each track in unique color")

    opt = parser.parse_args()
    print(opt)

    # Seed numpy's RNG before detect() draws the per-class colors
    np.random.seed(opt.seed)

    # Module-level tracker instance used by detect()
    sort_tracker = Sort(
        max_age=5,
        min_hits=2,
        iou_threshold=0.2,
    )

    # check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
        if not opt.update:
            detect()
        else:
            # update all models (to fix SourceChangeWarning); the loop target
            # rebinds opt.weights before each run
            for opt.weights in ["yolov7.pt"]:
                detect()
                strip_optimizer(opt.weights)
export.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ import time
4
+ import warnings
5
+
6
+ sys.path.append('./') # to run '$ python *.py' files in subdirectories
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ from torch.utils.mobile_optimizer import optimize_for_mobile
11
+
12
+ import models
13
+ from models.experimental import attempt_load, End2End
14
+ from utils.activations import Hardswish, SiLU
15
+ from utils.general import set_logging, check_img_size
16
+ from utils.torch_utils import select_device
17
+ from utils.add_nms import RegisterNMS
18
+
19
if __name__ == '__main__':
    # Export script: loads a PyTorch checkpoint and attempts, in order, a
    # TorchScript, CoreML, TorchScript-Lite and ONNX export. Each export runs in
    # its own try block so one failure is reported without stopping the others.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolor-csp-c.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
    parser.add_argument('--dynamic-batch', action='store_true', help='dynamic batch onnx for tensorrt and onnx-runtime')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--end2end', action='store_true', help='export end2end onnx')
    parser.add_argument('--max-wh', type=int, default=None, help='None for tensorrt nms, int value for onnx-runtime nms')
    parser.add_argument('--topk-all', type=int, default=100, help='topk objects for every images')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='iou threshold for NMS')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='conf threshold for NMS')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--simplify', action='store_true', help='simplify onnx model')
    parser.add_argument('--include-nms', action='store_true', help='export end2end onnx')
    parser.add_argument('--fp16', action='store_true', help='CoreML FP16 half-precision export')
    parser.add_argument('--int8', action='store_true', help='CoreML INT8 quantization')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand a single value to [size, size]
    # --end2end and --dynamic-batch each override --dynamic
    opt.dynamic = opt.dynamic and not opt.end2end
    opt.dynamic = False if opt.dynamic_batch else opt.dynamic
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run
    if opt.include_nms:
        model.model[-1].include_nms = True
        y = None  # signals the ONNX section to use the 'classes'/'boxes' output names

    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript export failure: %s' % e)

    # CoreML export (reuses the traced module `ts` from the TorchScript step)
    try:
        import coremltools as ct

        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
        # convert model from torchscript and apply pixel scaling as per detect.py
        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        bits, mode = (8, 'kmeans_lut') if opt.int8 else (16, 'linear') if opt.fp16 else (32, None)
        if bits < 32:
            if sys.platform.lower() == 'darwin':  # quantization only supported on macOS
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=DeprecationWarning)  # suppress numpy==1.20 float warning
                    ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
            else:
                print('quantization only supported on macOS, skipping...')

        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        ct_model.save(f)
        print('CoreML export success, saved as %s' % f)
    except Exception as e:
        print('CoreML export failure: %s' % e)

    # TorchScript-Lite export
    try:
        print('\nStarting TorchScript-Lite export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript.ptl')  # filename
        tsl = torch.jit.trace(model, img, strict=False)
        tsl = optimize_for_mobile(tsl)
        tsl._save_for_lite_interpreter(f)
        print('TorchScript-Lite export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript-Lite export failure: %s' % e)

    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        f = opt.weights.replace('.pt', '.onnx')  # filename
        model.eval()
        output_names = ['classes', 'boxes'] if y is None else ['output']
        dynamic_axes = None
        if opt.dynamic:
            dynamic_axes = {'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                            'output': {0: 'batch', 2: 'y', 3: 'x'}}
        if opt.dynamic_batch:
            opt.batch_size = 'batch'
            dynamic_axes = {
                'images': {
                    0: 'batch',
                }, }
            if opt.end2end and opt.max_wh is None:
                output_axes = {
                    'num_dets': {0: 'batch'},
                    'det_boxes': {0: 'batch'},
                    'det_scores': {0: 'batch'},
                    'det_classes': {0: 'batch'},
                }
            else:
                output_axes = {
                    'output': {0: 'batch'},
                }
            dynamic_axes.update(output_axes)
        if opt.grid:
            if opt.end2end:
                # Parenthesised so the conditional picks the backend name that is
                # interpolated; unparenthesised, '%' binds first and the whole
                # message collapses to 'onnxruntime' when --max-wh is set.
                print('\nStarting export end2end onnx model for %s...'
                      % ('TensorRT' if opt.max_wh is None else 'onnxruntime'))
                model = End2End(model, opt.topk_all, opt.iou_thres, opt.conf_thres, opt.max_wh, device, len(labels))
                if opt.end2end and opt.max_wh is None:
                    output_names = ['num_dets', 'det_boxes', 'det_scores', 'det_classes']
                    # Flat list of dimension labels, consumed in order when the
                    # output dims are renamed after export (see below).
                    shapes = [opt.batch_size, 1, opt.batch_size, opt.topk_all, 4,
                              opt.batch_size, opt.topk_all, opt.batch_size, opt.topk_all]
                else:
                    output_names = ['output']
            else:
                model.model[-1].concat = True

        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=output_names,
                          dynamic_axes=dynamic_axes)

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model

        if opt.end2end and opt.max_wh is None:
            # NOTE(review): `shapes` is only defined when --grid is also set;
            # without it this raises NameError, which the handler below reports
            # as an ONNX export failure.
            for i in onnx_model.graph.output:
                for j in i.type.tensor_type.shape.dim:
                    j.dim_param = str(shapes.pop(0))

        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model

        # # Metadata
        # d = {'stride': int(max(model.stride))}
        # for k, v in d.items():
        #     meta = onnx_model.metadata_props.add()
        #     meta.key, meta.value = k, str(v)
        # onnx.save(onnx_model, f)

        if opt.simplify:
            try:
                import onnxsim

                print('\nStarting to simplify ONNX...')
                onnx_model, check = onnxsim.simplify(onnx_model)
                assert check, 'assert check failed'
            except Exception as e:
                print(f'Simplifier failure: {e}')

        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        onnx.save(onnx_model, f)
        print('ONNX export success, saved as %s' % f)

        if opt.include_nms:
            print('Registering NMS plugin for ONNX...')
            mo = RegisterNMS(f)
            mo.register_nms()
            mo.save(f)

    except Exception as e:
        print('ONNX export failure: %s' % e)

    # Finish
    print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
figure/horses_prediction.jpg ADDED
figure/mask.png ADDED
figure/performance.png ADDED
figure/pose.png ADDED
figure/tennis.jpg ADDED