Myogyi committed on
Commit
2913b7b
·
1 Parent(s): c7d7158

Upload 107 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. LICENSE.md +674 -0
  3. README.md +310 -12
  4. __pycache__/test.cpython-311.pyc +0 -0
  5. __pycache__/torch.cpython-310.pyc +0 -0
  6. __pycache__/torch.cpython-311.pyc +0 -0
  7. app.py +190 -0
  8. cfg/baseline/r50-csp.yaml +49 -0
  9. cfg/baseline/x50-csp.yaml +49 -0
  10. cfg/baseline/yolor-csp-x.yaml +52 -0
  11. cfg/baseline/yolor-csp.yaml +52 -0
  12. cfg/baseline/yolor-d6.yaml +63 -0
  13. cfg/baseline/yolor-e6.yaml +63 -0
  14. cfg/baseline/yolor-p6.yaml +63 -0
  15. cfg/baseline/yolor-w6.yaml +63 -0
  16. cfg/baseline/yolov3-spp.yaml +51 -0
  17. cfg/baseline/yolov3.yaml +51 -0
  18. cfg/baseline/yolov4-csp.yaml +52 -0
  19. cfg/deploy/yolov7-d6.yaml +202 -0
  20. cfg/deploy/yolov7-e6.yaml +180 -0
  21. cfg/deploy/yolov7-e6e.yaml +301 -0
  22. cfg/deploy/yolov7-tiny-silu.yaml +112 -0
  23. cfg/deploy/yolov7-tiny.yaml +112 -0
  24. cfg/deploy/yolov7-w6.yaml +158 -0
  25. cfg/deploy/yolov7.yaml +140 -0
  26. cfg/deploy/yolov7x.yaml +156 -0
  27. cfg/training/yolov7-d6.yaml +207 -0
  28. cfg/training/yolov7-e6.yaml +185 -0
  29. cfg/training/yolov7-e6e.yaml +306 -0
  30. cfg/training/yolov7-tiny.yaml +112 -0
  31. cfg/training/yolov7-w6.yaml +163 -0
  32. cfg/training/yolov7.yaml +140 -0
  33. cfg/training/yolov7x.yaml +156 -0
  34. data/coco.yaml +23 -0
  35. data/hyp.scratch.custom.yaml +31 -0
  36. data/hyp.scratch.p5.yaml +31 -0
  37. data/hyp.scratch.p6.yaml +31 -0
  38. data/hyp.scratch.tiny.yaml +31 -0
  39. deploy/triton-inference-server/README.md +164 -0
  40. deploy/triton-inference-server/boundingbox.py +33 -0
  41. deploy/triton-inference-server/client.py +334 -0
  42. deploy/triton-inference-server/labels.py +83 -0
  43. deploy/triton-inference-server/processing.py +51 -0
  44. deploy/triton-inference-server/render.py +110 -0
  45. environment.yml +469 -0
  46. export.py +205 -0
  47. hubconf.py +97 -0
  48. interfacetest2.py +223 -0
  49. models/__init__.py +1 -0
  50. models/__pycache__/__init__.cpython-311.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ deploy/triton-inference-server/data/dog_result.jpg filter=lfs diff=lfs merge=lfs -text
37
+ deploy/triton-inference-server/data/dog.jpg filter=lfs diff=lfs merge=lfs -text
38
+ models/__pycache__/common.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
39
+ tools/YOLOv7-Dynamic-Batch-TENSORRT.ipynb filter=lfs diff=lfs merge=lfs -text
LICENSE.md ADDED
@@ -0,0 +1,674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU General Public License is a free, copyleft license for
11
+ software and other kinds of works.
12
+
13
+ The licenses for most software and other practical works are designed
14
+ to take away your freedom to share and change the works. By contrast,
15
+ the GNU General Public License is intended to guarantee your freedom to
16
+ share and change all versions of a program--to make sure it remains free
17
+ software for all its users. We, the Free Software Foundation, use the
18
+ GNU General Public License for most of our software; it applies also to
19
+ any other work released this way by its authors. You can apply it to
20
+ your programs, too.
21
+
22
+ When we speak of free software, we are referring to freedom, not
23
+ price. Our General Public Licenses are designed to make sure that you
24
+ have the freedom to distribute copies of free software (and charge for
25
+ them if you wish), that you receive source code or can get it if you
26
+ want it, that you can change the software or use pieces of it in new
27
+ free programs, and that you know you can do these things.
28
+
29
+ To protect your rights, we need to prevent others from denying you
30
+ these rights or asking you to surrender the rights. Therefore, you have
31
+ certain responsibilities if you distribute copies of the software, or if
32
+ you modify it: responsibilities to respect the freedom of others.
33
+
34
+ For example, if you distribute copies of such a program, whether
35
+ gratis or for a fee, you must pass on to the recipients the same
36
+ freedoms that you received. You must make sure that they, too, receive
37
+ or can get the source code. And you must show them these terms so they
38
+ know their rights.
39
+
40
+ Developers that use the GNU GPL protect your rights with two steps:
41
+ (1) assert copyright on the software, and (2) offer you this License
42
+ giving you legal permission to copy, distribute and/or modify it.
43
+
44
+ For the developers' and authors' protection, the GPL clearly explains
45
+ that there is no warranty for this free software. For both users' and
46
+ authors' sake, the GPL requires that modified versions be marked as
47
+ changed, so that their problems will not be attributed erroneously to
48
+ authors of previous versions.
49
+
50
+ Some devices are designed to deny users access to install or run
51
+ modified versions of the software inside them, although the manufacturer
52
+ can do so. This is fundamentally incompatible with the aim of
53
+ protecting users' freedom to change the software. The systematic
54
+ pattern of such abuse occurs in the area of products for individuals to
55
+ use, which is precisely where it is most unacceptable. Therefore, we
56
+ have designed this version of the GPL to prohibit the practice for those
57
+ products. If such problems arise substantially in other domains, we
58
+ stand ready to extend this provision to those domains in future versions
59
+ of the GPL, as needed to protect the freedom of users.
60
+
61
+ Finally, every program is threatened constantly by software patents.
62
+ States should not allow patents to restrict development and use of
63
+ software on general-purpose computers, but in those that do, we wish to
64
+ avoid the special danger that patents applied to a free program could
65
+ make it effectively proprietary. To prevent this, the GPL assures that
66
+ patents cannot be used to render the program non-free.
67
+
68
+ The precise terms and conditions for copying, distribution and
69
+ modification follow.
70
+
71
+ TERMS AND CONDITIONS
72
+
73
+ 0. Definitions.
74
+
75
+ "This License" refers to version 3 of the GNU General Public License.
76
+
77
+ "Copyright" also means copyright-like laws that apply to other kinds of
78
+ works, such as semiconductor masks.
79
+
80
+ "The Program" refers to any copyrightable work licensed under this
81
+ License. Each licensee is addressed as "you". "Licensees" and
82
+ "recipients" may be individuals or organizations.
83
+
84
+ To "modify" a work means to copy from or adapt all or part of the work
85
+ in a fashion requiring copyright permission, other than the making of an
86
+ exact copy. The resulting work is called a "modified version" of the
87
+ earlier work or a work "based on" the earlier work.
88
+
89
+ A "covered work" means either the unmodified Program or a work based
90
+ on the Program.
91
+
92
+ To "propagate" a work means to do anything with it that, without
93
+ permission, would make you directly or secondarily liable for
94
+ infringement under applicable copyright law, except executing it on a
95
+ computer or modifying a private copy. Propagation includes copying,
96
+ distribution (with or without modification), making available to the
97
+ public, and in some countries other activities as well.
98
+
99
+ To "convey" a work means any kind of propagation that enables other
100
+ parties to make or receive copies. Mere interaction with a user through
101
+ a computer network, with no transfer of a copy, is not conveying.
102
+
103
+ An interactive user interface displays "Appropriate Legal Notices"
104
+ to the extent that it includes a convenient and prominently visible
105
+ feature that (1) displays an appropriate copyright notice, and (2)
106
+ tells the user that there is no warranty for the work (except to the
107
+ extent that warranties are provided), that licensees may convey the
108
+ work under this License, and how to view a copy of this License. If
109
+ the interface presents a list of user commands or options, such as a
110
+ menu, a prominent item in the list meets this criterion.
111
+
112
+ 1. Source Code.
113
+
114
+ The "source code" for a work means the preferred form of the work
115
+ for making modifications to it. "Object code" means any non-source
116
+ form of a work.
117
+
118
+ A "Standard Interface" means an interface that either is an official
119
+ standard defined by a recognized standards body, or, in the case of
120
+ interfaces specified for a particular programming language, one that
121
+ is widely used among developers working in that language.
122
+
123
+ The "System Libraries" of an executable work include anything, other
124
+ than the work as a whole, that (a) is included in the normal form of
125
+ packaging a Major Component, but which is not part of that Major
126
+ Component, and (b) serves only to enable use of the work with that
127
+ Major Component, or to implement a Standard Interface for which an
128
+ implementation is available to the public in source code form. A
129
+ "Major Component", in this context, means a major essential component
130
+ (kernel, window system, and so on) of the specific operating system
131
+ (if any) on which the executable work runs, or a compiler used to
132
+ produce the work, or an object code interpreter used to run it.
133
+
134
+ The "Corresponding Source" for a work in object code form means all
135
+ the source code needed to generate, install, and (for an executable
136
+ work) run the object code and to modify the work, including scripts to
137
+ control those activities. However, it does not include the work's
138
+ System Libraries, or general-purpose tools or generally available free
139
+ programs which are used unmodified in performing those activities but
140
+ which are not part of the work. For example, Corresponding Source
141
+ includes interface definition files associated with source files for
142
+ the work, and the source code for shared libraries and dynamically
143
+ linked subprograms that the work is specifically designed to require,
144
+ such as by intimate data communication or control flow between those
145
+ subprograms and other parts of the work.
146
+
147
+ The Corresponding Source need not include anything that users
148
+ can regenerate automatically from other parts of the Corresponding
149
+ Source.
150
+
151
+ The Corresponding Source for a work in source code form is that
152
+ same work.
153
+
154
+ 2. Basic Permissions.
155
+
156
+ All rights granted under this License are granted for the term of
157
+ copyright on the Program, and are irrevocable provided the stated
158
+ conditions are met. This License explicitly affirms your unlimited
159
+ permission to run the unmodified Program. The output from running a
160
+ covered work is covered by this License only if the output, given its
161
+ content, constitutes a covered work. This License acknowledges your
162
+ rights of fair use or other equivalent, as provided by copyright law.
163
+
164
+ You may make, run and propagate covered works that you do not
165
+ convey, without conditions so long as your license otherwise remains
166
+ in force. You may convey covered works to others for the sole purpose
167
+ of having them make modifications exclusively for you, or provide you
168
+ with facilities for running those works, provided that you comply with
169
+ the terms of this License in conveying all material for which you do
170
+ not control copyright. Those thus making or running the covered works
171
+ for you must do so exclusively on your behalf, under your direction
172
+ and control, on terms that prohibit them from making any copies of
173
+ your copyrighted material outside their relationship with you.
174
+
175
+ Conveying under any other circumstances is permitted solely under
176
+ the conditions stated below. Sublicensing is not allowed; section 10
177
+ makes it unnecessary.
178
+
179
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180
+
181
+ No covered work shall be deemed part of an effective technological
182
+ measure under any applicable law fulfilling obligations under article
183
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184
+ similar laws prohibiting or restricting circumvention of such
185
+ measures.
186
+
187
+ When you convey a covered work, you waive any legal power to forbid
188
+ circumvention of technological measures to the extent such circumvention
189
+ is effected by exercising rights under this License with respect to
190
+ the covered work, and you disclaim any intention to limit operation or
191
+ modification of the work as a means of enforcing, against the work's
192
+ users, your or third parties' legal rights to forbid circumvention of
193
+ technological measures.
194
+
195
+ 4. Conveying Verbatim Copies.
196
+
197
+ You may convey verbatim copies of the Program's source code as you
198
+ receive it, in any medium, provided that you conspicuously and
199
+ appropriately publish on each copy an appropriate copyright notice;
200
+ keep intact all notices stating that this License and any
201
+ non-permissive terms added in accord with section 7 apply to the code;
202
+ keep intact all notices of the absence of any warranty; and give all
203
+ recipients a copy of this License along with the Program.
204
+
205
+ You may charge any price or no price for each copy that you convey,
206
+ and you may offer support or warranty protection for a fee.
207
+
208
+ 5. Conveying Modified Source Versions.
209
+
210
+ You may convey a work based on the Program, or the modifications to
211
+ produce it from the Program, in the form of source code under the
212
+ terms of section 4, provided that you also meet all of these conditions:
213
+
214
+ a) The work must carry prominent notices stating that you modified
215
+ it, and giving a relevant date.
216
+
217
+ b) The work must carry prominent notices stating that it is
218
+ released under this License and any conditions added under section
219
+ 7. This requirement modifies the requirement in section 4 to
220
+ "keep intact all notices".
221
+
222
+ c) You must license the entire work, as a whole, under this
223
+ License to anyone who comes into possession of a copy. This
224
+ License will therefore apply, along with any applicable section 7
225
+ additional terms, to the whole of the work, and all its parts,
226
+ regardless of how they are packaged. This License gives no
227
+ permission to license the work in any other way, but it does not
228
+ invalidate such permission if you have separately received it.
229
+
230
+ d) If the work has interactive user interfaces, each must display
231
+ Appropriate Legal Notices; however, if the Program has interactive
232
+ interfaces that do not display Appropriate Legal Notices, your
233
+ work need not make them do so.
234
+
235
+ A compilation of a covered work with other separate and independent
236
+ works, which are not by their nature extensions of the covered work,
237
+ and which are not combined with it such as to form a larger program,
238
+ in or on a volume of a storage or distribution medium, is called an
239
+ "aggregate" if the compilation and its resulting copyright are not
240
+ used to limit the access or legal rights of the compilation's users
241
+ beyond what the individual works permit. Inclusion of a covered work
242
+ in an aggregate does not cause this License to apply to the other
243
+ parts of the aggregate.
244
+
245
+ 6. Conveying Non-Source Forms.
246
+
247
+ You may convey a covered work in object code form under the terms
248
+ of sections 4 and 5, provided that you also convey the
249
+ machine-readable Corresponding Source under the terms of this License,
250
+ in one of these ways:
251
+
252
+ a) Convey the object code in, or embodied in, a physical product
253
+ (including a physical distribution medium), accompanied by the
254
+ Corresponding Source fixed on a durable physical medium
255
+ customarily used for software interchange.
256
+
257
+ b) Convey the object code in, or embodied in, a physical product
258
+ (including a physical distribution medium), accompanied by a
259
+ written offer, valid for at least three years and valid for as
260
+ long as you offer spare parts or customer support for that product
261
+ model, to give anyone who possesses the object code either (1) a
262
+ copy of the Corresponding Source for all the software in the
263
+ product that is covered by this License, on a durable physical
264
+ medium customarily used for software interchange, for a price no
265
+ more than your reasonable cost of physically performing this
266
+ conveying of source, or (2) access to copy the
267
+ Corresponding Source from a network server at no charge.
268
+
269
+ c) Convey individual copies of the object code with a copy of the
270
+ written offer to provide the Corresponding Source. This
271
+ alternative is allowed only occasionally and noncommercially, and
272
+ only if you received the object code with such an offer, in accord
273
+ with subsection 6b.
274
+
275
+ d) Convey the object code by offering access from a designated
276
+ place (gratis or for a charge), and offer equivalent access to the
277
+ Corresponding Source in the same way through the same place at no
278
+ further charge. You need not require recipients to copy the
279
+ Corresponding Source along with the object code. If the place to
280
+ copy the object code is a network server, the Corresponding Source
281
+ may be on a different server (operated by you or a third party)
282
+ that supports equivalent copying facilities, provided you maintain
283
+ clear directions next to the object code saying where to find the
284
+ Corresponding Source. Regardless of what server hosts the
285
+ Corresponding Source, you remain obligated to ensure that it is
286
+ available for as long as needed to satisfy these requirements.
287
+
288
+ e) Convey the object code using peer-to-peer transmission, provided
289
+ you inform other peers where the object code and Corresponding
290
+ Source of the work are being offered to the general public at no
291
+ charge under subsection 6d.
292
+
293
+ A separable portion of the object code, whose source code is excluded
294
+ from the Corresponding Source as a System Library, need not be
295
+ included in conveying the object code work.
296
+
297
+ A "User Product" is either (1) a "consumer product", which means any
298
+ tangible personal property which is normally used for personal, family,
299
+ or household purposes, or (2) anything designed or sold for incorporation
300
+ into a dwelling. In determining whether a product is a consumer product,
301
+ doubtful cases shall be resolved in favor of coverage. For a particular
302
+ product received by a particular user, "normally used" refers to a
303
+ typical or common use of that class of product, regardless of the status
304
+ of the particular user or of the way in which the particular user
305
+ actually uses, or expects or is expected to use, the product. A product
306
+ is a consumer product regardless of whether the product has substantial
307
+ commercial, industrial or non-consumer uses, unless such uses represent
308
+ the only significant mode of use of the product.
309
+
310
+ "Installation Information" for a User Product means any methods,
311
+ procedures, authorization keys, or other information required to install
312
+ and execute modified versions of a covered work in that User Product from
313
+ a modified version of its Corresponding Source. The information must
314
+ suffice to ensure that the continued functioning of the modified object
315
+ code is in no case prevented or interfered with solely because
316
+ modification has been made.
317
+
318
+ If you convey an object code work under this section in, or with, or
319
+ specifically for use in, a User Product, and the conveying occurs as
320
+ part of a transaction in which the right of possession and use of the
321
+ User Product is transferred to the recipient in perpetuity or for a
322
+ fixed term (regardless of how the transaction is characterized), the
323
+ Corresponding Source conveyed under this section must be accompanied
324
+ by the Installation Information. But this requirement does not apply
325
+ if neither you nor any third party retains the ability to install
326
+ modified object code on the User Product (for example, the work has
327
+ been installed in ROM).
328
+
329
+ The requirement to provide Installation Information does not include a
330
+ requirement to continue to provide support service, warranty, or updates
331
+ for a work that has been modified or installed by the recipient, or for
332
+ the User Product in which it has been modified or installed. Access to a
333
+ network may be denied when the modification itself materially and
334
+ adversely affects the operation of the network or violates the rules and
335
+ protocols for communication across the network.
336
+
337
+ Corresponding Source conveyed, and Installation Information provided,
338
+ in accord with this section must be in a format that is publicly
339
+ documented (and with an implementation available to the public in
340
+ source code form), and must require no special password or key for
341
+ unpacking, reading or copying.
342
+
343
+ 7. Additional Terms.
344
+
345
+ "Additional permissions" are terms that supplement the terms of this
346
+ License by making exceptions from one or more of its conditions.
347
+ Additional permissions that are applicable to the entire Program shall
348
+ be treated as though they were included in this License, to the extent
349
+ that they are valid under applicable law. If additional permissions
350
+ apply only to part of the Program, that part may be used separately
351
+ under those permissions, but the entire Program remains governed by
352
+ this License without regard to the additional permissions.
353
+
354
+ When you convey a copy of a covered work, you may at your option
355
+ remove any additional permissions from that copy, or from any part of
356
+ it. (Additional permissions may be written to require their own
357
+ removal in certain cases when you modify the work.) You may place
358
+ additional permissions on material, added by you to a covered work,
359
+ for which you have or can give appropriate copyright permission.
360
+
361
+ Notwithstanding any other provision of this License, for material you
362
+ add to a covered work, you may (if authorized by the copyright holders of
363
+ that material) supplement the terms of this License with terms:
364
+
365
+ a) Disclaiming warranty or limiting liability differently from the
366
+ terms of sections 15 and 16 of this License; or
367
+
368
+ b) Requiring preservation of specified reasonable legal notices or
369
+ author attributions in that material or in the Appropriate Legal
370
+ Notices displayed by works containing it; or
371
+
372
+ c) Prohibiting misrepresentation of the origin of that material, or
373
+ requiring that modified versions of such material be marked in
374
+ reasonable ways as different from the original version; or
375
+
376
+ d) Limiting the use for publicity purposes of names of licensors or
377
+ authors of the material; or
378
+
379
+ e) Declining to grant rights under trademark law for use of some
380
+ trade names, trademarks, or service marks; or
381
+
382
+ f) Requiring indemnification of licensors and authors of that
383
+ material by anyone who conveys the material (or modified versions of
384
+ it) with contractual assumptions of liability to the recipient, for
385
+ any liability that these contractual assumptions directly impose on
386
+ those licensors and authors.
387
+
388
+ All other non-permissive additional terms are considered "further
389
+ restrictions" within the meaning of section 10. If the Program as you
390
+ received it, or any part of it, contains a notice stating that it is
391
+ governed by this License along with a term that is a further
392
+ restriction, you may remove that term. If a license document contains
393
+ a further restriction but permits relicensing or conveying under this
394
+ License, you may add to a covered work material governed by the terms
395
+ of that license document, provided that the further restriction does
396
+ not survive such relicensing or conveying.
397
+
398
+ If you add terms to a covered work in accord with this section, you
399
+ must place, in the relevant source files, a statement of the
400
+ additional terms that apply to those files, or a notice indicating
401
+ where to find the applicable terms.
402
+
403
+ Additional terms, permissive or non-permissive, may be stated in the
404
+ form of a separately written license, or stated as exceptions;
405
+ the above requirements apply either way.
406
+
407
+ 8. Termination.
408
+
409
+ You may not propagate or modify a covered work except as expressly
410
+ provided under this License. Any attempt otherwise to propagate or
411
+ modify it is void, and will automatically terminate your rights under
412
+ this License (including any patent licenses granted under the third
413
+ paragraph of section 11).
414
+
415
+ However, if you cease all violation of this License, then your
416
+ license from a particular copyright holder is reinstated (a)
417
+ provisionally, unless and until the copyright holder explicitly and
418
+ finally terminates your license, and (b) permanently, if the copyright
419
+ holder fails to notify you of the violation by some reasonable means
420
+ prior to 60 days after the cessation.
421
+
422
+ Moreover, your license from a particular copyright holder is
423
+ reinstated permanently if the copyright holder notifies you of the
424
+ violation by some reasonable means, this is the first time you have
425
+ received notice of violation of this License (for any work) from that
426
+ copyright holder, and you cure the violation prior to 30 days after
427
+ your receipt of the notice.
428
+
429
+ Termination of your rights under this section does not terminate the
430
+ licenses of parties who have received copies or rights from you under
431
+ this License. If your rights have been terminated and not permanently
432
+ reinstated, you do not qualify to receive new licenses for the same
433
+ material under section 10.
434
+
435
+ 9. Acceptance Not Required for Having Copies.
436
+
437
+ You are not required to accept this License in order to receive or
438
+ run a copy of the Program. Ancillary propagation of a covered work
439
+ occurring solely as a consequence of using peer-to-peer transmission
440
+ to receive a copy likewise does not require acceptance. However,
441
+ nothing other than this License grants you permission to propagate or
442
+ modify any covered work. These actions infringe copyright if you do
443
+ not accept this License. Therefore, by modifying or propagating a
444
+ covered work, you indicate your acceptance of this License to do so.
445
+
446
+ 10. Automatic Licensing of Downstream Recipients.
447
+
448
+ Each time you convey a covered work, the recipient automatically
449
+ receives a license from the original licensors, to run, modify and
450
+ propagate that work, subject to this License. You are not responsible
451
+ for enforcing compliance by third parties with this License.
452
+
453
+ An "entity transaction" is a transaction transferring control of an
454
+ organization, or substantially all assets of one, or subdividing an
455
+ organization, or merging organizations. If propagation of a covered
456
+ work results from an entity transaction, each party to that
457
+ transaction who receives a copy of the work also receives whatever
458
+ licenses to the work the party's predecessor in interest had or could
459
+ give under the previous paragraph, plus a right to possession of the
460
+ Corresponding Source of the work from the predecessor in interest, if
461
+ the predecessor has it or can get it with reasonable efforts.
462
+
463
+ You may not impose any further restrictions on the exercise of the
464
+ rights granted or affirmed under this License. For example, you may
465
+ not impose a license fee, royalty, or other charge for exercise of
466
+ rights granted under this License, and you may not initiate litigation
467
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
468
+ any patent claim is infringed by making, using, selling, offering for
469
+ sale, or importing the Program or any portion of it.
470
+
471
+ 11. Patents.
472
+
473
+ A "contributor" is a copyright holder who authorizes use under this
474
+ License of the Program or a work on which the Program is based. The
475
+ work thus licensed is called the contributor's "contributor version".
476
+
477
+ A contributor's "essential patent claims" are all patent claims
478
+ owned or controlled by the contributor, whether already acquired or
479
+ hereafter acquired, that would be infringed by some manner, permitted
480
+ by this License, of making, using, or selling its contributor version,
481
+ but do not include claims that would be infringed only as a
482
+ consequence of further modification of the contributor version. For
483
+ purposes of this definition, "control" includes the right to grant
484
+ patent sublicenses in a manner consistent with the requirements of
485
+ this License.
486
+
487
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
488
+ patent license under the contributor's essential patent claims, to
489
+ make, use, sell, offer for sale, import and otherwise run, modify and
490
+ propagate the contents of its contributor version.
491
+
492
+ In the following three paragraphs, a "patent license" is any express
493
+ agreement or commitment, however denominated, not to enforce a patent
494
+ (such as an express permission to practice a patent or covenant not to
495
+ sue for patent infringement). To "grant" such a patent license to a
496
+ party means to make such an agreement or commitment not to enforce a
497
+ patent against the party.
498
+
499
+ If you convey a covered work, knowingly relying on a patent license,
500
+ and the Corresponding Source of the work is not available for anyone
501
+ to copy, free of charge and under the terms of this License, through a
502
+ publicly available network server or other readily accessible means,
503
+ then you must either (1) cause the Corresponding Source to be so
504
+ available, or (2) arrange to deprive yourself of the benefit of the
505
+ patent license for this particular work, or (3) arrange, in a manner
506
+ consistent with the requirements of this License, to extend the patent
507
+ license to downstream recipients. "Knowingly relying" means you have
508
+ actual knowledge that, but for the patent license, your conveying the
509
+ covered work in a country, or your recipient's use of the covered work
510
+ in a country, would infringe one or more identifiable patents in that
511
+ country that you have reason to believe are valid.
512
+
513
+ If, pursuant to or in connection with a single transaction or
514
+ arrangement, you convey, or propagate by procuring conveyance of, a
515
+ covered work, and grant a patent license to some of the parties
516
+ receiving the covered work authorizing them to use, propagate, modify
517
+ or convey a specific copy of the covered work, then the patent license
518
+ you grant is automatically extended to all recipients of the covered
519
+ work and works based on it.
520
+
521
+ A patent license is "discriminatory" if it does not include within
522
+ the scope of its coverage, prohibits the exercise of, or is
523
+ conditioned on the non-exercise of one or more of the rights that are
524
+ specifically granted under this License. You may not convey a covered
525
+ work if you are a party to an arrangement with a third party that is
526
+ in the business of distributing software, under which you make payment
527
+ to the third party based on the extent of your activity of conveying
528
+ the work, and under which the third party grants, to any of the
529
+ parties who would receive the covered work from you, a discriminatory
530
+ patent license (a) in connection with copies of the covered work
531
+ conveyed by you (or copies made from those copies), or (b) primarily
532
+ for and in connection with specific products or compilations that
533
+ contain the covered work, unless you entered into that arrangement,
534
+ or that patent license was granted, prior to 28 March 2007.
535
+
536
+ Nothing in this License shall be construed as excluding or limiting
537
+ any implied license or other defenses to infringement that may
538
+ otherwise be available to you under applicable patent law.
539
+
540
+ 12. No Surrender of Others' Freedom.
541
+
542
+ If conditions are imposed on you (whether by court order, agreement or
543
+ otherwise) that contradict the conditions of this License, they do not
544
+ excuse you from the conditions of this License. If you cannot convey a
545
+ covered work so as to satisfy simultaneously your obligations under this
546
+ License and any other pertinent obligations, then as a consequence you may
547
+ not convey it at all. For example, if you agree to terms that obligate you
548
+ to collect a royalty for further conveying from those to whom you convey
549
+ the Program, the only way you could satisfy both those terms and this
550
+ License would be to refrain entirely from conveying the Program.
551
+
552
+ 13. Use with the GNU Affero General Public License.
553
+
554
+ Notwithstanding any other provision of this License, you have
555
+ permission to link or combine any covered work with a work licensed
556
+ under version 3 of the GNU Affero General Public License into a single
557
+ combined work, and to convey the resulting work. The terms of this
558
+ License will continue to apply to the part which is the covered work,
559
+ but the special requirements of the GNU Affero General Public License,
560
+ section 13, concerning interaction through a network will apply to the
561
+ combination as such.
562
+
563
+ 14. Revised Versions of this License.
564
+
565
+ The Free Software Foundation may publish revised and/or new versions of
566
+ the GNU General Public License from time to time. Such new versions will
567
+ be similar in spirit to the present version, but may differ in detail to
568
+ address new problems or concerns.
569
+
570
+ Each version is given a distinguishing version number. If the
571
+ Program specifies that a certain numbered version of the GNU General
572
+ Public License "or any later version" applies to it, you have the
573
+ option of following the terms and conditions either of that numbered
574
+ version or of any later version published by the Free Software
575
+ Foundation. If the Program does not specify a version number of the
576
+ GNU General Public License, you may choose any version ever published
577
+ by the Free Software Foundation.
578
+
579
+ If the Program specifies that a proxy can decide which future
580
+ versions of the GNU General Public License can be used, that proxy's
581
+ public statement of acceptance of a version permanently authorizes you
582
+ to choose that version for the Program.
583
+
584
+ Later license versions may give you additional or different
585
+ permissions. However, no additional obligations are imposed on any
586
+ author or copyright holder as a result of your choosing to follow a
587
+ later version.
588
+
589
+ 15. Disclaimer of Warranty.
590
+
591
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599
+
600
+ 16. Limitation of Liability.
601
+
602
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610
+ SUCH DAMAGES.
611
+
612
+ 17. Interpretation of Sections 15 and 16.
613
+
614
+ If the disclaimer of warranty and limitation of liability provided
615
+ above cannot be given local legal effect according to their terms,
616
+ reviewing courts shall apply local law that most closely approximates
617
+ an absolute waiver of all civil liability in connection with the
618
+ Program, unless a warranty or assumption of liability accompanies a
619
+ copy of the Program in return for a fee.
620
+
621
+ END OF TERMS AND CONDITIONS
622
+
623
+ How to Apply These Terms to Your New Programs
624
+
625
+ If you develop a new program, and you want it to be of the greatest
626
+ possible use to the public, the best way to achieve this is to make it
627
+ free software which everyone can redistribute and change under these terms.
628
+
629
+ To do so, attach the following notices to the program. It is safest
630
+ to attach them to the start of each source file to most effectively
631
+ state the exclusion of warranty; and each file should have at least
632
+ the "copyright" line and a pointer to where the full notice is found.
633
+
634
+ <one line to give the program's name and a brief idea of what it does.>
635
+ Copyright (C) <year> <name of author>
636
+
637
+ This program is free software: you can redistribute it and/or modify
638
+ it under the terms of the GNU General Public License as published by
639
+ the Free Software Foundation, either version 3 of the License, or
640
+ (at your option) any later version.
641
+
642
+ This program is distributed in the hope that it will be useful,
643
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
644
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645
+ GNU General Public License for more details.
646
+
647
+ You should have received a copy of the GNU General Public License
648
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
649
+
650
+ Also add information on how to contact you by electronic and paper mail.
651
+
652
+ If the program does terminal interaction, make it output a short
653
+ notice like this when it starts in an interactive mode:
654
+
655
+ <program> Copyright (C) <year> <name of author>
656
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657
+ This is free software, and you are welcome to redistribute it
658
+ under certain conditions; type `show c' for details.
659
+
660
+ The hypothetical commands `show w' and `show c' should show the appropriate
661
+ parts of the General Public License. Of course, your program's commands
662
+ might be different; for a GUI interface, you would use an "about box".
663
+
664
+ You should also get your employer (if you work as a programmer) or school,
665
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
666
+ For more information on this, and how to apply and follow the GNU GPL, see
667
+ <https://www.gnu.org/licenses/>.
668
+
669
+ The GNU General Public License does not permit incorporating your program
670
+ into proprietary programs. If your program is a subroutine library, you
671
+ may consider it more useful to permit linking proprietary applications with
672
+ the library. If this is what you want to do, use the GNU Lesser General
673
+ Public License instead of this License. But first, please read
674
+ <https://www.gnu.org/licenses/why-not-lgpl.html>.
README.md CHANGED
@@ -1,12 +1,310 @@
1
- ---
2
- title: Newcustomyolo
3
- emoji: 📚
4
- colorFrom: yellow
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.24.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Official YOLOv7
2
+
3
+ Implementation of paper - [YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors](https://arxiv.org/abs/2207.02696)
4
+
5
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/yolov7-trainable-bag-of-freebies-sets-new/real-time-object-detection-on-coco)](https://paperswithcode.com/sota/real-time-object-detection-on-coco?p=yolov7-trainable-bag-of-freebies-sets-new)
6
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/yolov7)
7
+ <a href="https://colab.research.google.com/gist/AlexeyAB/b769f5795e65fdab80086f6cb7940dae/yolov7detection.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
8
+ [![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2207.02696-B31B1B.svg)](https://arxiv.org/abs/2207.02696)
9
+
10
+ <div align="center">
11
+ <a href="./">
12
+ <img src="./figure/performance.png" width="79%"/>
13
+ </a>
14
+ </div>
15
+
16
+ ## Web Demo
17
+
18
+ - Integrated into [Huggingface Spaces 🤗](https://huggingface.co/spaces/akhaliq/yolov7) using Gradio. Try out the Web Demo [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/yolov7)
19
+
20
+ ## Performance
21
+
22
+ MS COCO
23
+
24
+ | Model | Test Size | AP<sup>test</sup> | AP<sub>50</sub><sup>test</sup> | AP<sub>75</sub><sup>test</sup> | batch 1 fps | batch 32 average time |
25
+ | :-- | :-: | :-: | :-: | :-: | :-: | :-: |
26
+ | [**YOLOv7**](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt) | 640 | **51.4%** | **69.7%** | **55.9%** | 161 *fps* | 2.8 *ms* |
27
+ | [**YOLOv7-X**](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7x.pt) | 640 | **53.1%** | **71.2%** | **57.8%** | 114 *fps* | 4.3 *ms* |
28
+ | | | | | | | |
29
+ | [**YOLOv7-W6**](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-w6.pt) | 1280 | **54.9%** | **72.6%** | **60.1%** | 84 *fps* | 7.6 *ms* |
30
+ | [**YOLOv7-E6**](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6.pt) | 1280 | **56.0%** | **73.5%** | **61.2%** | 56 *fps* | 12.3 *ms* |
31
+ | [**YOLOv7-D6**](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-d6.pt) | 1280 | **56.6%** | **74.0%** | **61.8%** | 44 *fps* | 15.0 *ms* |
32
+ | [**YOLOv7-E6E**](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6e.pt) | 1280 | **56.8%** | **74.4%** | **62.1%** | 36 *fps* | 18.7 *ms* |
33
+
34
+ ## Installation
35
+
36
+ Docker environment (recommended)
37
+ <details><summary> <b>Expand</b> </summary>
38
+
39
+ ``` shell
40
+ # create the docker container, you can change the shared memory size if you have more.
41
+ nvidia-docker run --name yolov7 -it -v your_coco_path/:/coco/ -v your_code_path/:/yolov7 --shm-size=64g nvcr.io/nvidia/pytorch:21.08-py3
42
+
43
+ # apt install required packages
44
+ apt update
45
+ apt install -y zip htop screen libgl1-mesa-glx
46
+
47
+ # pip install required packages
48
+ pip install seaborn thop
49
+
50
+ # go to code folder
51
+ cd /yolov7
52
+ ```
53
+
54
+ </details>
55
+
56
+ ## Testing
57
+
58
+ [`yolov7.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt) [`yolov7x.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7x.pt) [`yolov7-w6.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-w6.pt) [`yolov7-e6.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6.pt) [`yolov7-d6.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-d6.pt) [`yolov7-e6e.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6e.pt)
59
+
60
+ ``` shell
61
+ python test.py --data data/coco.yaml --img 640 --batch 32 --conf 0.001 --iou 0.65 --device 0 --weights yolov7.pt --name yolov7_640_val
62
+ ```
63
+
64
+ You will get the results:
65
+
66
+ ```
67
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.51206
68
+ Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.69730
69
+ Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.55521
70
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.35247
71
+ Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.55937
72
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.66693
73
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.38453
74
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.63765
75
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.68772
76
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.53766
77
+ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.73549
78
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.83868
79
+ ```
80
+
81
+ To measure accuracy, download the [COCO annotations for pycocotools](http://images.cocodataset.org/annotations/annotations_trainval2017.zip) and extract them so that the validation annotations are at `./coco/annotations/instances_val2017.json`.
82
+
83
+ ## Training
84
+
85
+ Data preparation
86
+
87
+ ``` shell
88
+ bash scripts/get_coco.sh
89
+ ```
90
+
91
+ * Download MS COCO dataset images ([train](http://images.cocodataset.org/zips/train2017.zip), [val](http://images.cocodataset.org/zips/val2017.zip), [test](http://images.cocodataset.org/zips/test2017.zip)) and [labels](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/coco2017labels-segments.zip). If you have previously used a different version of YOLO, we strongly recommend that you delete `train2017.cache` and `val2017.cache` files, and redownload [labels](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/coco2017labels-segments.zip)
92
+
93
+ Single GPU training
94
+
95
+ ``` shell
96
+ # train p5 models
97
+ python train.py --workers 8 --device 0 --batch-size 32 --data data/coco.yaml --img 640 640 --cfg cfg/training/yolov7.yaml --weights '' --name yolov7 --hyp data/hyp.scratch.p5.yaml
98
+
99
+ # train p6 models
100
+ python train_aux.py --workers 8 --device 0 --batch-size 16 --data data/coco.yaml --img 1280 1280 --cfg cfg/training/yolov7-w6.yaml --weights '' --name yolov7-w6 --hyp data/hyp.scratch.p6.yaml
101
+ ```
102
+
103
+ Multiple GPU training
104
+
105
+ ``` shell
106
+ # train p5 models
107
+ python -m torch.distributed.launch --nproc_per_node 4 --master_port 9527 train.py --workers 8 --device 0,1,2,3 --sync-bn --batch-size 128 --data data/coco.yaml --img 640 640 --cfg cfg/training/yolov7.yaml --weights '' --name yolov7 --hyp data/hyp.scratch.p5.yaml
108
+
109
+ # train p6 models
110
+ python -m torch.distributed.launch --nproc_per_node 8 --master_port 9527 train_aux.py --workers 8 --device 0,1,2,3,4,5,6,7 --sync-bn --batch-size 128 --data data/coco.yaml --img 1280 1280 --cfg cfg/training/yolov7-w6.yaml --weights '' --name yolov7-w6 --hyp data/hyp.scratch.p6.yaml
111
+ ```
112
+
113
+ ## Transfer learning
114
+
115
+ [`yolov7_training.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt) [`yolov7x_training.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7x_training.pt) [`yolov7-w6_training.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-w6_training.pt) [`yolov7-e6_training.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6_training.pt) [`yolov7-d6_training.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-d6_training.pt) [`yolov7-e6e_training.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6e_training.pt)
116
+
117
+ Single GPU finetuning for custom dataset
118
+
119
+ ``` shell
120
+ # finetune p5 models
121
+ python train.py --workers 8 --device 0 --batch-size 32 --data data/custom.yaml --img 640 640 --cfg cfg/training/yolov7-custom.yaml --weights 'yolov7_training.pt' --name yolov7-custom --hyp data/hyp.scratch.custom.yaml
122
+
123
+ # finetune p6 models
124
+ python train_aux.py --workers 8 --device 0 --batch-size 16 --data data/custom.yaml --img 1280 1280 --cfg cfg/training/yolov7-w6-custom.yaml --weights 'yolov7-w6_training.pt' --name yolov7-w6-custom --hyp data/hyp.scratch.custom.yaml
125
+ ```
126
+
127
+ ## Re-parameterization
128
+
129
+ See [reparameterization.ipynb](tools/reparameterization.ipynb)
130
+
131
+ ## Inference
132
+
133
+ On video:
134
+ ``` shell
135
+ python detect.py --weights yolov7.pt --conf 0.25 --img-size 640 --source yourvideo.mp4
136
+ ```
137
+
138
+ On image:
139
+ ``` shell
140
+ python detect.py --weights yolov7.pt --conf 0.25 --img-size 640 --source inference/images/horses.jpg
141
+ ```
142
+
143
+ <div align="center">
144
+ <a href="./">
145
+ <img src="./figure/horses_prediction.jpg" width="59%"/>
146
+ </a>
147
+ </div>
148
+
149
+
150
+ ## Export
151
+
152
+ **Pytorch to CoreML (and inference on MacOS/iOS)** <a href="https://colab.research.google.com/github/WongKinYiu/yolov7/blob/main/tools/YOLOv7CoreML.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
153
+
154
+ **Pytorch to ONNX with NMS (and inference)** <a href="https://colab.research.google.com/github/WongKinYiu/yolov7/blob/main/tools/YOLOv7onnx.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
155
+ ```shell
156
+ python export.py --weights yolov7-tiny.pt --grid --end2end --simplify \
157
+ --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640
158
+ ```
159
+
160
+ **Pytorch to TensorRT with NMS (and inference)** <a href="https://colab.research.google.com/github/WongKinYiu/yolov7/blob/main/tools/YOLOv7trt.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
161
+
162
+ ```shell
163
+ wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-tiny.pt
164
+ python export.py --weights ./yolov7-tiny.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640
165
+ git clone https://github.com/Linaom1214/tensorrt-python.git
166
+ python ./tensorrt-python/export.py -o yolov7-tiny.onnx -e yolov7-tiny-nms.trt -p fp16
167
+ ```
168
+
169
+ **Pytorch to TensorRT another way** <a href="https://colab.research.google.com/gist/AlexeyAB/fcb47ae544cf284eb24d8ad8e880d45c/yolov7trtlinaom.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> <details><summary> <b>Expand</b> </summary>
170
+
171
+
172
+ ```shell
173
+ wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-tiny.pt
174
+ python export.py --weights yolov7-tiny.pt --grid --include-nms
175
+ git clone https://github.com/Linaom1214/tensorrt-python.git
176
+ python ./tensorrt-python/export.py -o yolov7-tiny.onnx -e yolov7-tiny-nms.trt -p fp16
177
+
178
+ # Or use trtexec to convert ONNX to TensorRT engine
179
+ /usr/src/tensorrt/bin/trtexec --onnx=yolov7-tiny.onnx --saveEngine=yolov7-tiny-nms.trt --fp16
180
+ ```
181
+
182
+ </details>
183
+
184
+ Tested with: Python 3.7.13, PyTorch 1.12.0+cu113
185
+
186
+ ## Pose estimation
187
+
188
+ [`code`](https://github.com/WongKinYiu/yolov7/tree/pose) [`yolov7-w6-pose.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-w6-pose.pt)
189
+
190
+ See [keypoint.ipynb](https://github.com/WongKinYiu/yolov7/blob/main/tools/keypoint.ipynb).
191
+
192
+ <div align="center">
193
+ <a href="./">
194
+ <img src="./figure/pose.png" width="39%"/>
195
+ </a>
196
+ </div>
197
+
198
+
199
+ ## Instance segmentation (with NTU)
200
+
201
+ [`code`](https://github.com/WongKinYiu/yolov7/tree/mask) [`yolov7-mask.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-mask.pt)
202
+
203
+ See [instance.ipynb](https://github.com/WongKinYiu/yolov7/blob/main/tools/instance.ipynb).
204
+
205
+ <div align="center">
206
+ <a href="./">
207
+ <img src="./figure/mask.png" width="59%"/>
208
+ </a>
209
+ </div>
210
+
211
+ ## Instance segmentation
212
+
213
+ [`code`](https://github.com/WongKinYiu/yolov7/tree/u7/seg) [`yolov7-seg.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-seg.pt)
214
+
215
+ YOLOv7 for instance segmentation (YOLOR + YOLOv5 + YOLACT)
216
+
217
+ | Model | Test Size | AP<sup>box</sup> | AP<sub>50</sub><sup>box</sup> | AP<sub>75</sub><sup>box</sup> | AP<sup>mask</sup> | AP<sub>50</sub><sup>mask</sup> | AP<sub>75</sub><sup>mask</sup> |
218
+ | :-- | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
219
+ | **YOLOv7-seg** | 640 | **51.4%** | **69.4%** | **55.8%** | **41.5%** | **65.5%** | **43.7%** |
220
+
221
+ ## Anchor free detection head
222
+
223
+ [`code`](https://github.com/WongKinYiu/yolov7/tree/u6) [`yolov7-u6.pt`](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-u6.pt)
224
+
225
+ YOLOv7 with decoupled TAL head (YOLOR + YOLOv5 + YOLOv6)
226
+
227
+ | Model | Test Size | AP<sup>val</sup> | AP<sub>50</sub><sup>val</sup> | AP<sub>75</sub><sup>val</sup> |
228
+ | :-- | :-: | :-: | :-: | :-: |
229
+ | **YOLOv7-u6** | 640 | **52.6%** | **69.7%** | **57.3%** |
230
+
231
+
232
+ ## Citation
233
+
234
+ ```
235
+ @inproceedings{wang2023yolov7,
236
+ title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors},
237
+ author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark},
238
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
239
+ year={2023}
240
+ }
241
+ ```
242
+
243
+ ```
244
+ @article{wang2023designing,
245
+ title={Designing Network Design Strategies Through Gradient Path Analysis},
246
+ author={Wang, Chien-Yao and Liao, Hong-Yuan Mark and Yeh, I-Hau},
247
+ journal={Journal of Information Science and Engineering},
248
+ year={2023}
249
+ }
250
+ ```
251
+
252
+
253
+ ## Teaser
254
+
255
+ YOLOv7-semantic & YOLOv7-panoptic & YOLOv7-caption
256
+
257
+ <div align="center">
258
+ <a href="./">
259
+ <img src="./figure/tennis.jpg" width="24%"/>
260
+ </a>
261
+ <a href="./">
262
+ <img src="./figure/tennis_semantic.jpg" width="24%"/>
263
+ </a>
264
+ <a href="./">
265
+ <img src="./figure/tennis_panoptic.png" width="24%"/>
266
+ </a>
267
+ <a href="./">
268
+ <img src="./figure/tennis_caption.png" width="24%"/>
269
+ </a>
270
+ </div>
271
+
272
+ YOLOv7-semantic & YOLOv7-detection & YOLOv7-depth (with NTUT)
273
+
274
+ <div align="center">
275
+ <a href="./">
276
+ <img src="./figure/yolov7_city.jpg" width="80%"/>
277
+ </a>
278
+ </div>
279
+
280
+ YOLOv7-3d-detection & YOLOv7-lidar & YOLOv7-road (with NTUT)
281
+
282
+ <div align="center">
283
+ <a href="./">
284
+ <img src="./figure/yolov7_3d.jpg" width="30%"/>
285
+ </a>
286
+ <a href="./">
287
+ <img src="./figure/yolov7_lidar.jpg" width="30%"/>
288
+ </a>
289
+ <a href="./">
290
+ <img src="./figure/yolov7_road.jpg" width="30%"/>
291
+ </a>
292
+ </div>
293
+
294
+
295
+ ## Acknowledgements
296
+
297
+ <details><summary> <b>Expand</b> </summary>
298
+
299
+ * [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet)
300
+ * [https://github.com/WongKinYiu/yolor](https://github.com/WongKinYiu/yolor)
301
+ * [https://github.com/WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4)
302
+ * [https://github.com/WongKinYiu/ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4)
303
+ * [https://github.com/Megvii-BaseDetection/YOLOX](https://github.com/Megvii-BaseDetection/YOLOX)
304
+ * [https://github.com/ultralytics/yolov3](https://github.com/ultralytics/yolov3)
305
+ * [https://github.com/ultralytics/yolov5](https://github.com/ultralytics/yolov5)
306
+ * [https://github.com/DingXiaoH/RepVGG](https://github.com/DingXiaoH/RepVGG)
307
+ * [https://github.com/JUGGHM/OREPA_CVPR2022](https://github.com/JUGGHM/OREPA_CVPR2022)
308
+ * [https://github.com/TexasInstruments/edgeai-yolov5/tree/yolo-pose](https://github.com/TexasInstruments/edgeai-yolov5/tree/yolo-pose)
309
+
310
+ </details>
__pycache__/test.cpython-311.pyc ADDED
Binary file (25.9 kB). View file
 
__pycache__/torch.cpython-310.pyc ADDED
Binary file (170 Bytes). View file
 
__pycache__/torch.cpython-311.pyc ADDED
Binary file (275 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import time
3
+ from pathlib import Path
4
+ import os
5
+ import cv2
6
+ import torch
7
+ import torch.backends.cudnn as cudnn
8
+ from numpy import random
9
+ import sys
10
+ import numpy as np
11
+ from models.experimental import attempt_load
12
+ from utils.datasets import LoadImages
13
+ from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging, increment_path
14
+ from utils.plots import plot_one_box
15
+ from utils.torch_utils import select_device, time_synchronized
16
+ import gradio as gr
17
+ import ffmpeg
18
+
19
+ # IoU and scanner-movement helper functions
20
def compute_iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: Four values ``(x1, y1, x2, y2)`` giving two opposite corners.
        box2: Four values in the same ``(x1, y1, x2, y2)`` layout.

    Returns:
        The intersection area divided by the union area, or ``0.0`` when
        the union area is zero.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Overlap extent along each axis; clamped at zero when the boxes
    # do not overlap on that axis.
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1))
    overlap = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice.
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - overlap
    if union == 0:
        return 0.0
    return overlap / union
34
+
35
def is_scanner_moving(prev_centroids, curr_box, scanner_id, threshold=5.0):
    """Tell whether a scanner's box centre moved more than ``threshold``.

    Args:
        prev_centroids: Mapping of scanner id to its previous ``(x, y)``
            centroid.
        curr_box: Current box as four values ``(x1, y1, x2, y2)``.
        scanner_id: Key used to look up the previous centroid.
        threshold: Minimum centroid displacement, in the same units as the
            box coordinates, that counts as movement.

    Returns:
        ``False`` when ``scanner_id`` has no previous centroid; otherwise
        whether the Euclidean distance between the previous and current
        centroids is strictly greater than ``threshold``.
    """
    left, top, right, bottom = curr_box
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2

    # A scanner with no recorded position cannot be judged as moving.
    if scanner_id not in prev_centroids:
        return False

    last_x, last_y = prev_centroids[scanner_id]
    shift = np.sqrt((center_x - last_x) ** 2 + (center_y - last_y) ** 2)
    return shift > threshold
43
+
44
+ def detect_video(video_path, weights, conf_thres=0.25, iou_thres=0.45, img_size=640, device='', save_dir='runs/detect/exp'):
45
+ save_dir = Path(increment_path(Path(save_dir), exist_ok=True))
46
+ save_dir.mkdir(parents=True, exist_ok=True)
47
+
48
+ set_logging()
49
+ device = select_device(device)
50
+ half = device.type != 'cpu'
51
+ model = attempt_load(weights, map_location=device)
52
+ stride = int(model.stride.max())
53
+ imgsz = check_img_size(img_size, s=stride)
54
+ if half:
55
+ model.half()
56
+
57
+ dataset = LoadImages(video_path, img_size=imgsz, stride=stride)
58
+ names = model.module.names if hasattr(model, 'module') else model.names
59
+ colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
60
+
61
+ vid_path, vid_writer = None, None
62
+ prev_centroids = {}
63
+ scanner_id_counter = 0
64
+
65
+ for path, img, im0s, vid_cap in dataset:
66
+ img = torch.from_numpy(img).to(device)
67
+ img = img.half() if half else img.float()
68
+ img /= 255.0
69
+ if img.ndimension() == 3:
70
+ img = img.unsqueeze(0)
71
+
72
+ with torch.no_grad():
73
+ pred = model(img)[0]
74
+ pred = non_max_suppression(pred, conf_thres, iou_thres)
75
+
76
+ for i, det in enumerate(pred):
77
+ p = Path(path)
78
+ save_path = str(save_dir / p.name.replace('.mp4', '_output.mp4'))
79
+ im0 = im0s
80
+
81
+ if len(det):
82
+ det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
83
+ item_boxes, scanner_data, phone_boxes = [], [], []
84
+ curr_scanner_boxes = []
85
+
86
+ for *xyxy, conf, cls in det:
87
+ x1, y1, x2, y2 = map(int, xyxy)
88
+ class_name = names[int(cls)]
89
+ color = colors[int(cls)]
90
+ if class_name.lower() == "item":
91
+ item_boxes.append([x1, y1, x2, y2])
92
+ elif class_name.lower() == "phone":
93
+ phone_boxes.append([x1, y1, x2, y2])
94
+ elif class_name.lower() == "scanner":
95
+ curr_scanner_boxes.append([x1, y1, x2, y2])
96
+ plot_one_box(xyxy, im0, label=class_name, color=color, line_thickness=2)
97
+
98
+ new_prev_centroids = {}
99
+ if prev_centroids and curr_scanner_boxes:
100
+ for curr_box in curr_scanner_boxes:
101
+ curr_centroid = ((curr_box[0] + curr_box[2]) / 2, (curr_box[1] + curr_box[3]) / 2)
102
+ best_match_id = min(prev_centroids.keys(),
103
+ key=lambda k: np.sqrt((curr_centroid[0] - prev_centroids[k][0])**2 +
104
+ (curr_centroid[1] - prev_centroids[k][1])**2),
105
+ default=None)
106
+ if best_match_id is not None and np.sqrt((curr_centroid[0] - prev_centroids[best_match_id][0])**2 +
107
+ (curr_centroid[1] - prev_centroids[best_match_id][1])**2) < 50:
108
+ scanner_id = best_match_id
109
+ else:
110
+ scanner_id = scanner_id_counter
111
+ scanner_id_counter += 1
112
+ is_moving = is_scanner_moving(prev_centroids, curr_box, scanner_id)
113
+ movement_status = "Scanning" if is_moving else "Idle"
114
+ scanner_data.append([curr_box, movement_status, scanner_id])
115
+ new_prev_centroids[scanner_id] = curr_centroid
116
+ elif curr_scanner_boxes:
117
+ for curr_box in curr_scanner_boxes:
118
+ scanner_id = scanner_id_counter
119
+ scanner_id_counter += 1
120
+ movement_status = "Idle"
121
+ curr_centroid = ((curr_box[0] + curr_box[2]) / 2, (curr_box[1] + curr_box[3]) / 2)
122
+ scanner_data.append([curr_box, movement_status, scanner_id])
123
+ new_prev_centroids[scanner_id] = curr_centroid
124
+
125
+ prev_centroids = new_prev_centroids
126
+
127
+ for scanner_box, movement_status, scanner_id in scanner_data:
128
+ x1, y1, x2, y2 = scanner_box
129
+ label = f"scanner {movement_status} (ID: {scanner_id})"
130
+ plot_one_box([x1, y1, x2, y2], im0, label=label, color=colors[names.index("scanner")], line_thickness=2)
131
+
132
+ product_scanning_status = ""
133
+ payment_scanning_status = ""
134
+ for scanner_box, movement_status, _ in scanner_data:
135
+ for item_box in item_boxes:
136
+ if movement_status == "Scanning" and compute_iou(scanner_box, item_box) > 0.1:
137
+ product_scanning_status = "Product scanning is finished"
138
+ for phone_box in phone_boxes:
139
+ if movement_status == "Scanning" and compute_iou(scanner_box, phone_box) > 0.1:
140
+ payment_scanning_status = "Payment scanning is finished"
141
+
142
+ if product_scanning_status:
143
+ cv2.putText(im0, product_scanning_status, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[names.index("scanner")], 2)
144
+ if payment_scanning_status:
145
+ cv2.putText(im0, payment_scanning_status, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[names.index("scanner")], 2)
146
+
147
+ if vid_path != save_path:
148
+ vid_path = save_path
149
+ if isinstance(vid_writer, cv2.VideoWriter):
150
+ vid_writer.release()
151
+ fps = vid_cap.get(cv2.CAP_PROP_FPS) if vid_cap else 30
152
+ w, h = im0.shape[1], im0.shape[0]
153
+ vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
154
+ vid_writer.write(im0)
155
+
156
+ if isinstance(vid_writer, cv2.VideoWriter):
157
+ vid_writer.release()
158
+
159
+ # Convert to H.264 for browser compatibility
160
+ output_h264 = str(Path(save_path).with_name(f"{Path(save_path).stem}_h264.mp4"))
161
+ try:
162
+ stream = ffmpeg.input(save_path)
163
+ stream = ffmpeg.output(stream, output_h264, vcodec='libx264', acodec='aac', format='mp4', pix_fmt='yuv420p')
164
+ ffmpeg.run(stream, overwrite_output=True)
165
+ os.remove(save_path) # Remove original
166
+ return output_h264
167
+ except ffmpeg.Error as e:
168
+ print(f"FFmpeg error: {e.stderr.decode()}")
169
+ return save_path
170
+
171
def gradio_interface(video, conf_thres, iou_thres):
    """Gradio callback: run detection on the uploaded video and return the result.

    Args:
        video: Value supplied by the ``gr.Video`` input; passed unchanged as the
            first positional argument of ``detect_video``.
        conf_thres: Confidence threshold chosen with the first slider.
        iou_thres: IoU threshold chosen with the second slider.

    Returns:
        The value returned by ``detect_video`` when it is truthy, otherwise the
        literal string ``"Error processing video."``.
    """
    import os  # local import so this block does not depend on the file header

    # The checkpoint path used to be fixed to one developer's desktop, which
    # makes the app unusable anywhere else. YOLOV7_WEIGHTS overrides it; the
    # original path stays as the default so existing setups keep working.
    weights = os.environ.get(
        "YOLOV7_WEIGHTS",
        "/home/myominhtet/Desktop/deepsortfromscratch/yolov7/best.pt",
    )
    img_size = 640  # inference size handed to detect_video
    output_video = detect_video(video, weights, conf_thres, iou_thres, img_size)
    # NOTE(review): this fallback string is returned to a gr.Video output
    # component; confirm Gradio renders it as intended rather than treating it
    # as a file path.
    return output_video if output_video else "Error processing video."
176
+
177
# Gradio UI definition: one video upload and two threshold sliders are wired
# to gradio_interface, whose result is shown in a video player.
interface = gr.Interface(
    title="YOLO Video Detection",
    description="Upload a video to run YOLO detection with custom parameters.",
    fn=gradio_interface,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Slider(minimum=0, maximum=1, value=0.25, step=0.05, label="Confidence Threshold"),
        gr.Slider(minimum=0, maximum=1, value=0.45, step=0.05, label="IoU Threshold"),
    ],
    outputs=gr.Video(label="Processed Video"),
)

if __name__ == "__main__":
    # Start the app only when executed as a script; share=True requests a
    # public Gradio share link.
    interface.launch(share=True)
cfg/baseline/r50-csp.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-ResNet backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Stem, [128]], # 0-P1/2
16
+ [-1, 3, ResCSPC, [128]],
17
+ [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
18
+ [-1, 4, ResCSPC, [256]],
19
+ [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
20
+ [-1, 6, ResCSPC, [512]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
22
+ [-1, 3, ResCSPC, [1024]], # 7
23
+ ]
24
+
25
+ # CSP-Res-PAN head
26
+ head:
27
+ [[-1, 1, SPPCSPC, [512]], # 8
28
+ [-1, 1, Conv, [256, 1, 1]],
29
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30
+ [5, 1, Conv, [256, 1, 1]], # route backbone P4
31
+ [[-1, -2], 1, Concat, [1]],
32
+ [-1, 2, ResCSPB, [256]], # 13
33
+ [-1, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [3, 1, Conv, [128, 1, 1]], # route backbone P3
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 2, ResCSPB, [128]], # 18
38
+ [-1, 1, Conv, [256, 3, 1]],
39
+ [-2, 1, Conv, [256, 3, 2]],
40
+ [[-1, 13], 1, Concat, [1]], # cat
41
+ [-1, 2, ResCSPB, [256]], # 22
42
+ [-1, 1, Conv, [512, 3, 1]],
43
+ [-2, 1, Conv, [512, 3, 2]],
44
+ [[-1, 8], 1, Concat, [1]], # cat
45
+ [-1, 2, ResCSPB, [512]], # 26
46
+ [-1, 1, Conv, [1024, 3, 1]],
47
+
48
+ [[19,23,27], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
cfg/baseline/x50-csp.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-ResNeXt backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Stem, [128]], # 0-P1/2
16
+ [-1, 3, ResXCSPC, [128]],
17
+ [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
18
+ [-1, 4, ResXCSPC, [256]],
19
+ [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
20
+ [-1, 6, ResXCSPC, [512]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
22
+ [-1, 3, ResXCSPC, [1024]], # 7
23
+ ]
24
+
25
+ # CSP-ResX-PAN head
26
+ head:
27
+ [[-1, 1, SPPCSPC, [512]], # 8
28
+ [-1, 1, Conv, [256, 1, 1]],
29
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30
+ [5, 1, Conv, [256, 1, 1]], # route backbone P4
31
+ [[-1, -2], 1, Concat, [1]],
32
+ [-1, 2, ResXCSPB, [256]], # 13
33
+ [-1, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [3, 1, Conv, [128, 1, 1]], # route backbone P3
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 2, ResXCSPB, [128]], # 18
38
+ [-1, 1, Conv, [256, 3, 1]],
39
+ [-2, 1, Conv, [256, 3, 2]],
40
+ [[-1, 13], 1, Concat, [1]], # cat
41
+ [-1, 2, ResXCSPB, [256]], # 22
42
+ [-1, 1, Conv, [512, 3, 1]],
43
+ [-2, 1, Conv, [512, 3, 2]],
44
+ [[-1, 8], 1, Concat, [1]], # cat
45
+ [-1, 2, ResXCSPB, [512]], # 26
46
+ [-1, 1, Conv, [1024, 3, 1]],
47
+
48
+ [[19,23,27], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
cfg/baseline/yolor-csp-x.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.33 # model depth multiple
4
+ width_multiple: 1.25 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-Darknet backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, BottleneckCSPC, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, BottleneckCSPC, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, BottleneckCSPC, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, BottleneckCSPC, [1024]], # 10
26
+ ]
27
+
28
+ # CSP-Dark-PAN head
29
+ head:
30
+ [[-1, 1, SPPCSPC, [512]], # 11
31
+ [-1, 1, Conv, [256, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [8, 1, Conv, [256, 1, 1]], # route backbone P4
34
+ [[-1, -2], 1, Concat, [1]],
35
+ [-1, 2, BottleneckCSPB, [256]], # 16
36
+ [-1, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [6, 1, Conv, [128, 1, 1]], # route backbone P3
39
+ [[-1, -2], 1, Concat, [1]],
40
+ [-1, 2, BottleneckCSPB, [128]], # 21
41
+ [-1, 1, Conv, [256, 3, 1]],
42
+ [-2, 1, Conv, [256, 3, 2]],
43
+ [[-1, 16], 1, Concat, [1]], # cat
44
+ [-1, 2, BottleneckCSPB, [256]], # 25
45
+ [-1, 1, Conv, [512, 3, 1]],
46
+ [-2, 1, Conv, [512, 3, 2]],
47
+ [[-1, 11], 1, Concat, [1]], # cat
48
+ [-1, 2, BottleneckCSPB, [512]], # 29
49
+ [-1, 1, Conv, [1024, 3, 1]],
50
+
51
+ [[22,26,30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
52
+ ]
cfg/baseline/yolor-csp.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-Darknet backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, BottleneckCSPC, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, BottleneckCSPC, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, BottleneckCSPC, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, BottleneckCSPC, [1024]], # 10
26
+ ]
27
+
28
+ # CSP-Dark-PAN head
29
+ head:
30
+ [[-1, 1, SPPCSPC, [512]], # 11
31
+ [-1, 1, Conv, [256, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [8, 1, Conv, [256, 1, 1]], # route backbone P4
34
+ [[-1, -2], 1, Concat, [1]],
35
+ [-1, 2, BottleneckCSPB, [256]], # 16
36
+ [-1, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [6, 1, Conv, [128, 1, 1]], # route backbone P3
39
+ [[-1, -2], 1, Concat, [1]],
40
+ [-1, 2, BottleneckCSPB, [128]], # 21
41
+ [-1, 1, Conv, [256, 3, 1]],
42
+ [-2, 1, Conv, [256, 3, 2]],
43
+ [[-1, 16], 1, Concat, [1]], # cat
44
+ [-1, 2, BottleneckCSPB, [256]], # 25
45
+ [-1, 1, Conv, [512, 3, 1]],
46
+ [-2, 1, Conv, [512, 3, 2]],
47
+ [[-1, 11], 1, Concat, [1]], # cat
48
+ [-1, 2, BottleneckCSPB, [512]], # 29
49
+ [-1, 1, Conv, [1024, 3, 1]],
50
+
51
+ [[22,26,30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
52
+ ]
cfg/baseline/yolor-d6.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # expand model depth
4
+ width_multiple: 1.25 # expand layer channels
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # CSP-Darknet backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+ [-1, 1, DownC, [128]], # 2-P2/4
19
+ [-1, 3, BottleneckCSPA, [128]],
20
+ [-1, 1, DownC, [256]], # 4-P3/8
21
+ [-1, 15, BottleneckCSPA, [256]],
22
+ [-1, 1, DownC, [512]], # 6-P4/16
23
+ [-1, 15, BottleneckCSPA, [512]],
24
+ [-1, 1, DownC, [768]], # 8-P5/32
25
+ [-1, 7, BottleneckCSPA, [768]],
26
+ [-1, 1, DownC, [1024]], # 10-P6/64
27
+ [-1, 7, BottleneckCSPA, [1024]], # 11
28
+ ]
29
+
30
+ # CSP-Dark-PAN head
31
+ head:
32
+ [[-1, 1, SPPCSPC, [512]], # 12
33
+ [-1, 1, Conv, [384, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [-6, 1, Conv, [384, 1, 1]], # route backbone P5
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 3, BottleneckCSPB, [384]], # 17
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [-13, 1, Conv, [256, 1, 1]], # route backbone P4
41
+ [[-1, -2], 1, Concat, [1]],
42
+ [-1, 3, BottleneckCSPB, [256]], # 22
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46
+ [[-1, -2], 1, Concat, [1]],
47
+ [-1, 3, BottleneckCSPB, [128]], # 27
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-2, 1, DownC, [256]],
50
+ [[-1, 22], 1, Concat, [1]], # cat
51
+ [-1, 3, BottleneckCSPB, [256]], # 31
52
+ [-1, 1, Conv, [512, 3, 1]],
53
+ [-2, 1, DownC, [384]],
54
+ [[-1, 17], 1, Concat, [1]], # cat
55
+ [-1, 3, BottleneckCSPB, [384]], # 35
56
+ [-1, 1, Conv, [768, 3, 1]],
57
+ [-2, 1, DownC, [512]],
58
+ [[-1, 12], 1, Concat, [1]], # cat
59
+ [-1, 3, BottleneckCSPB, [512]], # 39
60
+ [-1, 1, Conv, [1024, 3, 1]],
61
+
62
+ [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63
+ ]
cfg/baseline/yolor-e6.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # expand model depth
4
+ width_multiple: 1.25 # expand layer channels
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # CSP-Darknet backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+ [-1, 1, DownC, [128]], # 2-P2/4
19
+ [-1, 3, BottleneckCSPA, [128]],
20
+ [-1, 1, DownC, [256]], # 4-P3/8
21
+ [-1, 7, BottleneckCSPA, [256]],
22
+ [-1, 1, DownC, [512]], # 6-P4/16
23
+ [-1, 7, BottleneckCSPA, [512]],
24
+ [-1, 1, DownC, [768]], # 8-P5/32
25
+ [-1, 3, BottleneckCSPA, [768]],
26
+ [-1, 1, DownC, [1024]], # 10-P6/64
27
+ [-1, 3, BottleneckCSPA, [1024]], # 11
28
+ ]
29
+
30
+ # CSP-Dark-PAN head
31
+ head:
32
+ [[-1, 1, SPPCSPC, [512]], # 12
33
+ [-1, 1, Conv, [384, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [-6, 1, Conv, [384, 1, 1]], # route backbone P5
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 3, BottleneckCSPB, [384]], # 17
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [-13, 1, Conv, [256, 1, 1]], # route backbone P4
41
+ [[-1, -2], 1, Concat, [1]],
42
+ [-1, 3, BottleneckCSPB, [256]], # 22
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46
+ [[-1, -2], 1, Concat, [1]],
47
+ [-1, 3, BottleneckCSPB, [128]], # 27
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-2, 1, DownC, [256]],
50
+ [[-1, 22], 1, Concat, [1]], # cat
51
+ [-1, 3, BottleneckCSPB, [256]], # 31
52
+ [-1, 1, Conv, [512, 3, 1]],
53
+ [-2, 1, DownC, [384]],
54
+ [[-1, 17], 1, Concat, [1]], # cat
55
+ [-1, 3, BottleneckCSPB, [384]], # 35
56
+ [-1, 1, Conv, [768, 3, 1]],
57
+ [-2, 1, DownC, [512]],
58
+ [[-1, 12], 1, Concat, [1]], # cat
59
+ [-1, 3, BottleneckCSPB, [512]], # 39
60
+ [-1, 1, Conv, [1024, 3, 1]],
61
+
62
+ [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63
+ ]
cfg/baseline/yolor-p6.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # expand model depth
4
+ width_multiple: 1.0 # expand layer channels
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # CSP-Darknet backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
19
+ [-1, 3, BottleneckCSPA, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 4-P3/8
21
+ [-1, 7, BottleneckCSPA, [256]],
22
+ [-1, 1, Conv, [384, 3, 2]], # 6-P4/16
23
+ [-1, 7, BottleneckCSPA, [384]],
24
+ [-1, 1, Conv, [512, 3, 2]], # 8-P5/32
25
+ [-1, 3, BottleneckCSPA, [512]],
26
+ [-1, 1, Conv, [640, 3, 2]], # 10-P6/64
27
+ [-1, 3, BottleneckCSPA, [640]], # 11
28
+ ]
29
+
30
+ # CSP-Dark-PAN head
31
+ head:
32
+ [[-1, 1, SPPCSPC, [320]], # 12
33
+ [-1, 1, Conv, [256, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [-6, 1, Conv, [256, 1, 1]], # route backbone P5
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 3, BottleneckCSPB, [256]], # 17
38
+ [-1, 1, Conv, [192, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [-13, 1, Conv, [192, 1, 1]], # route backbone P4
41
+ [[-1, -2], 1, Concat, [1]],
42
+ [-1, 3, BottleneckCSPB, [192]], # 22
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46
+ [[-1, -2], 1, Concat, [1]],
47
+ [-1, 3, BottleneckCSPB, [128]], # 27
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-2, 1, Conv, [192, 3, 2]],
50
+ [[-1, 22], 1, Concat, [1]], # cat
51
+ [-1, 3, BottleneckCSPB, [192]], # 31
52
+ [-1, 1, Conv, [384, 3, 1]],
53
+ [-2, 1, Conv, [256, 3, 2]],
54
+ [[-1, 17], 1, Concat, [1]], # cat
55
+ [-1, 3, BottleneckCSPB, [256]], # 35
56
+ [-1, 1, Conv, [512, 3, 1]],
57
+ [-2, 1, Conv, [320, 3, 2]],
58
+ [[-1, 12], 1, Concat, [1]], # cat
59
+ [-1, 3, BottleneckCSPB, [320]], # 39
60
+ [-1, 1, Conv, [640, 3, 1]],
61
+
62
+ [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63
+ ]
cfg/baseline/yolor-w6.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # expand model depth
4
+ width_multiple: 1.0 # expand layer channels
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # CSP-Darknet backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
19
+ [-1, 3, BottleneckCSPA, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 4-P3/8
21
+ [-1, 7, BottleneckCSPA, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 6-P4/16
23
+ [-1, 7, BottleneckCSPA, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 8-P5/32
25
+ [-1, 3, BottleneckCSPA, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 10-P6/64
27
+ [-1, 3, BottleneckCSPA, [1024]], # 11
28
+ ]
29
+
30
+ # CSP-Dark-PAN head
31
+ head:
32
+ [[-1, 1, SPPCSPC, [512]], # 12
33
+ [-1, 1, Conv, [384, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [-6, 1, Conv, [384, 1, 1]], # route backbone P5
36
+ [[-1, -2], 1, Concat, [1]],
37
+ [-1, 3, BottleneckCSPB, [384]], # 17
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [-13, 1, Conv, [256, 1, 1]], # route backbone P4
41
+ [[-1, -2], 1, Concat, [1]],
42
+ [-1, 3, BottleneckCSPB, [256]], # 22
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46
+ [[-1, -2], 1, Concat, [1]],
47
+ [-1, 3, BottleneckCSPB, [128]], # 27
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-2, 1, Conv, [256, 3, 2]],
50
+ [[-1, 22], 1, Concat, [1]], # cat
51
+ [-1, 3, BottleneckCSPB, [256]], # 31
52
+ [-1, 1, Conv, [512, 3, 1]],
53
+ [-2, 1, Conv, [384, 3, 2]],
54
+ [[-1, 17], 1, Concat, [1]], # cat
55
+ [-1, 3, BottleneckCSPB, [384]], # 35
56
+ [-1, 1, Conv, [768, 3, 1]],
57
+ [-2, 1, Conv, [512, 3, 2]],
58
+ [[-1, 12], 1, Concat, [1]], # cat
59
+ [-1, 3, BottleneckCSPB, [512]], # 39
60
+ [-1, 1, Conv, [1024, 3, 1]],
61
+
62
+ [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63
+ ]
cfg/baseline/yolov3-spp.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # darknet53 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, Bottleneck, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, Bottleneck, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, Bottleneck, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, Bottleneck, [1024]], # 10
26
+ ]
27
+
28
+ # YOLOv3-SPP head
29
+ head:
30
+ [[-1, 1, Bottleneck, [1024, False]],
31
+ [-1, 1, SPP, [512, [5, 9, 13]]],
32
+ [-1, 1, Conv, [1024, 3, 1]],
33
+ [-1, 1, Conv, [512, 1, 1]],
34
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35
+
36
+ [-2, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
39
+ [-1, 1, Bottleneck, [512, False]],
40
+ [-1, 1, Bottleneck, [512, False]],
41
+ [-1, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43
+
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
47
+ [-1, 1, Bottleneck, [256, False]],
48
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49
+
50
+ [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51
+ ]
cfg/baseline/yolov3.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # darknet53 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, Bottleneck, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, Bottleneck, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, Bottleneck, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, Bottleneck, [1024]], # 10
26
+ ]
27
+
28
+ # YOLOv3 head
29
+ head:
30
+ [[-1, 1, Bottleneck, [1024, False]],
31
+ [-1, 1, Conv, [512, [1, 1]]],
32
+ [-1, 1, Conv, [1024, 3, 1]],
33
+ [-1, 1, Conv, [512, 1, 1]],
34
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35
+
36
+ [-2, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
39
+ [-1, 1, Bottleneck, [512, False]],
40
+ [-1, 1, Bottleneck, [512, False]],
41
+ [-1, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43
+
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
47
+ [-1, 1, Bottleneck, [256, False]],
48
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49
+
50
+ [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51
+ ]
cfg/baseline/yolov4-csp.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # CSP-Darknet backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17
+ [-1, 1, Bottleneck, [64]],
18
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19
+ [-1, 2, BottleneckCSPC, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21
+ [-1, 8, BottleneckCSPC, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23
+ [-1, 8, BottleneckCSPC, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25
+ [-1, 4, BottleneckCSPC, [1024]], # 10
26
+ ]
27
+
28
+ # CSP-Dark-PAN head
29
+ head:
30
+ [[-1, 1, SPPCSPC, [512]], # 11
31
+ [-1, 1, Conv, [256, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [8, 1, Conv, [256, 1, 1]], # route backbone P4
34
+ [[-1, -2], 1, Concat, [1]],
35
+ [-1, 2, BottleneckCSPB, [256]], # 16
36
+ [-1, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [6, 1, Conv, [128, 1, 1]], # route backbone P3
39
+ [[-1, -2], 1, Concat, [1]],
40
+ [-1, 2, BottleneckCSPB, [128]], # 21
41
+ [-1, 1, Conv, [256, 3, 1]],
42
+ [-2, 1, Conv, [256, 3, 2]],
43
+ [[-1, 16], 1, Concat, [1]], # cat
44
+ [-1, 2, BottleneckCSPB, [256]], # 25
45
+ [-1, 1, Conv, [512, 3, 1]],
46
+ [-2, 1, Conv, [512, 3, 2]],
47
+ [[-1, 11], 1, Concat, [1]], # cat
48
+ [-1, 2, BottleneckCSPB, [512]], # 29
49
+ [-1, 1, Conv, [1024, 3, 1]],
50
+
51
+ [[22,26,30], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
52
+ ]
cfg/deploy/yolov7-d6.yaml ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-d6 backbone
14
+ backbone:
15
+ # [from, number, module, args],
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [96, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [192]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [-1, 1, Conv, [64, 3, 1]],
29
+ [-1, 1, Conv, [64, 3, 1]],
30
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
31
+ [-1, 1, Conv, [192, 1, 1]], # 14
32
+
33
+ [-1, 1, DownC, [384]], # 15-P3/8
34
+ [-1, 1, Conv, [128, 1, 1]],
35
+ [-2, 1, Conv, [128, 1, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [-1, 1, Conv, [128, 3, 1]],
42
+ [-1, 1, Conv, [128, 3, 1]],
43
+ [-1, 1, Conv, [128, 3, 1]],
44
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
45
+ [-1, 1, Conv, [384, 1, 1]], # 27
46
+
47
+ [-1, 1, DownC, [768]], # 28-P4/16
48
+ [-1, 1, Conv, [256, 1, 1]],
49
+ [-2, 1, Conv, [256, 1, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [-1, 1, Conv, [256, 3, 1]],
53
+ [-1, 1, Conv, [256, 3, 1]],
54
+ [-1, 1, Conv, [256, 3, 1]],
55
+ [-1, 1, Conv, [256, 3, 1]],
56
+ [-1, 1, Conv, [256, 3, 1]],
57
+ [-1, 1, Conv, [256, 3, 1]],
58
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
59
+ [-1, 1, Conv, [768, 1, 1]], # 40
60
+
61
+ [-1, 1, DownC, [1152]], # 41-P5/32
62
+ [-1, 1, Conv, [384, 1, 1]],
63
+ [-2, 1, Conv, [384, 1, 1]],
64
+ [-1, 1, Conv, [384, 3, 1]],
65
+ [-1, 1, Conv, [384, 3, 1]],
66
+ [-1, 1, Conv, [384, 3, 1]],
67
+ [-1, 1, Conv, [384, 3, 1]],
68
+ [-1, 1, Conv, [384, 3, 1]],
69
+ [-1, 1, Conv, [384, 3, 1]],
70
+ [-1, 1, Conv, [384, 3, 1]],
71
+ [-1, 1, Conv, [384, 3, 1]],
72
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
73
+ [-1, 1, Conv, [1152, 1, 1]], # 53
74
+
75
+ [-1, 1, DownC, [1536]], # 54-P6/64
76
+ [-1, 1, Conv, [512, 1, 1]],
77
+ [-2, 1, Conv, [512, 1, 1]],
78
+ [-1, 1, Conv, [512, 3, 1]],
79
+ [-1, 1, Conv, [512, 3, 1]],
80
+ [-1, 1, Conv, [512, 3, 1]],
81
+ [-1, 1, Conv, [512, 3, 1]],
82
+ [-1, 1, Conv, [512, 3, 1]],
83
+ [-1, 1, Conv, [512, 3, 1]],
84
+ [-1, 1, Conv, [512, 3, 1]],
85
+ [-1, 1, Conv, [512, 3, 1]],
86
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
87
+ [-1, 1, Conv, [1536, 1, 1]], # 66
88
+ ]
89
+
90
+ # yolov7-d6 head
91
+ head:
92
+ [[-1, 1, SPPCSPC, [768]], # 67
93
+
94
+ [-1, 1, Conv, [576, 1, 1]],
95
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
96
+ [53, 1, Conv, [576, 1, 1]], # route backbone P5
97
+ [[-1, -2], 1, Concat, [1]],
98
+
99
+ [-1, 1, Conv, [384, 1, 1]],
100
+ [-2, 1, Conv, [384, 1, 1]],
101
+ [-1, 1, Conv, [192, 3, 1]],
102
+ [-1, 1, Conv, [192, 3, 1]],
103
+ [-1, 1, Conv, [192, 3, 1]],
104
+ [-1, 1, Conv, [192, 3, 1]],
105
+ [-1, 1, Conv, [192, 3, 1]],
106
+ [-1, 1, Conv, [192, 3, 1]],
107
+ [-1, 1, Conv, [192, 3, 1]],
108
+ [-1, 1, Conv, [192, 3, 1]],
109
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
110
+ [-1, 1, Conv, [576, 1, 1]], # 83
111
+
112
+ [-1, 1, Conv, [384, 1, 1]],
113
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
114
+ [40, 1, Conv, [384, 1, 1]], # route backbone P4
115
+ [[-1, -2], 1, Concat, [1]],
116
+
117
+ [-1, 1, Conv, [256, 1, 1]],
118
+ [-2, 1, Conv, [256, 1, 1]],
119
+ [-1, 1, Conv, [128, 3, 1]],
120
+ [-1, 1, Conv, [128, 3, 1]],
121
+ [-1, 1, Conv, [128, 3, 1]],
122
+ [-1, 1, Conv, [128, 3, 1]],
123
+ [-1, 1, Conv, [128, 3, 1]],
124
+ [-1, 1, Conv, [128, 3, 1]],
125
+ [-1, 1, Conv, [128, 3, 1]],
126
+ [-1, 1, Conv, [128, 3, 1]],
127
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
128
+ [-1, 1, Conv, [384, 1, 1]], # 99
129
+
130
+ [-1, 1, Conv, [192, 1, 1]],
131
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
132
+ [27, 1, Conv, [192, 1, 1]], # route backbone P3
133
+ [[-1, -2], 1, Concat, [1]],
134
+
135
+ [-1, 1, Conv, [128, 1, 1]],
136
+ [-2, 1, Conv, [128, 1, 1]],
137
+ [-1, 1, Conv, [64, 3, 1]],
138
+ [-1, 1, Conv, [64, 3, 1]],
139
+ [-1, 1, Conv, [64, 3, 1]],
140
+ [-1, 1, Conv, [64, 3, 1]],
141
+ [-1, 1, Conv, [64, 3, 1]],
142
+ [-1, 1, Conv, [64, 3, 1]],
143
+ [-1, 1, Conv, [64, 3, 1]],
144
+ [-1, 1, Conv, [64, 3, 1]],
145
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
146
+ [-1, 1, Conv, [192, 1, 1]], # 115
147
+
148
+ [-1, 1, DownC, [384]],
149
+ [[-1, 99], 1, Concat, [1]],
150
+
151
+ [-1, 1, Conv, [256, 1, 1]],
152
+ [-2, 1, Conv, [256, 1, 1]],
153
+ [-1, 1, Conv, [128, 3, 1]],
154
+ [-1, 1, Conv, [128, 3, 1]],
155
+ [-1, 1, Conv, [128, 3, 1]],
156
+ [-1, 1, Conv, [128, 3, 1]],
157
+ [-1, 1, Conv, [128, 3, 1]],
158
+ [-1, 1, Conv, [128, 3, 1]],
159
+ [-1, 1, Conv, [128, 3, 1]],
160
+ [-1, 1, Conv, [128, 3, 1]],
161
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
162
+ [-1, 1, Conv, [384, 1, 1]], # 129
163
+
164
+ [-1, 1, DownC, [576]],
165
+ [[-1, 83], 1, Concat, [1]],
166
+
167
+ [-1, 1, Conv, [384, 1, 1]],
168
+ [-2, 1, Conv, [384, 1, 1]],
169
+ [-1, 1, Conv, [192, 3, 1]],
170
+ [-1, 1, Conv, [192, 3, 1]],
171
+ [-1, 1, Conv, [192, 3, 1]],
172
+ [-1, 1, Conv, [192, 3, 1]],
173
+ [-1, 1, Conv, [192, 3, 1]],
174
+ [-1, 1, Conv, [192, 3, 1]],
175
+ [-1, 1, Conv, [192, 3, 1]],
176
+ [-1, 1, Conv, [192, 3, 1]],
177
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
178
+ [-1, 1, Conv, [576, 1, 1]], # 143
179
+
180
+ [-1, 1, DownC, [768]],
181
+ [[-1, 67], 1, Concat, [1]],
182
+
183
+ [-1, 1, Conv, [512, 1, 1]],
184
+ [-2, 1, Conv, [512, 1, 1]],
185
+ [-1, 1, Conv, [256, 3, 1]],
186
+ [-1, 1, Conv, [256, 3, 1]],
187
+ [-1, 1, Conv, [256, 3, 1]],
188
+ [-1, 1, Conv, [256, 3, 1]],
189
+ [-1, 1, Conv, [256, 3, 1]],
190
+ [-1, 1, Conv, [256, 3, 1]],
191
+ [-1, 1, Conv, [256, 3, 1]],
192
+ [-1, 1, Conv, [256, 3, 1]],
193
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
194
+ [-1, 1, Conv, [768, 1, 1]], # 157
195
+
196
+ [115, 1, Conv, [384, 3, 1]],
197
+ [129, 1, Conv, [768, 3, 1]],
198
+ [143, 1, Conv, [1152, 3, 1]],
199
+ [157, 1, Conv, [1536, 3, 1]],
200
+
201
+ [[158,159,160,161], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
202
+ ]
cfg/deploy/yolov7-e6.yaml ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-e6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [80, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [160]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
29
+ [-1, 1, Conv, [160, 1, 1]], # 12
30
+
31
+ [-1, 1, DownC, [320]], # 13-P3/8
32
+ [-1, 1, Conv, [128, 1, 1]],
33
+ [-2, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
41
+ [-1, 1, Conv, [320, 1, 1]], # 23
42
+
43
+ [-1, 1, DownC, [640]], # 24-P4/16
44
+ [-1, 1, Conv, [256, 1, 1]],
45
+ [-2, 1, Conv, [256, 1, 1]],
46
+ [-1, 1, Conv, [256, 3, 1]],
47
+ [-1, 1, Conv, [256, 3, 1]],
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-1, 1, Conv, [256, 3, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
53
+ [-1, 1, Conv, [640, 1, 1]], # 34
54
+
55
+ [-1, 1, DownC, [960]], # 35-P5/32
56
+ [-1, 1, Conv, [384, 1, 1]],
57
+ [-2, 1, Conv, [384, 1, 1]],
58
+ [-1, 1, Conv, [384, 3, 1]],
59
+ [-1, 1, Conv, [384, 3, 1]],
60
+ [-1, 1, Conv, [384, 3, 1]],
61
+ [-1, 1, Conv, [384, 3, 1]],
62
+ [-1, 1, Conv, [384, 3, 1]],
63
+ [-1, 1, Conv, [384, 3, 1]],
64
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
65
+ [-1, 1, Conv, [960, 1, 1]], # 45
66
+
67
+ [-1, 1, DownC, [1280]], # 46-P6/64
68
+ [-1, 1, Conv, [512, 1, 1]],
69
+ [-2, 1, Conv, [512, 1, 1]],
70
+ [-1, 1, Conv, [512, 3, 1]],
71
+ [-1, 1, Conv, [512, 3, 1]],
72
+ [-1, 1, Conv, [512, 3, 1]],
73
+ [-1, 1, Conv, [512, 3, 1]],
74
+ [-1, 1, Conv, [512, 3, 1]],
75
+ [-1, 1, Conv, [512, 3, 1]],
76
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
77
+ [-1, 1, Conv, [1280, 1, 1]], # 56
78
+ ]
79
+
80
+ # yolov7-e6 head
81
+ head:
82
+ [[-1, 1, SPPCSPC, [640]], # 57
83
+
84
+ [-1, 1, Conv, [480, 1, 1]],
85
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
86
+ [45, 1, Conv, [480, 1, 1]], # route backbone P5
87
+ [[-1, -2], 1, Concat, [1]],
88
+
89
+ [-1, 1, Conv, [384, 1, 1]],
90
+ [-2, 1, Conv, [384, 1, 1]],
91
+ [-1, 1, Conv, [192, 3, 1]],
92
+ [-1, 1, Conv, [192, 3, 1]],
93
+ [-1, 1, Conv, [192, 3, 1]],
94
+ [-1, 1, Conv, [192, 3, 1]],
95
+ [-1, 1, Conv, [192, 3, 1]],
96
+ [-1, 1, Conv, [192, 3, 1]],
97
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
98
+ [-1, 1, Conv, [480, 1, 1]], # 71
99
+
100
+ [-1, 1, Conv, [320, 1, 1]],
101
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
102
+ [34, 1, Conv, [320, 1, 1]], # route backbone P4
103
+ [[-1, -2], 1, Concat, [1]],
104
+
105
+ [-1, 1, Conv, [256, 1, 1]],
106
+ [-2, 1, Conv, [256, 1, 1]],
107
+ [-1, 1, Conv, [128, 3, 1]],
108
+ [-1, 1, Conv, [128, 3, 1]],
109
+ [-1, 1, Conv, [128, 3, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
114
+ [-1, 1, Conv, [320, 1, 1]], # 85
115
+
116
+ [-1, 1, Conv, [160, 1, 1]],
117
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
118
+ [23, 1, Conv, [160, 1, 1]], # route backbone P3
119
+ [[-1, -2], 1, Concat, [1]],
120
+
121
+ [-1, 1, Conv, [128, 1, 1]],
122
+ [-2, 1, Conv, [128, 1, 1]],
123
+ [-1, 1, Conv, [64, 3, 1]],
124
+ [-1, 1, Conv, [64, 3, 1]],
125
+ [-1, 1, Conv, [64, 3, 1]],
126
+ [-1, 1, Conv, [64, 3, 1]],
127
+ [-1, 1, Conv, [64, 3, 1]],
128
+ [-1, 1, Conv, [64, 3, 1]],
129
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
130
+ [-1, 1, Conv, [160, 1, 1]], # 99
131
+
132
+ [-1, 1, DownC, [320]],
133
+ [[-1, 85], 1, Concat, [1]],
134
+
135
+ [-1, 1, Conv, [256, 1, 1]],
136
+ [-2, 1, Conv, [256, 1, 1]],
137
+ [-1, 1, Conv, [128, 3, 1]],
138
+ [-1, 1, Conv, [128, 3, 1]],
139
+ [-1, 1, Conv, [128, 3, 1]],
140
+ [-1, 1, Conv, [128, 3, 1]],
141
+ [-1, 1, Conv, [128, 3, 1]],
142
+ [-1, 1, Conv, [128, 3, 1]],
143
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
144
+ [-1, 1, Conv, [320, 1, 1]], # 111
145
+
146
+ [-1, 1, DownC, [480]],
147
+ [[-1, 71], 1, Concat, [1]],
148
+
149
+ [-1, 1, Conv, [384, 1, 1]],
150
+ [-2, 1, Conv, [384, 1, 1]],
151
+ [-1, 1, Conv, [192, 3, 1]],
152
+ [-1, 1, Conv, [192, 3, 1]],
153
+ [-1, 1, Conv, [192, 3, 1]],
154
+ [-1, 1, Conv, [192, 3, 1]],
155
+ [-1, 1, Conv, [192, 3, 1]],
156
+ [-1, 1, Conv, [192, 3, 1]],
157
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
158
+ [-1, 1, Conv, [480, 1, 1]], # 123
159
+
160
+ [-1, 1, DownC, [640]],
161
+ [[-1, 57], 1, Concat, [1]],
162
+
163
+ [-1, 1, Conv, [512, 1, 1]],
164
+ [-2, 1, Conv, [512, 1, 1]],
165
+ [-1, 1, Conv, [256, 3, 1]],
166
+ [-1, 1, Conv, [256, 3, 1]],
167
+ [-1, 1, Conv, [256, 3, 1]],
168
+ [-1, 1, Conv, [256, 3, 1]],
169
+ [-1, 1, Conv, [256, 3, 1]],
170
+ [-1, 1, Conv, [256, 3, 1]],
171
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
172
+ [-1, 1, Conv, [640, 1, 1]], # 135
173
+
174
+ [99, 1, Conv, [320, 3, 1]],
175
+ [111, 1, Conv, [640, 3, 1]],
176
+ [123, 1, Conv, [960, 3, 1]],
177
+ [135, 1, Conv, [1280, 3, 1]],
178
+
179
+ [[136,137,138,139], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
180
+ ]
cfg/deploy/yolov7-e6e.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-e6e backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [80, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [160]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
29
+ [-1, 1, Conv, [160, 1, 1]], # 12
30
+ [-11, 1, Conv, [64, 1, 1]],
31
+ [-12, 1, Conv, [64, 1, 1]],
32
+ [-1, 1, Conv, [64, 3, 1]],
33
+ [-1, 1, Conv, [64, 3, 1]],
34
+ [-1, 1, Conv, [64, 3, 1]],
35
+ [-1, 1, Conv, [64, 3, 1]],
36
+ [-1, 1, Conv, [64, 3, 1]],
37
+ [-1, 1, Conv, [64, 3, 1]],
38
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
39
+ [-1, 1, Conv, [160, 1, 1]], # 22
40
+ [[-1, -11], 1, Shortcut, [1]], # 23
41
+
42
+ [-1, 1, DownC, [320]], # 24-P3/8
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, Conv, [128, 3, 1]],
46
+ [-1, 1, Conv, [128, 3, 1]],
47
+ [-1, 1, Conv, [128, 3, 1]],
48
+ [-1, 1, Conv, [128, 3, 1]],
49
+ [-1, 1, Conv, [128, 3, 1]],
50
+ [-1, 1, Conv, [128, 3, 1]],
51
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
52
+ [-1, 1, Conv, [320, 1, 1]], # 34
53
+ [-11, 1, Conv, [128, 1, 1]],
54
+ [-12, 1, Conv, [128, 1, 1]],
55
+ [-1, 1, Conv, [128, 3, 1]],
56
+ [-1, 1, Conv, [128, 3, 1]],
57
+ [-1, 1, Conv, [128, 3, 1]],
58
+ [-1, 1, Conv, [128, 3, 1]],
59
+ [-1, 1, Conv, [128, 3, 1]],
60
+ [-1, 1, Conv, [128, 3, 1]],
61
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
62
+ [-1, 1, Conv, [320, 1, 1]], # 44
63
+ [[-1, -11], 1, Shortcut, [1]], # 45
64
+
65
+ [-1, 1, DownC, [640]], # 46-P4/16
66
+ [-1, 1, Conv, [256, 1, 1]],
67
+ [-2, 1, Conv, [256, 1, 1]],
68
+ [-1, 1, Conv, [256, 3, 1]],
69
+ [-1, 1, Conv, [256, 3, 1]],
70
+ [-1, 1, Conv, [256, 3, 1]],
71
+ [-1, 1, Conv, [256, 3, 1]],
72
+ [-1, 1, Conv, [256, 3, 1]],
73
+ [-1, 1, Conv, [256, 3, 1]],
74
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
75
+ [-1, 1, Conv, [640, 1, 1]], # 56
76
+ [-11, 1, Conv, [256, 1, 1]],
77
+ [-12, 1, Conv, [256, 1, 1]],
78
+ [-1, 1, Conv, [256, 3, 1]],
79
+ [-1, 1, Conv, [256, 3, 1]],
80
+ [-1, 1, Conv, [256, 3, 1]],
81
+ [-1, 1, Conv, [256, 3, 1]],
82
+ [-1, 1, Conv, [256, 3, 1]],
83
+ [-1, 1, Conv, [256, 3, 1]],
84
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
85
+ [-1, 1, Conv, [640, 1, 1]], # 66
86
+ [[-1, -11], 1, Shortcut, [1]], # 67
87
+
88
+ [-1, 1, DownC, [960]], # 68-P5/32
89
+ [-1, 1, Conv, [384, 1, 1]],
90
+ [-2, 1, Conv, [384, 1, 1]],
91
+ [-1, 1, Conv, [384, 3, 1]],
92
+ [-1, 1, Conv, [384, 3, 1]],
93
+ [-1, 1, Conv, [384, 3, 1]],
94
+ [-1, 1, Conv, [384, 3, 1]],
95
+ [-1, 1, Conv, [384, 3, 1]],
96
+ [-1, 1, Conv, [384, 3, 1]],
97
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
98
+ [-1, 1, Conv, [960, 1, 1]], # 78
99
+ [-11, 1, Conv, [384, 1, 1]],
100
+ [-12, 1, Conv, [384, 1, 1]],
101
+ [-1, 1, Conv, [384, 3, 1]],
102
+ [-1, 1, Conv, [384, 3, 1]],
103
+ [-1, 1, Conv, [384, 3, 1]],
104
+ [-1, 1, Conv, [384, 3, 1]],
105
+ [-1, 1, Conv, [384, 3, 1]],
106
+ [-1, 1, Conv, [384, 3, 1]],
107
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
108
+ [-1, 1, Conv, [960, 1, 1]], # 88
109
+ [[-1, -11], 1, Shortcut, [1]], # 89
110
+
111
+ [-1, 1, DownC, [1280]], # 90-P6/64
112
+ [-1, 1, Conv, [512, 1, 1]],
113
+ [-2, 1, Conv, [512, 1, 1]],
114
+ [-1, 1, Conv, [512, 3, 1]],
115
+ [-1, 1, Conv, [512, 3, 1]],
116
+ [-1, 1, Conv, [512, 3, 1]],
117
+ [-1, 1, Conv, [512, 3, 1]],
118
+ [-1, 1, Conv, [512, 3, 1]],
119
+ [-1, 1, Conv, [512, 3, 1]],
120
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
121
+ [-1, 1, Conv, [1280, 1, 1]], # 100
122
+ [-11, 1, Conv, [512, 1, 1]],
123
+ [-12, 1, Conv, [512, 1, 1]],
124
+ [-1, 1, Conv, [512, 3, 1]],
125
+ [-1, 1, Conv, [512, 3, 1]],
126
+ [-1, 1, Conv, [512, 3, 1]],
127
+ [-1, 1, Conv, [512, 3, 1]],
128
+ [-1, 1, Conv, [512, 3, 1]],
129
+ [-1, 1, Conv, [512, 3, 1]],
130
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
131
+ [-1, 1, Conv, [1280, 1, 1]], # 110
132
+ [[-1, -11], 1, Shortcut, [1]], # 111
133
+ ]
134
+
135
+ # yolov7-e6e head
136
+ head:
137
+ [[-1, 1, SPPCSPC, [640]], # 112
138
+
139
+ [-1, 1, Conv, [480, 1, 1]],
140
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
141
+ [89, 1, Conv, [480, 1, 1]], # route backbone P5
142
+ [[-1, -2], 1, Concat, [1]],
143
+
144
+ [-1, 1, Conv, [384, 1, 1]],
145
+ [-2, 1, Conv, [384, 1, 1]],
146
+ [-1, 1, Conv, [192, 3, 1]],
147
+ [-1, 1, Conv, [192, 3, 1]],
148
+ [-1, 1, Conv, [192, 3, 1]],
149
+ [-1, 1, Conv, [192, 3, 1]],
150
+ [-1, 1, Conv, [192, 3, 1]],
151
+ [-1, 1, Conv, [192, 3, 1]],
152
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
153
+ [-1, 1, Conv, [480, 1, 1]], # 126
154
+ [-11, 1, Conv, [384, 1, 1]],
155
+ [-12, 1, Conv, [384, 1, 1]],
156
+ [-1, 1, Conv, [192, 3, 1]],
157
+ [-1, 1, Conv, [192, 3, 1]],
158
+ [-1, 1, Conv, [192, 3, 1]],
159
+ [-1, 1, Conv, [192, 3, 1]],
160
+ [-1, 1, Conv, [192, 3, 1]],
161
+ [-1, 1, Conv, [192, 3, 1]],
162
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
163
+ [-1, 1, Conv, [480, 1, 1]], # 136
164
+ [[-1, -11], 1, Shortcut, [1]], # 137
165
+
166
+ [-1, 1, Conv, [320, 1, 1]],
167
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
168
+ [67, 1, Conv, [320, 1, 1]], # route backbone P4
169
+ [[-1, -2], 1, Concat, [1]],
170
+
171
+ [-1, 1, Conv, [256, 1, 1]],
172
+ [-2, 1, Conv, [256, 1, 1]],
173
+ [-1, 1, Conv, [128, 3, 1]],
174
+ [-1, 1, Conv, [128, 3, 1]],
175
+ [-1, 1, Conv, [128, 3, 1]],
176
+ [-1, 1, Conv, [128, 3, 1]],
177
+ [-1, 1, Conv, [128, 3, 1]],
178
+ [-1, 1, Conv, [128, 3, 1]],
179
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
180
+ [-1, 1, Conv, [320, 1, 1]], # 151
181
+ [-11, 1, Conv, [256, 1, 1]],
182
+ [-12, 1, Conv, [256, 1, 1]],
183
+ [-1, 1, Conv, [128, 3, 1]],
184
+ [-1, 1, Conv, [128, 3, 1]],
185
+ [-1, 1, Conv, [128, 3, 1]],
186
+ [-1, 1, Conv, [128, 3, 1]],
187
+ [-1, 1, Conv, [128, 3, 1]],
188
+ [-1, 1, Conv, [128, 3, 1]],
189
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
190
+ [-1, 1, Conv, [320, 1, 1]], # 161
191
+ [[-1, -11], 1, Shortcut, [1]], # 162
192
+
193
+ [-1, 1, Conv, [160, 1, 1]],
194
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
195
+ [45, 1, Conv, [160, 1, 1]], # route backbone P3
196
+ [[-1, -2], 1, Concat, [1]],
197
+
198
+ [-1, 1, Conv, [128, 1, 1]],
199
+ [-2, 1, Conv, [128, 1, 1]],
200
+ [-1, 1, Conv, [64, 3, 1]],
201
+ [-1, 1, Conv, [64, 3, 1]],
202
+ [-1, 1, Conv, [64, 3, 1]],
203
+ [-1, 1, Conv, [64, 3, 1]],
204
+ [-1, 1, Conv, [64, 3, 1]],
205
+ [-1, 1, Conv, [64, 3, 1]],
206
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
207
+ [-1, 1, Conv, [160, 1, 1]], # 176
208
+ [-11, 1, Conv, [128, 1, 1]],
209
+ [-12, 1, Conv, [128, 1, 1]],
210
+ [-1, 1, Conv, [64, 3, 1]],
211
+ [-1, 1, Conv, [64, 3, 1]],
212
+ [-1, 1, Conv, [64, 3, 1]],
213
+ [-1, 1, Conv, [64, 3, 1]],
214
+ [-1, 1, Conv, [64, 3, 1]],
215
+ [-1, 1, Conv, [64, 3, 1]],
216
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
217
+ [-1, 1, Conv, [160, 1, 1]], # 186
218
+ [[-1, -11], 1, Shortcut, [1]], # 187
219
+
220
+ [-1, 1, DownC, [320]],
221
+ [[-1, 162], 1, Concat, [1]],
222
+
223
+ [-1, 1, Conv, [256, 1, 1]],
224
+ [-2, 1, Conv, [256, 1, 1]],
225
+ [-1, 1, Conv, [128, 3, 1]],
226
+ [-1, 1, Conv, [128, 3, 1]],
227
+ [-1, 1, Conv, [128, 3, 1]],
228
+ [-1, 1, Conv, [128, 3, 1]],
229
+ [-1, 1, Conv, [128, 3, 1]],
230
+ [-1, 1, Conv, [128, 3, 1]],
231
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
232
+ [-1, 1, Conv, [320, 1, 1]], # 199
233
+ [-11, 1, Conv, [256, 1, 1]],
234
+ [-12, 1, Conv, [256, 1, 1]],
235
+ [-1, 1, Conv, [128, 3, 1]],
236
+ [-1, 1, Conv, [128, 3, 1]],
237
+ [-1, 1, Conv, [128, 3, 1]],
238
+ [-1, 1, Conv, [128, 3, 1]],
239
+ [-1, 1, Conv, [128, 3, 1]],
240
+ [-1, 1, Conv, [128, 3, 1]],
241
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
242
+ [-1, 1, Conv, [320, 1, 1]], # 209
243
+ [[-1, -11], 1, Shortcut, [1]], # 210
244
+
245
+ [-1, 1, DownC, [480]],
246
+ [[-1, 137], 1, Concat, [1]],
247
+
248
+ [-1, 1, Conv, [384, 1, 1]],
249
+ [-2, 1, Conv, [384, 1, 1]],
250
+ [-1, 1, Conv, [192, 3, 1]],
251
+ [-1, 1, Conv, [192, 3, 1]],
252
+ [-1, 1, Conv, [192, 3, 1]],
253
+ [-1, 1, Conv, [192, 3, 1]],
254
+ [-1, 1, Conv, [192, 3, 1]],
255
+ [-1, 1, Conv, [192, 3, 1]],
256
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
257
+ [-1, 1, Conv, [480, 1, 1]], # 222
258
+ [-11, 1, Conv, [384, 1, 1]],
259
+ [-12, 1, Conv, [384, 1, 1]],
260
+ [-1, 1, Conv, [192, 3, 1]],
261
+ [-1, 1, Conv, [192, 3, 1]],
262
+ [-1, 1, Conv, [192, 3, 1]],
263
+ [-1, 1, Conv, [192, 3, 1]],
264
+ [-1, 1, Conv, [192, 3, 1]],
265
+ [-1, 1, Conv, [192, 3, 1]],
266
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
267
+ [-1, 1, Conv, [480, 1, 1]], # 232
268
+ [[-1, -11], 1, Shortcut, [1]], # 233
269
+
270
+ [-1, 1, DownC, [640]],
271
+ [[-1, 112], 1, Concat, [1]],
272
+
273
+ [-1, 1, Conv, [512, 1, 1]],
274
+ [-2, 1, Conv, [512, 1, 1]],
275
+ [-1, 1, Conv, [256, 3, 1]],
276
+ [-1, 1, Conv, [256, 3, 1]],
277
+ [-1, 1, Conv, [256, 3, 1]],
278
+ [-1, 1, Conv, [256, 3, 1]],
279
+ [-1, 1, Conv, [256, 3, 1]],
280
+ [-1, 1, Conv, [256, 3, 1]],
281
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
282
+ [-1, 1, Conv, [640, 1, 1]], # 245
283
+ [-11, 1, Conv, [512, 1, 1]],
284
+ [-12, 1, Conv, [512, 1, 1]],
285
+ [-1, 1, Conv, [256, 3, 1]],
286
+ [-1, 1, Conv, [256, 3, 1]],
287
+ [-1, 1, Conv, [256, 3, 1]],
288
+ [-1, 1, Conv, [256, 3, 1]],
289
+ [-1, 1, Conv, [256, 3, 1]],
290
+ [-1, 1, Conv, [256, 3, 1]],
291
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
292
+ [-1, 1, Conv, [640, 1, 1]], # 255
293
+ [[-1, -11], 1, Shortcut, [1]], # 256
294
+
295
+ [187, 1, Conv, [320, 3, 1]],
296
+ [210, 1, Conv, [640, 3, 1]],
297
+ [233, 1, Conv, [960, 3, 1]],
298
+ [256, 1, Conv, [1280, 3, 1]],
299
+
300
+ [[257,258,259,260], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
301
+ ]
cfg/deploy/yolov7-tiny-silu.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # YOLOv7-tiny backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 2]], # 0-P1/2
16
+
17
+ [-1, 1, Conv, [64, 3, 2]], # 1-P2/4
18
+
19
+ [-1, 1, Conv, [32, 1, 1]],
20
+ [-2, 1, Conv, [32, 1, 1]],
21
+ [-1, 1, Conv, [32, 3, 1]],
22
+ [-1, 1, Conv, [32, 3, 1]],
23
+ [[-1, -2, -3, -4], 1, Concat, [1]],
24
+ [-1, 1, Conv, [64, 1, 1]], # 7
25
+
26
+ [-1, 1, MP, []], # 8-P3/8
27
+ [-1, 1, Conv, [64, 1, 1]],
28
+ [-2, 1, Conv, [64, 1, 1]],
29
+ [-1, 1, Conv, [64, 3, 1]],
30
+ [-1, 1, Conv, [64, 3, 1]],
31
+ [[-1, -2, -3, -4], 1, Concat, [1]],
32
+ [-1, 1, Conv, [128, 1, 1]], # 14
33
+
34
+ [-1, 1, MP, []], # 15-P4/16
35
+ [-1, 1, Conv, [128, 1, 1]],
36
+ [-2, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [[-1, -2, -3, -4], 1, Concat, [1]],
40
+ [-1, 1, Conv, [256, 1, 1]], # 21
41
+
42
+ [-1, 1, MP, []], # 22-P5/32
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-2, 1, Conv, [256, 1, 1]],
45
+ [-1, 1, Conv, [256, 3, 1]],
46
+ [-1, 1, Conv, [256, 3, 1]],
47
+ [[-1, -2, -3, -4], 1, Concat, [1]],
48
+ [-1, 1, Conv, [512, 1, 1]], # 28
49
+ ]
50
+
51
+ # YOLOv7-tiny head
52
+ head:
53
+ [[-1, 1, Conv, [256, 1, 1]],
54
+ [-2, 1, Conv, [256, 1, 1]],
55
+ [-1, 1, SP, [5]],
56
+ [-2, 1, SP, [9]],
57
+ [-3, 1, SP, [13]],
58
+ [[-1, -2, -3, -4], 1, Concat, [1]],
59
+ [-1, 1, Conv, [256, 1, 1]],
60
+ [[-1, -7], 1, Concat, [1]],
61
+ [-1, 1, Conv, [256, 1, 1]], # 37
62
+
63
+ [-1, 1, Conv, [128, 1, 1]],
64
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
65
+ [21, 1, Conv, [128, 1, 1]], # route backbone P4
66
+ [[-1, -2], 1, Concat, [1]],
67
+
68
+ [-1, 1, Conv, [64, 1, 1]],
69
+ [-2, 1, Conv, [64, 1, 1]],
70
+ [-1, 1, Conv, [64, 3, 1]],
71
+ [-1, 1, Conv, [64, 3, 1]],
72
+ [[-1, -2, -3, -4], 1, Concat, [1]],
73
+ [-1, 1, Conv, [128, 1, 1]], # 47
74
+
75
+ [-1, 1, Conv, [64, 1, 1]],
76
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
77
+ [14, 1, Conv, [64, 1, 1]], # route backbone P3
78
+ [[-1, -2], 1, Concat, [1]],
79
+
80
+ [-1, 1, Conv, [32, 1, 1]],
81
+ [-2, 1, Conv, [32, 1, 1]],
82
+ [-1, 1, Conv, [32, 3, 1]],
83
+ [-1, 1, Conv, [32, 3, 1]],
84
+ [[-1, -2, -3, -4], 1, Concat, [1]],
85
+ [-1, 1, Conv, [64, 1, 1]], # 57
86
+
87
+ [-1, 1, Conv, [128, 3, 2]],
88
+ [[-1, 47], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [64, 1, 1]],
91
+ [-2, 1, Conv, [64, 1, 1]],
92
+ [-1, 1, Conv, [64, 3, 1]],
93
+ [-1, 1, Conv, [64, 3, 1]],
94
+ [[-1, -2, -3, -4], 1, Concat, [1]],
95
+ [-1, 1, Conv, [128, 1, 1]], # 65
96
+
97
+ [-1, 1, Conv, [256, 3, 2]],
98
+ [[-1, 37], 1, Concat, [1]],
99
+
100
+ [-1, 1, Conv, [128, 1, 1]],
101
+ [-2, 1, Conv, [128, 1, 1]],
102
+ [-1, 1, Conv, [128, 3, 1]],
103
+ [-1, 1, Conv, [128, 3, 1]],
104
+ [[-1, -2, -3, -4], 1, Concat, [1]],
105
+ [-1, 1, Conv, [256, 1, 1]], # 73
106
+
107
+ [57, 1, Conv, [128, 3, 1]],
108
+ [65, 1, Conv, [256, 3, 1]],
109
+ [73, 1, Conv, [512, 3, 1]],
110
+
111
+ [[74,75,76], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
112
+ ]
cfg/deploy/yolov7-tiny.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # yolov7-tiny backbone
13
+ backbone:
14
+ # [from, number, module, args]; Conv args are [c2, k=1, s=1, p=None, g=1, act=True]
15
+ [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2
16
+
17
+ [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4
18
+
19
+ [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
20
+ [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
21
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
22
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
23
+ [[-1, -2, -3, -4], 1, Concat, [1]],
24
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 7
25
+
26
+ [-1, 1, MP, []], # 8-P3/8
27
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
28
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
29
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
30
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
31
+ [[-1, -2, -3, -4], 1, Concat, [1]],
32
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 14
33
+
34
+ [-1, 1, MP, []], # 15-P4/16
35
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
36
+ [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
37
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
38
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
39
+ [[-1, -2, -3, -4], 1, Concat, [1]],
40
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 21
41
+
42
+ [-1, 1, MP, []], # 22-P5/32
43
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
44
+ [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
45
+ [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
46
+ [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
47
+ [[-1, -2, -3, -4], 1, Concat, [1]],
48
+ [-1, 1, Conv, [512, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 28
49
+ ]
50
+
51
+ # yolov7-tiny head
52
+ head:
53
+ [[-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
54
+ [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
55
+ [-1, 1, SP, [5]],
56
+ [-2, 1, SP, [9]],
57
+ [-3, 1, SP, [13]],
58
+ [[-1, -2, -3, -4], 1, Concat, [1]],
59
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
60
+ [[-1, -7], 1, Concat, [1]],
61
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 37
62
+
63
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
64
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
65
+ [21, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4
66
+ [[-1, -2], 1, Concat, [1]],
67
+
68
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
69
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
70
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
71
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
72
+ [[-1, -2, -3, -4], 1, Concat, [1]],
73
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 47
74
+
75
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
76
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
77
+ [14, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3
78
+ [[-1, -2], 1, Concat, [1]],
79
+
80
+ [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
81
+ [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
82
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
83
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
84
+ [[-1, -2, -3, -4], 1, Concat, [1]],
85
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 57
86
+
87
+ [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]],
88
+ [[-1, 47], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
91
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
92
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
93
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
94
+ [[-1, -2, -3, -4], 1, Concat, [1]],
95
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 65
96
+
97
+ [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]],
98
+ [[-1, 37], 1, Concat, [1]],
99
+
100
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
101
+ [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
102
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
103
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
104
+ [[-1, -2, -3, -4], 1, Concat, [1]],
105
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 73
106
+
107
+ [57, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
108
+ [65, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
109
+ [73, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
110
+
111
+ [[74,75,76], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
112
+ ]
cfg/deploy/yolov7-w6.yaml ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-w6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [[-1, -3, -5, -6], 1, Concat, [1]],
27
+ [-1, 1, Conv, [128, 1, 1]], # 10
28
+
29
+ [-1, 1, Conv, [256, 3, 2]], # 11-P3/8
30
+ [-1, 1, Conv, [128, 1, 1]],
31
+ [-2, 1, Conv, [128, 1, 1]],
32
+ [-1, 1, Conv, [128, 3, 1]],
33
+ [-1, 1, Conv, [128, 3, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [[-1, -3, -5, -6], 1, Concat, [1]],
37
+ [-1, 1, Conv, [256, 1, 1]], # 19
38
+
39
+ [-1, 1, Conv, [512, 3, 2]], # 20-P4/16
40
+ [-1, 1, Conv, [256, 1, 1]],
41
+ [-2, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [256, 3, 1]],
43
+ [-1, 1, Conv, [256, 3, 1]],
44
+ [-1, 1, Conv, [256, 3, 1]],
45
+ [-1, 1, Conv, [256, 3, 1]],
46
+ [[-1, -3, -5, -6], 1, Concat, [1]],
47
+ [-1, 1, Conv, [512, 1, 1]], # 28
48
+
49
+ [-1, 1, Conv, [768, 3, 2]], # 29-P5/32
50
+ [-1, 1, Conv, [384, 1, 1]],
51
+ [-2, 1, Conv, [384, 1, 1]],
52
+ [-1, 1, Conv, [384, 3, 1]],
53
+ [-1, 1, Conv, [384, 3, 1]],
54
+ [-1, 1, Conv, [384, 3, 1]],
55
+ [-1, 1, Conv, [384, 3, 1]],
56
+ [[-1, -3, -5, -6], 1, Concat, [1]],
57
+ [-1, 1, Conv, [768, 1, 1]], # 37
58
+
59
+ [-1, 1, Conv, [1024, 3, 2]], # 38-P6/64
60
+ [-1, 1, Conv, [512, 1, 1]],
61
+ [-2, 1, Conv, [512, 1, 1]],
62
+ [-1, 1, Conv, [512, 3, 1]],
63
+ [-1, 1, Conv, [512, 3, 1]],
64
+ [-1, 1, Conv, [512, 3, 1]],
65
+ [-1, 1, Conv, [512, 3, 1]],
66
+ [[-1, -3, -5, -6], 1, Concat, [1]],
67
+ [-1, 1, Conv, [1024, 1, 1]], # 46
68
+ ]
69
+
70
+ # yolov7-w6 head
71
+ head:
72
+ [[-1, 1, SPPCSPC, [512]], # 47
73
+
74
+ [-1, 1, Conv, [384, 1, 1]],
75
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
76
+ [37, 1, Conv, [384, 1, 1]], # route backbone P5
77
+ [[-1, -2], 1, Concat, [1]],
78
+
79
+ [-1, 1, Conv, [384, 1, 1]],
80
+ [-2, 1, Conv, [384, 1, 1]],
81
+ [-1, 1, Conv, [192, 3, 1]],
82
+ [-1, 1, Conv, [192, 3, 1]],
83
+ [-1, 1, Conv, [192, 3, 1]],
84
+ [-1, 1, Conv, [192, 3, 1]],
85
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
86
+ [-1, 1, Conv, [384, 1, 1]], # 59
87
+
88
+ [-1, 1, Conv, [256, 1, 1]],
89
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
90
+ [28, 1, Conv, [256, 1, 1]], # route backbone P4
91
+ [[-1, -2], 1, Concat, [1]],
92
+
93
+ [-1, 1, Conv, [256, 1, 1]],
94
+ [-2, 1, Conv, [256, 1, 1]],
95
+ [-1, 1, Conv, [128, 3, 1]],
96
+ [-1, 1, Conv, [128, 3, 1]],
97
+ [-1, 1, Conv, [128, 3, 1]],
98
+ [-1, 1, Conv, [128, 3, 1]],
99
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
100
+ [-1, 1, Conv, [256, 1, 1]], # 71
101
+
102
+ [-1, 1, Conv, [128, 1, 1]],
103
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
104
+ [19, 1, Conv, [128, 1, 1]], # route backbone P3
105
+ [[-1, -2], 1, Concat, [1]],
106
+
107
+ [-1, 1, Conv, [128, 1, 1]],
108
+ [-2, 1, Conv, [128, 1, 1]],
109
+ [-1, 1, Conv, [64, 3, 1]],
110
+ [-1, 1, Conv, [64, 3, 1]],
111
+ [-1, 1, Conv, [64, 3, 1]],
112
+ [-1, 1, Conv, [64, 3, 1]],
113
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
114
+ [-1, 1, Conv, [128, 1, 1]], # 83
115
+
116
+ [-1, 1, Conv, [256, 3, 2]],
117
+ [[-1, 71], 1, Concat, [1]], # cat
118
+
119
+ [-1, 1, Conv, [256, 1, 1]],
120
+ [-2, 1, Conv, [256, 1, 1]],
121
+ [-1, 1, Conv, [128, 3, 1]],
122
+ [-1, 1, Conv, [128, 3, 1]],
123
+ [-1, 1, Conv, [128, 3, 1]],
124
+ [-1, 1, Conv, [128, 3, 1]],
125
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
126
+ [-1, 1, Conv, [256, 1, 1]], # 93
127
+
128
+ [-1, 1, Conv, [384, 3, 2]],
129
+ [[-1, 59], 1, Concat, [1]], # cat
130
+
131
+ [-1, 1, Conv, [384, 1, 1]],
132
+ [-2, 1, Conv, [384, 1, 1]],
133
+ [-1, 1, Conv, [192, 3, 1]],
134
+ [-1, 1, Conv, [192, 3, 1]],
135
+ [-1, 1, Conv, [192, 3, 1]],
136
+ [-1, 1, Conv, [192, 3, 1]],
137
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
138
+ [-1, 1, Conv, [384, 1, 1]], # 103
139
+
140
+ [-1, 1, Conv, [512, 3, 2]],
141
+ [[-1, 47], 1, Concat, [1]], # cat
142
+
143
+ [-1, 1, Conv, [512, 1, 1]],
144
+ [-2, 1, Conv, [512, 1, 1]],
145
+ [-1, 1, Conv, [256, 3, 1]],
146
+ [-1, 1, Conv, [256, 3, 1]],
147
+ [-1, 1, Conv, [256, 3, 1]],
148
+ [-1, 1, Conv, [256, 3, 1]],
149
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
150
+ [-1, 1, Conv, [512, 1, 1]], # 113
151
+
152
+ [83, 1, Conv, [256, 3, 1]],
153
+ [93, 1, Conv, [512, 3, 1]],
154
+ [103, 1, Conv, [768, 3, 1]],
155
+ [113, 1, Conv, [1024, 3, 1]],
156
+
157
+ [[114,115,116,117], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
158
+ ]
cfg/deploy/yolov7.yaml ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # yolov7 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+
17
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
18
+ [-1, 1, Conv, [64, 3, 1]],
19
+
20
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
21
+ [-1, 1, Conv, [64, 1, 1]],
22
+ [-2, 1, Conv, [64, 1, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [[-1, -3, -5, -6], 1, Concat, [1]],
28
+ [-1, 1, Conv, [256, 1, 1]], # 11
29
+
30
+ [-1, 1, MP, []],
31
+ [-1, 1, Conv, [128, 1, 1]],
32
+ [-3, 1, Conv, [128, 1, 1]],
33
+ [-1, 1, Conv, [128, 3, 2]],
34
+ [[-1, -3], 1, Concat, [1]], # 16-P3/8
35
+ [-1, 1, Conv, [128, 1, 1]],
36
+ [-2, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [[-1, -3, -5, -6], 1, Concat, [1]],
42
+ [-1, 1, Conv, [512, 1, 1]], # 24
43
+
44
+ [-1, 1, MP, []],
45
+ [-1, 1, Conv, [256, 1, 1]],
46
+ [-3, 1, Conv, [256, 1, 1]],
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, -3], 1, Concat, [1]], # 29-P4/16
49
+ [-1, 1, Conv, [256, 1, 1]],
50
+ [-2, 1, Conv, [256, 1, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [-1, 1, Conv, [256, 3, 1]],
53
+ [-1, 1, Conv, [256, 3, 1]],
54
+ [-1, 1, Conv, [256, 3, 1]],
55
+ [[-1, -3, -5, -6], 1, Concat, [1]],
56
+ [-1, 1, Conv, [1024, 1, 1]], # 37
57
+
58
+ [-1, 1, MP, []],
59
+ [-1, 1, Conv, [512, 1, 1]],
60
+ [-3, 1, Conv, [512, 1, 1]],
61
+ [-1, 1, Conv, [512, 3, 2]],
62
+ [[-1, -3], 1, Concat, [1]], # 42-P5/32
63
+ [-1, 1, Conv, [256, 1, 1]],
64
+ [-2, 1, Conv, [256, 1, 1]],
65
+ [-1, 1, Conv, [256, 3, 1]],
66
+ [-1, 1, Conv, [256, 3, 1]],
67
+ [-1, 1, Conv, [256, 3, 1]],
68
+ [-1, 1, Conv, [256, 3, 1]],
69
+ [[-1, -3, -5, -6], 1, Concat, [1]],
70
+ [-1, 1, Conv, [1024, 1, 1]], # 50
71
+ ]
72
+
73
+ # yolov7 head
74
+ head:
75
+ [[-1, 1, SPPCSPC, [512]], # 51
76
+
77
+ [-1, 1, Conv, [256, 1, 1]],
78
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
79
+ [37, 1, Conv, [256, 1, 1]], # route backbone P4
80
+ [[-1, -2], 1, Concat, [1]],
81
+
82
+ [-1, 1, Conv, [256, 1, 1]],
83
+ [-2, 1, Conv, [256, 1, 1]],
84
+ [-1, 1, Conv, [128, 3, 1]],
85
+ [-1, 1, Conv, [128, 3, 1]],
86
+ [-1, 1, Conv, [128, 3, 1]],
87
+ [-1, 1, Conv, [128, 3, 1]],
88
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
89
+ [-1, 1, Conv, [256, 1, 1]], # 63
90
+
91
+ [-1, 1, Conv, [128, 1, 1]],
92
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
93
+ [24, 1, Conv, [128, 1, 1]], # route backbone P3
94
+ [[-1, -2], 1, Concat, [1]],
95
+
96
+ [-1, 1, Conv, [128, 1, 1]],
97
+ [-2, 1, Conv, [128, 1, 1]],
98
+ [-1, 1, Conv, [64, 3, 1]],
99
+ [-1, 1, Conv, [64, 3, 1]],
100
+ [-1, 1, Conv, [64, 3, 1]],
101
+ [-1, 1, Conv, [64, 3, 1]],
102
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
103
+ [-1, 1, Conv, [128, 1, 1]], # 75
104
+
105
+ [-1, 1, MP, []],
106
+ [-1, 1, Conv, [128, 1, 1]],
107
+ [-3, 1, Conv, [128, 1, 1]],
108
+ [-1, 1, Conv, [128, 3, 2]],
109
+ [[-1, -3, 63], 1, Concat, [1]],
110
+
111
+ [-1, 1, Conv, [256, 1, 1]],
112
+ [-2, 1, Conv, [256, 1, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [-1, 1, Conv, [128, 3, 1]],
115
+ [-1, 1, Conv, [128, 3, 1]],
116
+ [-1, 1, Conv, [128, 3, 1]],
117
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
118
+ [-1, 1, Conv, [256, 1, 1]], # 88
119
+
120
+ [-1, 1, MP, []],
121
+ [-1, 1, Conv, [256, 1, 1]],
122
+ [-3, 1, Conv, [256, 1, 1]],
123
+ [-1, 1, Conv, [256, 3, 2]],
124
+ [[-1, -3, 51], 1, Concat, [1]],
125
+
126
+ [-1, 1, Conv, [512, 1, 1]],
127
+ [-2, 1, Conv, [512, 1, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [-1, 1, Conv, [256, 3, 1]],
130
+ [-1, 1, Conv, [256, 3, 1]],
131
+ [-1, 1, Conv, [256, 3, 1]],
132
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
133
+ [-1, 1, Conv, [512, 1, 1]], # 101
134
+
135
+ [75, 1, RepConv, [256, 3, 1]],
136
+ [88, 1, RepConv, [512, 3, 1]],
137
+ [101, 1, RepConv, [1024, 3, 1]],
138
+
139
+ [[102,103,104], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
140
+ ]
cfg/deploy/yolov7x.yaml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # yolov7x backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [40, 3, 1]], # 0
16
+
17
+ [-1, 1, Conv, [80, 3, 2]], # 1-P1/2
18
+ [-1, 1, Conv, [80, 3, 1]],
19
+
20
+ [-1, 1, Conv, [160, 3, 2]], # 3-P2/4
21
+ [-1, 1, Conv, [64, 1, 1]],
22
+ [-2, 1, Conv, [64, 1, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [-1, 1, Conv, [64, 3, 1]],
29
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
30
+ [-1, 1, Conv, [320, 1, 1]], # 13
31
+
32
+ [-1, 1, MP, []],
33
+ [-1, 1, Conv, [160, 1, 1]],
34
+ [-3, 1, Conv, [160, 1, 1]],
35
+ [-1, 1, Conv, [160, 3, 2]],
36
+ [[-1, -3], 1, Concat, [1]], # 18-P3/8
37
+ [-1, 1, Conv, [128, 1, 1]],
38
+ [-2, 1, Conv, [128, 1, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [-1, 1, Conv, [128, 3, 1]],
42
+ [-1, 1, Conv, [128, 3, 1]],
43
+ [-1, 1, Conv, [128, 3, 1]],
44
+ [-1, 1, Conv, [128, 3, 1]],
45
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
46
+ [-1, 1, Conv, [640, 1, 1]], # 28
47
+
48
+ [-1, 1, MP, []],
49
+ [-1, 1, Conv, [320, 1, 1]],
50
+ [-3, 1, Conv, [320, 1, 1]],
51
+ [-1, 1, Conv, [320, 3, 2]],
52
+ [[-1, -3], 1, Concat, [1]], # 33-P4/16
53
+ [-1, 1, Conv, [256, 1, 1]],
54
+ [-2, 1, Conv, [256, 1, 1]],
55
+ [-1, 1, Conv, [256, 3, 1]],
56
+ [-1, 1, Conv, [256, 3, 1]],
57
+ [-1, 1, Conv, [256, 3, 1]],
58
+ [-1, 1, Conv, [256, 3, 1]],
59
+ [-1, 1, Conv, [256, 3, 1]],
60
+ [-1, 1, Conv, [256, 3, 1]],
61
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
62
+ [-1, 1, Conv, [1280, 1, 1]], # 43
63
+
64
+ [-1, 1, MP, []],
65
+ [-1, 1, Conv, [640, 1, 1]],
66
+ [-3, 1, Conv, [640, 1, 1]],
67
+ [-1, 1, Conv, [640, 3, 2]],
68
+ [[-1, -3], 1, Concat, [1]], # 48-P5/32
69
+ [-1, 1, Conv, [256, 1, 1]],
70
+ [-2, 1, Conv, [256, 1, 1]],
71
+ [-1, 1, Conv, [256, 3, 1]],
72
+ [-1, 1, Conv, [256, 3, 1]],
73
+ [-1, 1, Conv, [256, 3, 1]],
74
+ [-1, 1, Conv, [256, 3, 1]],
75
+ [-1, 1, Conv, [256, 3, 1]],
76
+ [-1, 1, Conv, [256, 3, 1]],
77
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
78
+ [-1, 1, Conv, [1280, 1, 1]], # 58
79
+ ]
80
+
81
+ # yolov7x head
82
+ head:
83
+ [[-1, 1, SPPCSPC, [640]], # 59
84
+
85
+ [-1, 1, Conv, [320, 1, 1]],
86
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
87
+ [43, 1, Conv, [320, 1, 1]], # route backbone P4
88
+ [[-1, -2], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [256, 1, 1]],
91
+ [-2, 1, Conv, [256, 1, 1]],
92
+ [-1, 1, Conv, [256, 3, 1]],
93
+ [-1, 1, Conv, [256, 3, 1]],
94
+ [-1, 1, Conv, [256, 3, 1]],
95
+ [-1, 1, Conv, [256, 3, 1]],
96
+ [-1, 1, Conv, [256, 3, 1]],
97
+ [-1, 1, Conv, [256, 3, 1]],
98
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
99
+ [-1, 1, Conv, [320, 1, 1]], # 73
100
+
101
+ [-1, 1, Conv, [160, 1, 1]],
102
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
103
+ [28, 1, Conv, [160, 1, 1]], # route backbone P3
104
+ [[-1, -2], 1, Concat, [1]],
105
+
106
+ [-1, 1, Conv, [128, 1, 1]],
107
+ [-2, 1, Conv, [128, 1, 1]],
108
+ [-1, 1, Conv, [128, 3, 1]],
109
+ [-1, 1, Conv, [128, 3, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
115
+ [-1, 1, Conv, [160, 1, 1]], # 87
116
+
117
+ [-1, 1, MP, []],
118
+ [-1, 1, Conv, [160, 1, 1]],
119
+ [-3, 1, Conv, [160, 1, 1]],
120
+ [-1, 1, Conv, [160, 3, 2]],
121
+ [[-1, -3, 73], 1, Concat, [1]],
122
+
123
+ [-1, 1, Conv, [256, 1, 1]],
124
+ [-2, 1, Conv, [256, 1, 1]],
125
+ [-1, 1, Conv, [256, 3, 1]],
126
+ [-1, 1, Conv, [256, 3, 1]],
127
+ [-1, 1, Conv, [256, 3, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [-1, 1, Conv, [256, 3, 1]],
130
+ [-1, 1, Conv, [256, 3, 1]],
131
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
132
+ [-1, 1, Conv, [320, 1, 1]], # 102
133
+
134
+ [-1, 1, MP, []],
135
+ [-1, 1, Conv, [320, 1, 1]],
136
+ [-3, 1, Conv, [320, 1, 1]],
137
+ [-1, 1, Conv, [320, 3, 2]],
138
+ [[-1, -3, 59], 1, Concat, [1]],
139
+
140
+ [-1, 1, Conv, [512, 1, 1]],
141
+ [-2, 1, Conv, [512, 1, 1]],
142
+ [-1, 1, Conv, [512, 3, 1]],
143
+ [-1, 1, Conv, [512, 3, 1]],
144
+ [-1, 1, Conv, [512, 3, 1]],
145
+ [-1, 1, Conv, [512, 3, 1]],
146
+ [-1, 1, Conv, [512, 3, 1]],
147
+ [-1, 1, Conv, [512, 3, 1]],
148
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
149
+ [-1, 1, Conv, [640, 1, 1]], # 117
150
+
151
+ [87, 1, Conv, [320, 3, 1]],
152
+ [102, 1, Conv, [640, 3, 1]],
153
+ [117, 1, Conv, [1280, 3, 1]],
154
+
155
+ [[118,119,120], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
156
+ ]
cfg/training/yolov7-d6.yaml ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-d6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [96, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [192]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [-1, 1, Conv, [64, 3, 1]],
29
+ [-1, 1, Conv, [64, 3, 1]],
30
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
31
+ [-1, 1, Conv, [192, 1, 1]], # 14
32
+
33
+ [-1, 1, DownC, [384]], # 15-P3/8
34
+ [-1, 1, Conv, [128, 1, 1]],
35
+ [-2, 1, Conv, [128, 1, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [-1, 1, Conv, [128, 3, 1]],
42
+ [-1, 1, Conv, [128, 3, 1]],
43
+ [-1, 1, Conv, [128, 3, 1]],
44
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
45
+ [-1, 1, Conv, [384, 1, 1]], # 27
46
+
47
+ [-1, 1, DownC, [768]], # 28-P4/16
48
+ [-1, 1, Conv, [256, 1, 1]],
49
+ [-2, 1, Conv, [256, 1, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [-1, 1, Conv, [256, 3, 1]],
53
+ [-1, 1, Conv, [256, 3, 1]],
54
+ [-1, 1, Conv, [256, 3, 1]],
55
+ [-1, 1, Conv, [256, 3, 1]],
56
+ [-1, 1, Conv, [256, 3, 1]],
57
+ [-1, 1, Conv, [256, 3, 1]],
58
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
59
+ [-1, 1, Conv, [768, 1, 1]], # 40
60
+
61
+ [-1, 1, DownC, [1152]], # 41-P5/32
62
+ [-1, 1, Conv, [384, 1, 1]],
63
+ [-2, 1, Conv, [384, 1, 1]],
64
+ [-1, 1, Conv, [384, 3, 1]],
65
+ [-1, 1, Conv, [384, 3, 1]],
66
+ [-1, 1, Conv, [384, 3, 1]],
67
+ [-1, 1, Conv, [384, 3, 1]],
68
+ [-1, 1, Conv, [384, 3, 1]],
69
+ [-1, 1, Conv, [384, 3, 1]],
70
+ [-1, 1, Conv, [384, 3, 1]],
71
+ [-1, 1, Conv, [384, 3, 1]],
72
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
73
+ [-1, 1, Conv, [1152, 1, 1]], # 53
74
+
75
+ [-1, 1, DownC, [1536]], # 54-P6/64
76
+ [-1, 1, Conv, [512, 1, 1]],
77
+ [-2, 1, Conv, [512, 1, 1]],
78
+ [-1, 1, Conv, [512, 3, 1]],
79
+ [-1, 1, Conv, [512, 3, 1]],
80
+ [-1, 1, Conv, [512, 3, 1]],
81
+ [-1, 1, Conv, [512, 3, 1]],
82
+ [-1, 1, Conv, [512, 3, 1]],
83
+ [-1, 1, Conv, [512, 3, 1]],
84
+ [-1, 1, Conv, [512, 3, 1]],
85
+ [-1, 1, Conv, [512, 3, 1]],
86
+ [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]],
87
+ [-1, 1, Conv, [1536, 1, 1]], # 66
88
+ ]
89
+
90
+ # yolov7-d6 head
91
+ head:
92
+ [[-1, 1, SPPCSPC, [768]], # 67
93
+
94
+ [-1, 1, Conv, [576, 1, 1]],
95
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
96
+ [53, 1, Conv, [576, 1, 1]], # route backbone P5
97
+ [[-1, -2], 1, Concat, [1]],
98
+
99
+ [-1, 1, Conv, [384, 1, 1]],
100
+ [-2, 1, Conv, [384, 1, 1]],
101
+ [-1, 1, Conv, [192, 3, 1]],
102
+ [-1, 1, Conv, [192, 3, 1]],
103
+ [-1, 1, Conv, [192, 3, 1]],
104
+ [-1, 1, Conv, [192, 3, 1]],
105
+ [-1, 1, Conv, [192, 3, 1]],
106
+ [-1, 1, Conv, [192, 3, 1]],
107
+ [-1, 1, Conv, [192, 3, 1]],
108
+ [-1, 1, Conv, [192, 3, 1]],
109
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
110
+ [-1, 1, Conv, [576, 1, 1]], # 83
111
+
112
+ [-1, 1, Conv, [384, 1, 1]],
113
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
114
+ [40, 1, Conv, [384, 1, 1]], # route backbone P4
115
+ [[-1, -2], 1, Concat, [1]],
116
+
117
+ [-1, 1, Conv, [256, 1, 1]],
118
+ [-2, 1, Conv, [256, 1, 1]],
119
+ [-1, 1, Conv, [128, 3, 1]],
120
+ [-1, 1, Conv, [128, 3, 1]],
121
+ [-1, 1, Conv, [128, 3, 1]],
122
+ [-1, 1, Conv, [128, 3, 1]],
123
+ [-1, 1, Conv, [128, 3, 1]],
124
+ [-1, 1, Conv, [128, 3, 1]],
125
+ [-1, 1, Conv, [128, 3, 1]],
126
+ [-1, 1, Conv, [128, 3, 1]],
127
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
128
+ [-1, 1, Conv, [384, 1, 1]], # 99
129
+
130
+ [-1, 1, Conv, [192, 1, 1]],
131
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
132
+ [27, 1, Conv, [192, 1, 1]], # route backbone P3
133
+ [[-1, -2], 1, Concat, [1]],
134
+
135
+ [-1, 1, Conv, [128, 1, 1]],
136
+ [-2, 1, Conv, [128, 1, 1]],
137
+ [-1, 1, Conv, [64, 3, 1]],
138
+ [-1, 1, Conv, [64, 3, 1]],
139
+ [-1, 1, Conv, [64, 3, 1]],
140
+ [-1, 1, Conv, [64, 3, 1]],
141
+ [-1, 1, Conv, [64, 3, 1]],
142
+ [-1, 1, Conv, [64, 3, 1]],
143
+ [-1, 1, Conv, [64, 3, 1]],
144
+ [-1, 1, Conv, [64, 3, 1]],
145
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
146
+ [-1, 1, Conv, [192, 1, 1]], # 115
147
+
148
+ [-1, 1, DownC, [384]],
149
+ [[-1, 99], 1, Concat, [1]],
150
+
151
+ [-1, 1, Conv, [256, 1, 1]],
152
+ [-2, 1, Conv, [256, 1, 1]],
153
+ [-1, 1, Conv, [128, 3, 1]],
154
+ [-1, 1, Conv, [128, 3, 1]],
155
+ [-1, 1, Conv, [128, 3, 1]],
156
+ [-1, 1, Conv, [128, 3, 1]],
157
+ [-1, 1, Conv, [128, 3, 1]],
158
+ [-1, 1, Conv, [128, 3, 1]],
159
+ [-1, 1, Conv, [128, 3, 1]],
160
+ [-1, 1, Conv, [128, 3, 1]],
161
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
162
+ [-1, 1, Conv, [384, 1, 1]], # 129
163
+
164
+ [-1, 1, DownC, [576]],
165
+ [[-1, 83], 1, Concat, [1]],
166
+
167
+ [-1, 1, Conv, [384, 1, 1]],
168
+ [-2, 1, Conv, [384, 1, 1]],
169
+ [-1, 1, Conv, [192, 3, 1]],
170
+ [-1, 1, Conv, [192, 3, 1]],
171
+ [-1, 1, Conv, [192, 3, 1]],
172
+ [-1, 1, Conv, [192, 3, 1]],
173
+ [-1, 1, Conv, [192, 3, 1]],
174
+ [-1, 1, Conv, [192, 3, 1]],
175
+ [-1, 1, Conv, [192, 3, 1]],
176
+ [-1, 1, Conv, [192, 3, 1]],
177
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
178
+ [-1, 1, Conv, [576, 1, 1]], # 143
179
+
180
+ [-1, 1, DownC, [768]],
181
+ [[-1, 67], 1, Concat, [1]],
182
+
183
+ [-1, 1, Conv, [512, 1, 1]],
184
+ [-2, 1, Conv, [512, 1, 1]],
185
+ [-1, 1, Conv, [256, 3, 1]],
186
+ [-1, 1, Conv, [256, 3, 1]],
187
+ [-1, 1, Conv, [256, 3, 1]],
188
+ [-1, 1, Conv, [256, 3, 1]],
189
+ [-1, 1, Conv, [256, 3, 1]],
190
+ [-1, 1, Conv, [256, 3, 1]],
191
+ [-1, 1, Conv, [256, 3, 1]],
192
+ [-1, 1, Conv, [256, 3, 1]],
193
+ [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]],
194
+ [-1, 1, Conv, [768, 1, 1]], # 157
195
+
196
+ [115, 1, Conv, [384, 3, 1]],
197
+ [129, 1, Conv, [768, 3, 1]],
198
+ [143, 1, Conv, [1152, 3, 1]],
199
+ [157, 1, Conv, [1536, 3, 1]],
200
+
201
+ [115, 1, Conv, [384, 3, 1]],
202
+ [99, 1, Conv, [768, 3, 1]],
203
+ [83, 1, Conv, [1152, 3, 1]],
204
+ [67, 1, Conv, [1536, 3, 1]],
205
+
206
+ [[158,159,160,161,162,163,164,165], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
207
+ ]
cfg/training/yolov7-e6.yaml ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-e6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [80, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [160]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
29
+ [-1, 1, Conv, [160, 1, 1]], # 12
30
+
31
+ [-1, 1, DownC, [320]], # 13-P3/8
32
+ [-1, 1, Conv, [128, 1, 1]],
33
+ [-2, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
41
+ [-1, 1, Conv, [320, 1, 1]], # 23
42
+
43
+ [-1, 1, DownC, [640]], # 24-P4/16
44
+ [-1, 1, Conv, [256, 1, 1]],
45
+ [-2, 1, Conv, [256, 1, 1]],
46
+ [-1, 1, Conv, [256, 3, 1]],
47
+ [-1, 1, Conv, [256, 3, 1]],
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-1, 1, Conv, [256, 3, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
53
+ [-1, 1, Conv, [640, 1, 1]], # 34
54
+
55
+ [-1, 1, DownC, [960]], # 35-P5/32
56
+ [-1, 1, Conv, [384, 1, 1]],
57
+ [-2, 1, Conv, [384, 1, 1]],
58
+ [-1, 1, Conv, [384, 3, 1]],
59
+ [-1, 1, Conv, [384, 3, 1]],
60
+ [-1, 1, Conv, [384, 3, 1]],
61
+ [-1, 1, Conv, [384, 3, 1]],
62
+ [-1, 1, Conv, [384, 3, 1]],
63
+ [-1, 1, Conv, [384, 3, 1]],
64
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
65
+ [-1, 1, Conv, [960, 1, 1]], # 45
66
+
67
+ [-1, 1, DownC, [1280]], # 46-P6/64
68
+ [-1, 1, Conv, [512, 1, 1]],
69
+ [-2, 1, Conv, [512, 1, 1]],
70
+ [-1, 1, Conv, [512, 3, 1]],
71
+ [-1, 1, Conv, [512, 3, 1]],
72
+ [-1, 1, Conv, [512, 3, 1]],
73
+ [-1, 1, Conv, [512, 3, 1]],
74
+ [-1, 1, Conv, [512, 3, 1]],
75
+ [-1, 1, Conv, [512, 3, 1]],
76
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
77
+ [-1, 1, Conv, [1280, 1, 1]], # 56
78
+ ]
79
+
80
+ # yolov7-e6 head
81
+ head:
82
+ [[-1, 1, SPPCSPC, [640]], # 57
83
+
84
+ [-1, 1, Conv, [480, 1, 1]],
85
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
86
+ [45, 1, Conv, [480, 1, 1]], # route backbone P5
87
+ [[-1, -2], 1, Concat, [1]],
88
+
89
+ [-1, 1, Conv, [384, 1, 1]],
90
+ [-2, 1, Conv, [384, 1, 1]],
91
+ [-1, 1, Conv, [192, 3, 1]],
92
+ [-1, 1, Conv, [192, 3, 1]],
93
+ [-1, 1, Conv, [192, 3, 1]],
94
+ [-1, 1, Conv, [192, 3, 1]],
95
+ [-1, 1, Conv, [192, 3, 1]],
96
+ [-1, 1, Conv, [192, 3, 1]],
97
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
98
+ [-1, 1, Conv, [480, 1, 1]], # 71
99
+
100
+ [-1, 1, Conv, [320, 1, 1]],
101
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
102
+ [34, 1, Conv, [320, 1, 1]], # route backbone P4
103
+ [[-1, -2], 1, Concat, [1]],
104
+
105
+ [-1, 1, Conv, [256, 1, 1]],
106
+ [-2, 1, Conv, [256, 1, 1]],
107
+ [-1, 1, Conv, [128, 3, 1]],
108
+ [-1, 1, Conv, [128, 3, 1]],
109
+ [-1, 1, Conv, [128, 3, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
114
+ [-1, 1, Conv, [320, 1, 1]], # 85
115
+
116
+ [-1, 1, Conv, [160, 1, 1]],
117
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
118
+ [23, 1, Conv, [160, 1, 1]], # route backbone P3
119
+ [[-1, -2], 1, Concat, [1]],
120
+
121
+ [-1, 1, Conv, [128, 1, 1]],
122
+ [-2, 1, Conv, [128, 1, 1]],
123
+ [-1, 1, Conv, [64, 3, 1]],
124
+ [-1, 1, Conv, [64, 3, 1]],
125
+ [-1, 1, Conv, [64, 3, 1]],
126
+ [-1, 1, Conv, [64, 3, 1]],
127
+ [-1, 1, Conv, [64, 3, 1]],
128
+ [-1, 1, Conv, [64, 3, 1]],
129
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
130
+ [-1, 1, Conv, [160, 1, 1]], # 99
131
+
132
+ [-1, 1, DownC, [320]],
133
+ [[-1, 85], 1, Concat, [1]],
134
+
135
+ [-1, 1, Conv, [256, 1, 1]],
136
+ [-2, 1, Conv, [256, 1, 1]],
137
+ [-1, 1, Conv, [128, 3, 1]],
138
+ [-1, 1, Conv, [128, 3, 1]],
139
+ [-1, 1, Conv, [128, 3, 1]],
140
+ [-1, 1, Conv, [128, 3, 1]],
141
+ [-1, 1, Conv, [128, 3, 1]],
142
+ [-1, 1, Conv, [128, 3, 1]],
143
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
144
+ [-1, 1, Conv, [320, 1, 1]], # 111
145
+
146
+ [-1, 1, DownC, [480]],
147
+ [[-1, 71], 1, Concat, [1]],
148
+
149
+ [-1, 1, Conv, [384, 1, 1]],
150
+ [-2, 1, Conv, [384, 1, 1]],
151
+ [-1, 1, Conv, [192, 3, 1]],
152
+ [-1, 1, Conv, [192, 3, 1]],
153
+ [-1, 1, Conv, [192, 3, 1]],
154
+ [-1, 1, Conv, [192, 3, 1]],
155
+ [-1, 1, Conv, [192, 3, 1]],
156
+ [-1, 1, Conv, [192, 3, 1]],
157
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
158
+ [-1, 1, Conv, [480, 1, 1]], # 123
159
+
160
+ [-1, 1, DownC, [640]],
161
+ [[-1, 57], 1, Concat, [1]],
162
+
163
+ [-1, 1, Conv, [512, 1, 1]],
164
+ [-2, 1, Conv, [512, 1, 1]],
165
+ [-1, 1, Conv, [256, 3, 1]],
166
+ [-1, 1, Conv, [256, 3, 1]],
167
+ [-1, 1, Conv, [256, 3, 1]],
168
+ [-1, 1, Conv, [256, 3, 1]],
169
+ [-1, 1, Conv, [256, 3, 1]],
170
+ [-1, 1, Conv, [256, 3, 1]],
171
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
172
+ [-1, 1, Conv, [640, 1, 1]], # 135
173
+
174
+ [99, 1, Conv, [320, 3, 1]],
175
+ [111, 1, Conv, [640, 3, 1]],
176
+ [123, 1, Conv, [960, 3, 1]],
177
+ [135, 1, Conv, [1280, 3, 1]],
178
+
179
+ [99, 1, Conv, [320, 3, 1]],
180
+ [85, 1, Conv, [640, 3, 1]],
181
+ [71, 1, Conv, [960, 3, 1]],
182
+ [57, 1, Conv, [1280, 3, 1]],
183
+
184
+ [[136,137,138,139,140,141,142,143], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
185
+ ]
cfg/training/yolov7-e6e.yaml ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-e6e backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [80, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, DownC, [160]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
29
+ [-1, 1, Conv, [160, 1, 1]], # 12
30
+ [-11, 1, Conv, [64, 1, 1]],
31
+ [-12, 1, Conv, [64, 1, 1]],
32
+ [-1, 1, Conv, [64, 3, 1]],
33
+ [-1, 1, Conv, [64, 3, 1]],
34
+ [-1, 1, Conv, [64, 3, 1]],
35
+ [-1, 1, Conv, [64, 3, 1]],
36
+ [-1, 1, Conv, [64, 3, 1]],
37
+ [-1, 1, Conv, [64, 3, 1]],
38
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
39
+ [-1, 1, Conv, [160, 1, 1]], # 22
40
+ [[-1, -11], 1, Shortcut, [1]], # 23
41
+
42
+ [-1, 1, DownC, [320]], # 24-P3/8
43
+ [-1, 1, Conv, [128, 1, 1]],
44
+ [-2, 1, Conv, [128, 1, 1]],
45
+ [-1, 1, Conv, [128, 3, 1]],
46
+ [-1, 1, Conv, [128, 3, 1]],
47
+ [-1, 1, Conv, [128, 3, 1]],
48
+ [-1, 1, Conv, [128, 3, 1]],
49
+ [-1, 1, Conv, [128, 3, 1]],
50
+ [-1, 1, Conv, [128, 3, 1]],
51
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
52
+ [-1, 1, Conv, [320, 1, 1]], # 34
53
+ [-11, 1, Conv, [128, 1, 1]],
54
+ [-12, 1, Conv, [128, 1, 1]],
55
+ [-1, 1, Conv, [128, 3, 1]],
56
+ [-1, 1, Conv, [128, 3, 1]],
57
+ [-1, 1, Conv, [128, 3, 1]],
58
+ [-1, 1, Conv, [128, 3, 1]],
59
+ [-1, 1, Conv, [128, 3, 1]],
60
+ [-1, 1, Conv, [128, 3, 1]],
61
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
62
+ [-1, 1, Conv, [320, 1, 1]], # 44
63
+ [[-1, -11], 1, Shortcut, [1]], # 45
64
+
65
+ [-1, 1, DownC, [640]], # 46-P4/16
66
+ [-1, 1, Conv, [256, 1, 1]],
67
+ [-2, 1, Conv, [256, 1, 1]],
68
+ [-1, 1, Conv, [256, 3, 1]],
69
+ [-1, 1, Conv, [256, 3, 1]],
70
+ [-1, 1, Conv, [256, 3, 1]],
71
+ [-1, 1, Conv, [256, 3, 1]],
72
+ [-1, 1, Conv, [256, 3, 1]],
73
+ [-1, 1, Conv, [256, 3, 1]],
74
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
75
+ [-1, 1, Conv, [640, 1, 1]], # 56
76
+ [-11, 1, Conv, [256, 1, 1]],
77
+ [-12, 1, Conv, [256, 1, 1]],
78
+ [-1, 1, Conv, [256, 3, 1]],
79
+ [-1, 1, Conv, [256, 3, 1]],
80
+ [-1, 1, Conv, [256, 3, 1]],
81
+ [-1, 1, Conv, [256, 3, 1]],
82
+ [-1, 1, Conv, [256, 3, 1]],
83
+ [-1, 1, Conv, [256, 3, 1]],
84
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
85
+ [-1, 1, Conv, [640, 1, 1]], # 66
86
+ [[-1, -11], 1, Shortcut, [1]], # 67
87
+
88
+ [-1, 1, DownC, [960]], # 68-P5/32
89
+ [-1, 1, Conv, [384, 1, 1]],
90
+ [-2, 1, Conv, [384, 1, 1]],
91
+ [-1, 1, Conv, [384, 3, 1]],
92
+ [-1, 1, Conv, [384, 3, 1]],
93
+ [-1, 1, Conv, [384, 3, 1]],
94
+ [-1, 1, Conv, [384, 3, 1]],
95
+ [-1, 1, Conv, [384, 3, 1]],
96
+ [-1, 1, Conv, [384, 3, 1]],
97
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
98
+ [-1, 1, Conv, [960, 1, 1]], # 78
99
+ [-11, 1, Conv, [384, 1, 1]],
100
+ [-12, 1, Conv, [384, 1, 1]],
101
+ [-1, 1, Conv, [384, 3, 1]],
102
+ [-1, 1, Conv, [384, 3, 1]],
103
+ [-1, 1, Conv, [384, 3, 1]],
104
+ [-1, 1, Conv, [384, 3, 1]],
105
+ [-1, 1, Conv, [384, 3, 1]],
106
+ [-1, 1, Conv, [384, 3, 1]],
107
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
108
+ [-1, 1, Conv, [960, 1, 1]], # 88
109
+ [[-1, -11], 1, Shortcut, [1]], # 89
110
+
111
+ [-1, 1, DownC, [1280]], # 90-P6/64
112
+ [-1, 1, Conv, [512, 1, 1]],
113
+ [-2, 1, Conv, [512, 1, 1]],
114
+ [-1, 1, Conv, [512, 3, 1]],
115
+ [-1, 1, Conv, [512, 3, 1]],
116
+ [-1, 1, Conv, [512, 3, 1]],
117
+ [-1, 1, Conv, [512, 3, 1]],
118
+ [-1, 1, Conv, [512, 3, 1]],
119
+ [-1, 1, Conv, [512, 3, 1]],
120
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
121
+ [-1, 1, Conv, [1280, 1, 1]], # 100
122
+ [-11, 1, Conv, [512, 1, 1]],
123
+ [-12, 1, Conv, [512, 1, 1]],
124
+ [-1, 1, Conv, [512, 3, 1]],
125
+ [-1, 1, Conv, [512, 3, 1]],
126
+ [-1, 1, Conv, [512, 3, 1]],
127
+ [-1, 1, Conv, [512, 3, 1]],
128
+ [-1, 1, Conv, [512, 3, 1]],
129
+ [-1, 1, Conv, [512, 3, 1]],
130
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
131
+ [-1, 1, Conv, [1280, 1, 1]], # 110
132
+ [[-1, -11], 1, Shortcut, [1]], # 111
133
+ ]
134
+
135
+ # yolov7-e6e head
136
+ head:
137
+ [[-1, 1, SPPCSPC, [640]], # 112
138
+
139
+ [-1, 1, Conv, [480, 1, 1]],
140
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
141
+ [89, 1, Conv, [480, 1, 1]], # route backbone P5
142
+ [[-1, -2], 1, Concat, [1]],
143
+
144
+ [-1, 1, Conv, [384, 1, 1]],
145
+ [-2, 1, Conv, [384, 1, 1]],
146
+ [-1, 1, Conv, [192, 3, 1]],
147
+ [-1, 1, Conv, [192, 3, 1]],
148
+ [-1, 1, Conv, [192, 3, 1]],
149
+ [-1, 1, Conv, [192, 3, 1]],
150
+ [-1, 1, Conv, [192, 3, 1]],
151
+ [-1, 1, Conv, [192, 3, 1]],
152
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
153
+ [-1, 1, Conv, [480, 1, 1]], # 126
154
+ [-11, 1, Conv, [384, 1, 1]],
155
+ [-12, 1, Conv, [384, 1, 1]],
156
+ [-1, 1, Conv, [192, 3, 1]],
157
+ [-1, 1, Conv, [192, 3, 1]],
158
+ [-1, 1, Conv, [192, 3, 1]],
159
+ [-1, 1, Conv, [192, 3, 1]],
160
+ [-1, 1, Conv, [192, 3, 1]],
161
+ [-1, 1, Conv, [192, 3, 1]],
162
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
163
+ [-1, 1, Conv, [480, 1, 1]], # 136
164
+ [[-1, -11], 1, Shortcut, [1]], # 137
165
+
166
+ [-1, 1, Conv, [320, 1, 1]],
167
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
168
+ [67, 1, Conv, [320, 1, 1]], # route backbone P4
169
+ [[-1, -2], 1, Concat, [1]],
170
+
171
+ [-1, 1, Conv, [256, 1, 1]],
172
+ [-2, 1, Conv, [256, 1, 1]],
173
+ [-1, 1, Conv, [128, 3, 1]],
174
+ [-1, 1, Conv, [128, 3, 1]],
175
+ [-1, 1, Conv, [128, 3, 1]],
176
+ [-1, 1, Conv, [128, 3, 1]],
177
+ [-1, 1, Conv, [128, 3, 1]],
178
+ [-1, 1, Conv, [128, 3, 1]],
179
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
180
+ [-1, 1, Conv, [320, 1, 1]], # 151
181
+ [-11, 1, Conv, [256, 1, 1]],
182
+ [-12, 1, Conv, [256, 1, 1]],
183
+ [-1, 1, Conv, [128, 3, 1]],
184
+ [-1, 1, Conv, [128, 3, 1]],
185
+ [-1, 1, Conv, [128, 3, 1]],
186
+ [-1, 1, Conv, [128, 3, 1]],
187
+ [-1, 1, Conv, [128, 3, 1]],
188
+ [-1, 1, Conv, [128, 3, 1]],
189
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
190
+ [-1, 1, Conv, [320, 1, 1]], # 161
191
+ [[-1, -11], 1, Shortcut, [1]], # 162
192
+
193
+ [-1, 1, Conv, [160, 1, 1]],
194
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
195
+ [45, 1, Conv, [160, 1, 1]], # route backbone P3
196
+ [[-1, -2], 1, Concat, [1]],
197
+
198
+ [-1, 1, Conv, [128, 1, 1]],
199
+ [-2, 1, Conv, [128, 1, 1]],
200
+ [-1, 1, Conv, [64, 3, 1]],
201
+ [-1, 1, Conv, [64, 3, 1]],
202
+ [-1, 1, Conv, [64, 3, 1]],
203
+ [-1, 1, Conv, [64, 3, 1]],
204
+ [-1, 1, Conv, [64, 3, 1]],
205
+ [-1, 1, Conv, [64, 3, 1]],
206
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
207
+ [-1, 1, Conv, [160, 1, 1]], # 176
208
+ [-11, 1, Conv, [128, 1, 1]],
209
+ [-12, 1, Conv, [128, 1, 1]],
210
+ [-1, 1, Conv, [64, 3, 1]],
211
+ [-1, 1, Conv, [64, 3, 1]],
212
+ [-1, 1, Conv, [64, 3, 1]],
213
+ [-1, 1, Conv, [64, 3, 1]],
214
+ [-1, 1, Conv, [64, 3, 1]],
215
+ [-1, 1, Conv, [64, 3, 1]],
216
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
217
+ [-1, 1, Conv, [160, 1, 1]], # 186
218
+ [[-1, -11], 1, Shortcut, [1]], # 187
219
+
220
+ [-1, 1, DownC, [320]],
221
+ [[-1, 162], 1, Concat, [1]],
222
+
223
+ [-1, 1, Conv, [256, 1, 1]],
224
+ [-2, 1, Conv, [256, 1, 1]],
225
+ [-1, 1, Conv, [128, 3, 1]],
226
+ [-1, 1, Conv, [128, 3, 1]],
227
+ [-1, 1, Conv, [128, 3, 1]],
228
+ [-1, 1, Conv, [128, 3, 1]],
229
+ [-1, 1, Conv, [128, 3, 1]],
230
+ [-1, 1, Conv, [128, 3, 1]],
231
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
232
+ [-1, 1, Conv, [320, 1, 1]], # 199
233
+ [-11, 1, Conv, [256, 1, 1]],
234
+ [-12, 1, Conv, [256, 1, 1]],
235
+ [-1, 1, Conv, [128, 3, 1]],
236
+ [-1, 1, Conv, [128, 3, 1]],
237
+ [-1, 1, Conv, [128, 3, 1]],
238
+ [-1, 1, Conv, [128, 3, 1]],
239
+ [-1, 1, Conv, [128, 3, 1]],
240
+ [-1, 1, Conv, [128, 3, 1]],
241
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
242
+ [-1, 1, Conv, [320, 1, 1]], # 209
243
+ [[-1, -11], 1, Shortcut, [1]], # 210
244
+
245
+ [-1, 1, DownC, [480]],
246
+ [[-1, 137], 1, Concat, [1]],
247
+
248
+ [-1, 1, Conv, [384, 1, 1]],
249
+ [-2, 1, Conv, [384, 1, 1]],
250
+ [-1, 1, Conv, [192, 3, 1]],
251
+ [-1, 1, Conv, [192, 3, 1]],
252
+ [-1, 1, Conv, [192, 3, 1]],
253
+ [-1, 1, Conv, [192, 3, 1]],
254
+ [-1, 1, Conv, [192, 3, 1]],
255
+ [-1, 1, Conv, [192, 3, 1]],
256
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
257
+ [-1, 1, Conv, [480, 1, 1]], # 222
258
+ [-11, 1, Conv, [384, 1, 1]],
259
+ [-12, 1, Conv, [384, 1, 1]],
260
+ [-1, 1, Conv, [192, 3, 1]],
261
+ [-1, 1, Conv, [192, 3, 1]],
262
+ [-1, 1, Conv, [192, 3, 1]],
263
+ [-1, 1, Conv, [192, 3, 1]],
264
+ [-1, 1, Conv, [192, 3, 1]],
265
+ [-1, 1, Conv, [192, 3, 1]],
266
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
267
+ [-1, 1, Conv, [480, 1, 1]], # 232
268
+ [[-1, -11], 1, Shortcut, [1]], # 233
269
+
270
+ [-1, 1, DownC, [640]],
271
+ [[-1, 112], 1, Concat, [1]],
272
+
273
+ [-1, 1, Conv, [512, 1, 1]],
274
+ [-2, 1, Conv, [512, 1, 1]],
275
+ [-1, 1, Conv, [256, 3, 1]],
276
+ [-1, 1, Conv, [256, 3, 1]],
277
+ [-1, 1, Conv, [256, 3, 1]],
278
+ [-1, 1, Conv, [256, 3, 1]],
279
+ [-1, 1, Conv, [256, 3, 1]],
280
+ [-1, 1, Conv, [256, 3, 1]],
281
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
282
+ [-1, 1, Conv, [640, 1, 1]], # 245
283
+ [-11, 1, Conv, [512, 1, 1]],
284
+ [-12, 1, Conv, [512, 1, 1]],
285
+ [-1, 1, Conv, [256, 3, 1]],
286
+ [-1, 1, Conv, [256, 3, 1]],
287
+ [-1, 1, Conv, [256, 3, 1]],
288
+ [-1, 1, Conv, [256, 3, 1]],
289
+ [-1, 1, Conv, [256, 3, 1]],
290
+ [-1, 1, Conv, [256, 3, 1]],
291
+ [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]],
292
+ [-1, 1, Conv, [640, 1, 1]], # 255
293
+ [[-1, -11], 1, Shortcut, [1]], # 256
294
+
295
+ [187, 1, Conv, [320, 3, 1]],
296
+ [210, 1, Conv, [640, 3, 1]],
297
+ [233, 1, Conv, [960, 3, 1]],
298
+ [256, 1, Conv, [1280, 3, 1]],
299
+
300
+ [186, 1, Conv, [320, 3, 1]],
301
+ [161, 1, Conv, [640, 3, 1]],
302
+ [136, 1, Conv, [960, 3, 1]],
303
+ [112, 1, Conv, [1280, 3, 1]],
304
+
305
+ [[257,258,259,260,261,262,263,264], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
306
+ ]
cfg/training/yolov7-tiny.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [10,13, 16,30, 33,23] # P3/8
9
+ - [30,61, 62,45, 59,119] # P4/16
10
+ - [116,90, 156,198, 373,326] # P5/32
11
+
12
+ # yolov7-tiny backbone
13
+ backbone:
14
+ # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True
15
+ [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2
16
+
17
+ [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4
18
+
19
+ [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
20
+ [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
21
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
22
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
23
+ [[-1, -2, -3, -4], 1, Concat, [1]],
24
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 7
25
+
26
+ [-1, 1, MP, []], # 8-P3/8
27
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
28
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
29
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
30
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
31
+ [[-1, -2, -3, -4], 1, Concat, [1]],
32
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 14
33
+
34
+ [-1, 1, MP, []], # 15-P4/16
35
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
36
+ [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
37
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
38
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
39
+ [[-1, -2, -3, -4], 1, Concat, [1]],
40
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 21
41
+
42
+ [-1, 1, MP, []], # 22-P5/32
43
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
44
+ [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
45
+ [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
46
+ [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
47
+ [[-1, -2, -3, -4], 1, Concat, [1]],
48
+ [-1, 1, Conv, [512, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 28
49
+ ]
50
+
51
+ # yolov7-tiny head
52
+ head:
53
+ [[-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
54
+ [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
55
+ [-1, 1, SP, [5]],
56
+ [-2, 1, SP, [9]],
57
+ [-3, 1, SP, [13]],
58
+ [[-1, -2, -3, -4], 1, Concat, [1]],
59
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
60
+ [[-1, -7], 1, Concat, [1]],
61
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 37
62
+
63
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
64
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
65
+ [21, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4
66
+ [[-1, -2], 1, Concat, [1]],
67
+
68
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
69
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
70
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
71
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
72
+ [[-1, -2, -3, -4], 1, Concat, [1]],
73
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 47
74
+
75
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
76
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
77
+ [14, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3
78
+ [[-1, -2], 1, Concat, [1]],
79
+
80
+ [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
81
+ [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
82
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
83
+ [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
84
+ [[-1, -2, -3, -4], 1, Concat, [1]],
85
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 57
86
+
87
+ [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]],
88
+ [[-1, 47], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
91
+ [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
92
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
93
+ [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
94
+ [[-1, -2, -3, -4], 1, Concat, [1]],
95
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 65
96
+
97
+ [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]],
98
+ [[-1, 37], 1, Concat, [1]],
99
+
100
+ [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
101
+ [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]],
102
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
103
+ [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
104
+ [[-1, -2, -3, -4], 1, Concat, [1]],
105
+ [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 73
106
+
107
+ [57, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
108
+ [65, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
109
+ [73, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]],
110
+
111
+ [[74,75,76], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
112
+ ]
cfg/training/yolov7-w6.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [ 19,27, 44,40, 38,94 ] # P3/8
9
+ - [ 96,68, 86,152, 180,137 ] # P4/16
10
+ - [ 140,301, 303,264, 238,542 ] # P5/32
11
+ - [ 436,615, 739,380, 925,792 ] # P6/64
12
+
13
+ # yolov7-w6 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [[-1, 1, ReOrg, []], # 0
17
+ [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18
+
19
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
20
+ [-1, 1, Conv, [64, 1, 1]],
21
+ [-2, 1, Conv, [64, 1, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [[-1, -3, -5, -6], 1, Concat, [1]],
27
+ [-1, 1, Conv, [128, 1, 1]], # 10
28
+
29
+ [-1, 1, Conv, [256, 3, 2]], # 11-P3/8
30
+ [-1, 1, Conv, [128, 1, 1]],
31
+ [-2, 1, Conv, [128, 1, 1]],
32
+ [-1, 1, Conv, [128, 3, 1]],
33
+ [-1, 1, Conv, [128, 3, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [[-1, -3, -5, -6], 1, Concat, [1]],
37
+ [-1, 1, Conv, [256, 1, 1]], # 19
38
+
39
+ [-1, 1, Conv, [512, 3, 2]], # 20-P4/16
40
+ [-1, 1, Conv, [256, 1, 1]],
41
+ [-2, 1, Conv, [256, 1, 1]],
42
+ [-1, 1, Conv, [256, 3, 1]],
43
+ [-1, 1, Conv, [256, 3, 1]],
44
+ [-1, 1, Conv, [256, 3, 1]],
45
+ [-1, 1, Conv, [256, 3, 1]],
46
+ [[-1, -3, -5, -6], 1, Concat, [1]],
47
+ [-1, 1, Conv, [512, 1, 1]], # 28
48
+
49
+ [-1, 1, Conv, [768, 3, 2]], # 29-P5/32
50
+ [-1, 1, Conv, [384, 1, 1]],
51
+ [-2, 1, Conv, [384, 1, 1]],
52
+ [-1, 1, Conv, [384, 3, 1]],
53
+ [-1, 1, Conv, [384, 3, 1]],
54
+ [-1, 1, Conv, [384, 3, 1]],
55
+ [-1, 1, Conv, [384, 3, 1]],
56
+ [[-1, -3, -5, -6], 1, Concat, [1]],
57
+ [-1, 1, Conv, [768, 1, 1]], # 37
58
+
59
+ [-1, 1, Conv, [1024, 3, 2]], # 38-P6/64
60
+ [-1, 1, Conv, [512, 1, 1]],
61
+ [-2, 1, Conv, [512, 1, 1]],
62
+ [-1, 1, Conv, [512, 3, 1]],
63
+ [-1, 1, Conv, [512, 3, 1]],
64
+ [-1, 1, Conv, [512, 3, 1]],
65
+ [-1, 1, Conv, [512, 3, 1]],
66
+ [[-1, -3, -5, -6], 1, Concat, [1]],
67
+ [-1, 1, Conv, [1024, 1, 1]], # 46
68
+ ]
69
+
70
+ # yolov7-w6 head
71
+ head:
72
+ [[-1, 1, SPPCSPC, [512]], # 47
73
+
74
+ [-1, 1, Conv, [384, 1, 1]],
75
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
76
+ [37, 1, Conv, [384, 1, 1]], # route backbone P5
77
+ [[-1, -2], 1, Concat, [1]],
78
+
79
+ [-1, 1, Conv, [384, 1, 1]],
80
+ [-2, 1, Conv, [384, 1, 1]],
81
+ [-1, 1, Conv, [192, 3, 1]],
82
+ [-1, 1, Conv, [192, 3, 1]],
83
+ [-1, 1, Conv, [192, 3, 1]],
84
+ [-1, 1, Conv, [192, 3, 1]],
85
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
86
+ [-1, 1, Conv, [384, 1, 1]], # 59
87
+
88
+ [-1, 1, Conv, [256, 1, 1]],
89
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
90
+ [28, 1, Conv, [256, 1, 1]], # route backbone P4
91
+ [[-1, -2], 1, Concat, [1]],
92
+
93
+ [-1, 1, Conv, [256, 1, 1]],
94
+ [-2, 1, Conv, [256, 1, 1]],
95
+ [-1, 1, Conv, [128, 3, 1]],
96
+ [-1, 1, Conv, [128, 3, 1]],
97
+ [-1, 1, Conv, [128, 3, 1]],
98
+ [-1, 1, Conv, [128, 3, 1]],
99
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
100
+ [-1, 1, Conv, [256, 1, 1]], # 71
101
+
102
+ [-1, 1, Conv, [128, 1, 1]],
103
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
104
+ [19, 1, Conv, [128, 1, 1]], # route backbone P3
105
+ [[-1, -2], 1, Concat, [1]],
106
+
107
+ [-1, 1, Conv, [128, 1, 1]],
108
+ [-2, 1, Conv, [128, 1, 1]],
109
+ [-1, 1, Conv, [64, 3, 1]],
110
+ [-1, 1, Conv, [64, 3, 1]],
111
+ [-1, 1, Conv, [64, 3, 1]],
112
+ [-1, 1, Conv, [64, 3, 1]],
113
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
114
+ [-1, 1, Conv, [128, 1, 1]], # 83
115
+
116
+ [-1, 1, Conv, [256, 3, 2]],
117
+ [[-1, 71], 1, Concat, [1]], # cat
118
+
119
+ [-1, 1, Conv, [256, 1, 1]],
120
+ [-2, 1, Conv, [256, 1, 1]],
121
+ [-1, 1, Conv, [128, 3, 1]],
122
+ [-1, 1, Conv, [128, 3, 1]],
123
+ [-1, 1, Conv, [128, 3, 1]],
124
+ [-1, 1, Conv, [128, 3, 1]],
125
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
126
+ [-1, 1, Conv, [256, 1, 1]], # 93
127
+
128
+ [-1, 1, Conv, [384, 3, 2]],
129
+ [[-1, 59], 1, Concat, [1]], # cat
130
+
131
+ [-1, 1, Conv, [384, 1, 1]],
132
+ [-2, 1, Conv, [384, 1, 1]],
133
+ [-1, 1, Conv, [192, 3, 1]],
134
+ [-1, 1, Conv, [192, 3, 1]],
135
+ [-1, 1, Conv, [192, 3, 1]],
136
+ [-1, 1, Conv, [192, 3, 1]],
137
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
138
+ [-1, 1, Conv, [384, 1, 1]], # 103
139
+
140
+ [-1, 1, Conv, [512, 3, 2]],
141
+ [[-1, 47], 1, Concat, [1]], # cat
142
+
143
+ [-1, 1, Conv, [512, 1, 1]],
144
+ [-2, 1, Conv, [512, 1, 1]],
145
+ [-1, 1, Conv, [256, 3, 1]],
146
+ [-1, 1, Conv, [256, 3, 1]],
147
+ [-1, 1, Conv, [256, 3, 1]],
148
+ [-1, 1, Conv, [256, 3, 1]],
149
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
150
+ [-1, 1, Conv, [512, 1, 1]], # 113
151
+
152
+ [83, 1, Conv, [256, 3, 1]],
153
+ [93, 1, Conv, [512, 3, 1]],
154
+ [103, 1, Conv, [768, 3, 1]],
155
+ [113, 1, Conv, [1024, 3, 1]],
156
+
157
+ [83, 1, Conv, [320, 3, 1]],
158
+ [71, 1, Conv, [640, 3, 1]],
159
+ [59, 1, Conv, [960, 3, 1]],
160
+ [47, 1, Conv, [1280, 3, 1]],
161
+
162
+ [[114,115,116,117,118,119,120,121], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
163
+ ]
cfg/training/yolov7.yaml ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # yolov7 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [32, 3, 1]], # 0
16
+
17
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
18
+ [-1, 1, Conv, [64, 3, 1]],
19
+
20
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
21
+ [-1, 1, Conv, [64, 1, 1]],
22
+ [-2, 1, Conv, [64, 1, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [[-1, -3, -5, -6], 1, Concat, [1]],
28
+ [-1, 1, Conv, [256, 1, 1]], # 11
29
+
30
+ [-1, 1, MP, []],
31
+ [-1, 1, Conv, [128, 1, 1]],
32
+ [-3, 1, Conv, [128, 1, 1]],
33
+ [-1, 1, Conv, [128, 3, 2]],
34
+ [[-1, -3], 1, Concat, [1]], # 16-P3/8
35
+ [-1, 1, Conv, [128, 1, 1]],
36
+ [-2, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [-1, 1, Conv, [128, 3, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [[-1, -3, -5, -6], 1, Concat, [1]],
42
+ [-1, 1, Conv, [512, 1, 1]], # 24
43
+
44
+ [-1, 1, MP, []],
45
+ [-1, 1, Conv, [256, 1, 1]],
46
+ [-3, 1, Conv, [256, 1, 1]],
47
+ [-1, 1, Conv, [256, 3, 2]],
48
+ [[-1, -3], 1, Concat, [1]], # 29-P4/16
49
+ [-1, 1, Conv, [256, 1, 1]],
50
+ [-2, 1, Conv, [256, 1, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [-1, 1, Conv, [256, 3, 1]],
53
+ [-1, 1, Conv, [256, 3, 1]],
54
+ [-1, 1, Conv, [256, 3, 1]],
55
+ [[-1, -3, -5, -6], 1, Concat, [1]],
56
+ [-1, 1, Conv, [1024, 1, 1]], # 37
57
+
58
+ [-1, 1, MP, []],
59
+ [-1, 1, Conv, [512, 1, 1]],
60
+ [-3, 1, Conv, [512, 1, 1]],
61
+ [-1, 1, Conv, [512, 3, 2]],
62
+ [[-1, -3], 1, Concat, [1]], # 42-P5/32
63
+ [-1, 1, Conv, [256, 1, 1]],
64
+ [-2, 1, Conv, [256, 1, 1]],
65
+ [-1, 1, Conv, [256, 3, 1]],
66
+ [-1, 1, Conv, [256, 3, 1]],
67
+ [-1, 1, Conv, [256, 3, 1]],
68
+ [-1, 1, Conv, [256, 3, 1]],
69
+ [[-1, -3, -5, -6], 1, Concat, [1]],
70
+ [-1, 1, Conv, [1024, 1, 1]], # 50
71
+ ]
72
+
73
+ # yolov7 head
74
+ head:
75
+ [[-1, 1, SPPCSPC, [512]], # 51
76
+
77
+ [-1, 1, Conv, [256, 1, 1]],
78
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
79
+ [37, 1, Conv, [256, 1, 1]], # route backbone P4
80
+ [[-1, -2], 1, Concat, [1]],
81
+
82
+ [-1, 1, Conv, [256, 1, 1]],
83
+ [-2, 1, Conv, [256, 1, 1]],
84
+ [-1, 1, Conv, [128, 3, 1]],
85
+ [-1, 1, Conv, [128, 3, 1]],
86
+ [-1, 1, Conv, [128, 3, 1]],
87
+ [-1, 1, Conv, [128, 3, 1]],
88
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
89
+ [-1, 1, Conv, [256, 1, 1]], # 63
90
+
91
+ [-1, 1, Conv, [128, 1, 1]],
92
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
93
+ [24, 1, Conv, [128, 1, 1]], # route backbone P3
94
+ [[-1, -2], 1, Concat, [1]],
95
+
96
+ [-1, 1, Conv, [128, 1, 1]],
97
+ [-2, 1, Conv, [128, 1, 1]],
98
+ [-1, 1, Conv, [64, 3, 1]],
99
+ [-1, 1, Conv, [64, 3, 1]],
100
+ [-1, 1, Conv, [64, 3, 1]],
101
+ [-1, 1, Conv, [64, 3, 1]],
102
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
103
+ [-1, 1, Conv, [128, 1, 1]], # 75
104
+
105
+ [-1, 1, MP, []],
106
+ [-1, 1, Conv, [128, 1, 1]],
107
+ [-3, 1, Conv, [128, 1, 1]],
108
+ [-1, 1, Conv, [128, 3, 2]],
109
+ [[-1, -3, 63], 1, Concat, [1]],
110
+
111
+ [-1, 1, Conv, [256, 1, 1]],
112
+ [-2, 1, Conv, [256, 1, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [-1, 1, Conv, [128, 3, 1]],
115
+ [-1, 1, Conv, [128, 3, 1]],
116
+ [-1, 1, Conv, [128, 3, 1]],
117
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
118
+ [-1, 1, Conv, [256, 1, 1]], # 88
119
+
120
+ [-1, 1, MP, []],
121
+ [-1, 1, Conv, [256, 1, 1]],
122
+ [-3, 1, Conv, [256, 1, 1]],
123
+ [-1, 1, Conv, [256, 3, 2]],
124
+ [[-1, -3, 51], 1, Concat, [1]],
125
+
126
+ [-1, 1, Conv, [512, 1, 1]],
127
+ [-2, 1, Conv, [512, 1, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [-1, 1, Conv, [256, 3, 1]],
130
+ [-1, 1, Conv, [256, 3, 1]],
131
+ [-1, 1, Conv, [256, 3, 1]],
132
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
133
+ [-1, 1, Conv, [512, 1, 1]], # 101
134
+
135
+ [75, 1, RepConv, [256, 3, 1]],
136
+ [88, 1, RepConv, [512, 3, 1]],
137
+ [101, 1, RepConv, [1024, 3, 1]],
138
+
139
+ [[102,103,104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
140
+ ]
cfg/training/yolov7x.yaml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parameters
2
+ nc: 80 # number of classes
3
+ depth_multiple: 1.0 # model depth multiple
4
+ width_multiple: 1.0 # layer channel multiple
5
+
6
+ # anchors
7
+ anchors:
8
+ - [12,16, 19,36, 40,28] # P3/8
9
+ - [36,75, 76,55, 72,146] # P4/16
10
+ - [142,110, 192,243, 459,401] # P5/32
11
+
12
+ # yolov7x backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [[-1, 1, Conv, [40, 3, 1]], # 0
16
+
17
+ [-1, 1, Conv, [80, 3, 2]], # 1-P1/2
18
+ [-1, 1, Conv, [80, 3, 1]],
19
+
20
+ [-1, 1, Conv, [160, 3, 2]], # 3-P2/4
21
+ [-1, 1, Conv, [64, 1, 1]],
22
+ [-2, 1, Conv, [64, 1, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [-1, 1, Conv, [64, 3, 1]],
25
+ [-1, 1, Conv, [64, 3, 1]],
26
+ [-1, 1, Conv, [64, 3, 1]],
27
+ [-1, 1, Conv, [64, 3, 1]],
28
+ [-1, 1, Conv, [64, 3, 1]],
29
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
30
+ [-1, 1, Conv, [320, 1, 1]], # 13
31
+
32
+ [-1, 1, MP, []],
33
+ [-1, 1, Conv, [160, 1, 1]],
34
+ [-3, 1, Conv, [160, 1, 1]],
35
+ [-1, 1, Conv, [160, 3, 2]],
36
+ [[-1, -3], 1, Concat, [1]], # 18-P3/8
37
+ [-1, 1, Conv, [128, 1, 1]],
38
+ [-2, 1, Conv, [128, 1, 1]],
39
+ [-1, 1, Conv, [128, 3, 1]],
40
+ [-1, 1, Conv, [128, 3, 1]],
41
+ [-1, 1, Conv, [128, 3, 1]],
42
+ [-1, 1, Conv, [128, 3, 1]],
43
+ [-1, 1, Conv, [128, 3, 1]],
44
+ [-1, 1, Conv, [128, 3, 1]],
45
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
46
+ [-1, 1, Conv, [640, 1, 1]], # 28
47
+
48
+ [-1, 1, MP, []],
49
+ [-1, 1, Conv, [320, 1, 1]],
50
+ [-3, 1, Conv, [320, 1, 1]],
51
+ [-1, 1, Conv, [320, 3, 2]],
52
+ [[-1, -3], 1, Concat, [1]], # 33-P4/16
53
+ [-1, 1, Conv, [256, 1, 1]],
54
+ [-2, 1, Conv, [256, 1, 1]],
55
+ [-1, 1, Conv, [256, 3, 1]],
56
+ [-1, 1, Conv, [256, 3, 1]],
57
+ [-1, 1, Conv, [256, 3, 1]],
58
+ [-1, 1, Conv, [256, 3, 1]],
59
+ [-1, 1, Conv, [256, 3, 1]],
60
+ [-1, 1, Conv, [256, 3, 1]],
61
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
62
+ [-1, 1, Conv, [1280, 1, 1]], # 43
63
+
64
+ [-1, 1, MP, []],
65
+ [-1, 1, Conv, [640, 1, 1]],
66
+ [-3, 1, Conv, [640, 1, 1]],
67
+ [-1, 1, Conv, [640, 3, 2]],
68
+ [[-1, -3], 1, Concat, [1]], # 48-P5/32
69
+ [-1, 1, Conv, [256, 1, 1]],
70
+ [-2, 1, Conv, [256, 1, 1]],
71
+ [-1, 1, Conv, [256, 3, 1]],
72
+ [-1, 1, Conv, [256, 3, 1]],
73
+ [-1, 1, Conv, [256, 3, 1]],
74
+ [-1, 1, Conv, [256, 3, 1]],
75
+ [-1, 1, Conv, [256, 3, 1]],
76
+ [-1, 1, Conv, [256, 3, 1]],
77
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
78
+ [-1, 1, Conv, [1280, 1, 1]], # 58
79
+ ]
80
+
81
+ # yolov7x head
82
+ head:
83
+ [[-1, 1, SPPCSPC, [640]], # 59
84
+
85
+ [-1, 1, Conv, [320, 1, 1]],
86
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
87
+ [43, 1, Conv, [320, 1, 1]], # route backbone P4
88
+ [[-1, -2], 1, Concat, [1]],
89
+
90
+ [-1, 1, Conv, [256, 1, 1]],
91
+ [-2, 1, Conv, [256, 1, 1]],
92
+ [-1, 1, Conv, [256, 3, 1]],
93
+ [-1, 1, Conv, [256, 3, 1]],
94
+ [-1, 1, Conv, [256, 3, 1]],
95
+ [-1, 1, Conv, [256, 3, 1]],
96
+ [-1, 1, Conv, [256, 3, 1]],
97
+ [-1, 1, Conv, [256, 3, 1]],
98
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
99
+ [-1, 1, Conv, [320, 1, 1]], # 73
100
+
101
+ [-1, 1, Conv, [160, 1, 1]],
102
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
103
+ [28, 1, Conv, [160, 1, 1]], # route backbone P3
104
+ [[-1, -2], 1, Concat, [1]],
105
+
106
+ [-1, 1, Conv, [128, 1, 1]],
107
+ [-2, 1, Conv, [128, 1, 1]],
108
+ [-1, 1, Conv, [128, 3, 1]],
109
+ [-1, 1, Conv, [128, 3, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
115
+ [-1, 1, Conv, [160, 1, 1]], # 87
116
+
117
+ [-1, 1, MP, []],
118
+ [-1, 1, Conv, [160, 1, 1]],
119
+ [-3, 1, Conv, [160, 1, 1]],
120
+ [-1, 1, Conv, [160, 3, 2]],
121
+ [[-1, -3, 73], 1, Concat, [1]],
122
+
123
+ [-1, 1, Conv, [256, 1, 1]],
124
+ [-2, 1, Conv, [256, 1, 1]],
125
+ [-1, 1, Conv, [256, 3, 1]],
126
+ [-1, 1, Conv, [256, 3, 1]],
127
+ [-1, 1, Conv, [256, 3, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [-1, 1, Conv, [256, 3, 1]],
130
+ [-1, 1, Conv, [256, 3, 1]],
131
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
132
+ [-1, 1, Conv, [320, 1, 1]], # 102
133
+
134
+ [-1, 1, MP, []],
135
+ [-1, 1, Conv, [320, 1, 1]],
136
+ [-3, 1, Conv, [320, 1, 1]],
137
+ [-1, 1, Conv, [320, 3, 2]],
138
+ [[-1, -3, 59], 1, Concat, [1]],
139
+
140
+ [-1, 1, Conv, [512, 1, 1]],
141
+ [-2, 1, Conv, [512, 1, 1]],
142
+ [-1, 1, Conv, [512, 3, 1]],
143
+ [-1, 1, Conv, [512, 3, 1]],
144
+ [-1, 1, Conv, [512, 3, 1]],
145
+ [-1, 1, Conv, [512, 3, 1]],
146
+ [-1, 1, Conv, [512, 3, 1]],
147
+ [-1, 1, Conv, [512, 3, 1]],
148
+ [[-1, -3, -5, -7, -8], 1, Concat, [1]],
149
+ [-1, 1, Conv, [640, 1, 1]], # 117
150
+
151
+ [87, 1, Conv, [320, 3, 1]],
152
+ [102, 1, Conv, [640, 3, 1]],
153
+ [117, 1, Conv, [1280, 3, 1]],
154
+
155
+ [[118,119,120], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
156
+ ]
data/coco.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # COCO 2017 dataset http://cocodataset.org
2
+
3
+ # download command/URL (optional)
4
+ download: bash ./scripts/get_coco.sh
5
+
6
+ # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
7
+ train: ./coco/train2017.txt # 118287 images
8
+ val: ./coco/val2017.txt # 5000 images
9
+ test: ./coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
10
+
11
+ # number of classes
12
+ nc: 80
13
+
14
+ # class names
15
+ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
16
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
17
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
18
+ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
19
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
20
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
21
+ 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
22
+ 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
23
+ 'hair drier', 'toothbrush' ]
data/hyp.scratch.custom.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.3 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 0.7 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.2 # image translation (+/- fraction)
22
+ scale: 0.5 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.0 # image mixup (probability)
29
+ copy_paste: 0.0 # image copy paste (probability)
30
+ paste_in: 0.0 # image copy paste (probability), use 0 for faster training
31
+ loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
data/hyp.scratch.p5.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.3 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 0.7 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.2 # image translation (+/- fraction)
22
+ scale: 0.9 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.15 # image mixup (probability)
29
+ copy_paste: 0.0 # image copy paste (probability)
30
+ paste_in: 0.15 # image copy paste (probability), use 0 for faster training
31
+ loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
data/hyp.scratch.p6.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.3 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 0.7 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.2 # image translation (+/- fraction)
22
+ scale: 0.9 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.15 # image mixup (probability)
29
+ copy_paste: 0.0 # image copy paste (probability)
30
+ paste_in: 0.15 # image copy paste (probability), use 0 for faster training
31
+ loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
data/hyp.scratch.tiny.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.5 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 1.0 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.1 # image translation (+/- fraction)
22
+ scale: 0.5 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.05 # image mixup (probability)
29
+ copy_paste: 0.0 # image copy paste (probability)
30
+ paste_in: 0.05 # image copy paste (probability), use 0 for faster training
31
+ loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
deploy/triton-inference-server/README.md ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv7 on Triton Inference Server
2
+
3
+ Instructions to deploy YOLOv7 as TensorRT engine to [Triton Inference Server](https://github.com/NVIDIA/triton-inference-server).
4
+
5
+ Triton Inference Server takes care of model deployment with many out-of-the-box benefits, like a GRPC and HTTP interface, automatic scheduling on multiple GPUs, shared memory (even on GPU), dynamic server-side batching, health metrics and memory resource management.
6
+
7
+ There are no additional dependencies needed to run this deployment, except a working Docker daemon with GPU support.
8
+
9
+ ## Export TensorRT
10
+
11
+ See https://github.com/WongKinYiu/yolov7#export for more info.
12
+
13
+ ```bash
14
+ # Install onnx-simplifier (not listed in the general yolov7 requirements.txt)
15
+ pip3 install onnx-simplifier
16
+
17
+ # Pytorch Yolov7 -> ONNX with grid, EfficientNMS plugin and dynamic batch size
18
+ python export.py --weights ./yolov7.pt --grid --end2end --dynamic-batch --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640
19
+ # ONNX -> TensorRT with trtexec and docker
20
+ docker run -it --rm --gpus=all nvcr.io/nvidia/tensorrt:22.06-py3
21
+ # Copy onnx -> container: docker cp yolov7.onnx <container-id>:/workspace/
22
+ # Export with FP16 precision, min batch 1, opt batch 8 and max batch 8
23
+ ./tensorrt/bin/trtexec --onnx=yolov7.onnx --minShapes=images:1x3x640x640 --optShapes=images:8x3x640x640 --maxShapes=images:8x3x640x640 --fp16 --workspace=4096 --saveEngine=yolov7-fp16-1x8x8.engine --timingCacheFile=timing.cache
24
+ # Test engine
25
+ ./tensorrt/bin/trtexec --loadEngine=yolov7-fp16-1x8x8.engine
26
+ # Copy engine -> host: docker cp <container-id>:/workspace/yolov7-fp16-1x8x8.engine .
27
+ ```
28
+
29
+ Example output of test with RTX 3090.
30
+
31
+ ```
32
+ [I] === Performance summary ===
33
+ [I] Throughput: 73.4985 qps
34
+ [I] Latency: min = 14.8578 ms, max = 15.8344 ms, mean = 15.07 ms, median = 15.0422 ms, percentile(99%) = 15.7443 ms
35
+ [I] End-to-End Host Latency: min = 25.8715 ms, max = 28.4102 ms, mean = 26.672 ms, median = 26.6082 ms, percentile(99%) = 27.8314 ms
36
+ [I] Enqueue Time: min = 0.793701 ms, max = 1.47144 ms, mean = 1.2008 ms, median = 1.28644 ms, percentile(99%) = 1.38965 ms
37
+ [I] H2D Latency: min = 1.50073 ms, max = 1.52454 ms, mean = 1.51225 ms, median = 1.51404 ms, percentile(99%) = 1.51941 ms
38
+ [I] GPU Compute Time: min = 13.3386 ms, max = 14.3186 ms, mean = 13.5448 ms, median = 13.5178 ms, percentile(99%) = 14.2151 ms
39
+ [I] D2H Latency: min = 0.00878906 ms, max = 0.0172729 ms, mean = 0.0128844 ms, median = 0.0125732 ms, percentile(99%) = 0.0166016 ms
40
+ [I] Total Host Walltime: 3.04768 s
41
+ [I] Total GPU Compute Time: 3.03404 s
42
+ [I] Explanations of the performance metrics are printed in the verbose logs.
43
+ ```
44
+ Note: 73.5 qps x batch 8 = 588 fps @ ~15ms latency.
45
+
46
+ ## Model Repository
47
+
48
+ See [Triton Model Repository Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_repository.md#model-repository) for more info.
49
+
50
+ ```bash
51
+ # Create folder structure
52
+ mkdir -p triton-deploy/models/yolov7/1/
53
+ touch triton-deploy/models/yolov7/config.pbtxt
54
+ # Place model
55
+ mv yolov7-fp16-1x8x8.engine triton-deploy/models/yolov7/1/model.plan
56
+ ```
57
+
58
+ ## Model Configuration
59
+
60
+ See [Triton Model Configuration Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#model-configuration) for more info.
61
+
62
+ Minimal configuration for `triton-deploy/models/yolov7/config.pbtxt`:
63
+
64
+ ```
65
+ name: "yolov7"
66
+ platform: "tensorrt_plan"
67
+ max_batch_size: 8
68
+ dynamic_batching { }
69
+ ```
70
+
71
+ Example repository:
72
+
73
+ ```bash
74
+ $ tree triton-deploy/
75
+ triton-deploy/
76
+ └── models
77
+ └── yolov7
78
+ ├── 1
79
+ │   └── model.plan
80
+ └── config.pbtxt
81
+
82
+ 3 directories, 2 files
83
+ ```
84
+
85
+ ## Start Triton Inference Server
86
+
87
+ ```
88
+ docker run --gpus all --rm --ipc=host --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd)/triton-deploy/models:/models nvcr.io/nvidia/tritonserver:22.06-py3 tritonserver --model-repository=/models --strict-model-config=false --log-verbose 1
89
+ ```
90
+
91
+ In the log you should see:
92
+
93
+ ```
94
+ +--------+---------+--------+
95
+ | Model | Version | Status |
96
+ +--------+---------+--------+
97
+ | yolov7 | 1 | READY |
98
+ +--------+---------+--------+
99
+ ```
100
+
101
+ ## Performance with Model Analyzer
102
+
103
+ See [Triton Model Analyzer Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_analyzer.md#model-analyzer) for more info.
104
+
105
+ Performance numbers @ RTX 3090 + AMD Ryzen 9 5950X
106
+
107
+ Example test for 16 concurrent clients using shared memory, each with batch size 1 requests:
108
+
109
+ ```bash
110
+ docker run -it --ipc=host --net=host nvcr.io/nvidia/tritonserver:22.06-py3-sdk /bin/bash
111
+
112
+ ./install/bin/perf_analyzer -m yolov7 -u 127.0.0.1:8001 -i grpc --shared-memory system --concurrency-range 16
113
+
114
+ # Result (truncated)
115
+ Concurrency: 16, throughput: 590.119 infer/sec, latency 27080 usec
116
+ ```
117
+
118
+ Throughput for 16 clients with batch size 1 is the same as for a single thread running the engine at batch size 16 locally, thanks to the Triton [Dynamic Batching Strategy](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#dynamic-batcher). The result without dynamic batching (disabled in the model configuration) is considerably worse:
119
+
120
+ ```bash
121
+ # Result (truncated)
122
+ Concurrency: 16, throughput: 335.587 infer/sec, latency 47616 usec
123
+ ```
124
+
125
+ ## How to run model in your code
126
+
127
+ An example client can be found in `client.py`. It can run dummy input, images and videos.
128
+
129
+ ```bash
130
+ pip3 install tritonclient[all] opencv-python
131
+ python3 client.py image data/dog.jpg
132
+ ```
133
+
134
+ ![exemplary output result](data/dog_result.jpg)
135
+
136
+ ```
137
+ $ python3 client.py --help
138
+ usage: client.py [-h] [-m MODEL] [--width WIDTH] [--height HEIGHT] [-u URL] [-o OUT] [-f FPS] [-i] [-v] [-t CLIENT_TIMEOUT] [-s] [-r ROOT_CERTIFICATES] [-p PRIVATE_KEY] [-x CERTIFICATE_CHAIN] {dummy,image,video} [input]
139
+
140
+ positional arguments:
141
+ {dummy,image,video} Run mode. 'dummy' will send an emtpy buffer to the server to test if inference works. 'image' will process an image. 'video' will process a video.
142
+ input Input file to load from in image or video mode
143
+
144
+ optional arguments:
145
+ -h, --help show this help message and exit
146
+ -m MODEL, --model MODEL
147
+ Inference model name, default yolov7
148
+ --width WIDTH Inference model input width, default 640
149
+ --height HEIGHT Inference model input height, default 640
150
+ -u URL, --url URL Inference server URL, default localhost:8001
151
+ -o OUT, --out OUT Write output into file instead of displaying it
152
+ -f FPS, --fps FPS Video output fps, default 24.0 FPS
153
+ -i, --model-info Print model status, configuration and statistics
154
+ -v, --verbose Enable verbose client output
155
+ -t CLIENT_TIMEOUT, --client-timeout CLIENT_TIMEOUT
156
+ Client timeout in seconds, default no timeout
157
+ -s, --ssl Enable SSL encrypted channel to the server
158
+ -r ROOT_CERTIFICATES, --root-certificates ROOT_CERTIFICATES
159
+ File holding PEM-encoded root certificates, default none
160
+ -p PRIVATE_KEY, --private-key PRIVATE_KEY
161
+ File holding PEM-encoded private key, default is none
162
+ -x CERTIFICATE_CHAIN, --certificate-chain CERTIFICATE_CHAIN
163
+ File holding PEM-encoded certicate chain default is none
164
+ ```
deploy/triton-inference-server/boundingbox.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class BoundingBox:
    """A single detection: class id, confidence and an axis-aligned box.

    The box is stored twice: in absolute coordinates (``x1, y1, x2, y2``) and
    normalised by the image size (``u1, v1, u2, v2``).

    Note the constructor argument order: both x values come first, then both
    y values (``x1, x2, y1, y2``).
    """

    def __init__(self, classID, confidence, x1, x2, y1, y2, image_width, image_height):
        self.classID, self.confidence = classID, confidence
        # Absolute corner coordinates.
        self.x1, self.x2 = x1, x2
        self.y1, self.y2 = y1, y2
        # Corner coordinates as a fraction of the image width / height.
        self.u1, self.u2 = x1 / image_width, x2 / image_width
        self.v1, self.v2 = y1 / image_height, y2 / image_height

    def box(self):
        """Return the absolute corners as ``(x1, y1, x2, y2)``."""
        return (self.x1, self.y1, self.x2, self.y2)

    def width(self):
        """Return the absolute box width (``x2 - x1``)."""
        return self.x2 - self.x1

    def height(self):
        """Return the absolute box height (``y2 - y1``)."""
        return self.y2 - self.y1

    def center_absolute(self):
        """Return the box centre ``(x, y)`` in absolute coordinates."""
        centre_x = 0.5 * (self.x1 + self.x2)
        centre_y = 0.5 * (self.y1 + self.y2)
        return (centre_x, centre_y)

    def center_normalized(self):
        """Return the box centre ``(u, v)`` in normalised coordinates."""
        centre_u = 0.5 * (self.u1 + self.u2)
        centre_v = 0.5 * (self.v1 + self.v2)
        return (centre_u, centre_v)

    def size_absolute(self):
        """Return ``(width, height)`` in absolute coordinates."""
        return (self.x2 - self.x1, self.y2 - self.y1)

    def size_normalized(self):
        """Return ``(width, height)`` in normalised coordinates."""
        return (self.u2 - self.u1, self.v2 - self.v1)
deploy/triton-inference-server/client.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ import argparse
4
+ import numpy as np
5
+ import sys
6
+ import cv2
7
+
8
+ import tritonclient.grpc as grpcclient
9
+ from tritonclient.utils import InferenceServerException
10
+
11
+ from processing import preprocess, postprocess
12
+ from render import render_box, render_filled_box, get_text_size, render_text, RAND_COLORS
13
+ from labels import COCOLabels
14
+
15
# Tensor names the client sends to / requests from the served model.
INPUT_NAMES = ["images"]
OUTPUT_NAMES = ["num_dets", "det_boxes", "det_scores", "det_classes"]


def _requested_outputs():
    """Return one ``InferRequestedOutput`` per entry of ``OUTPUT_NAMES``."""
    return [grpcclient.InferRequestedOutput(name) for name in OUTPUT_NAMES]


def _print_inference_statistics(client, model_name):
    """Print the server's inference statistics for ``model_name``.

    Exits with status 1 unless exactly one model entry is reported.
    """
    statistics = client.get_inference_statistics(model_name=model_name)
    if len(statistics.model_stats) != 1:
        print("FAILED: get_inference_statistics")
        sys.exit(1)
    print(statistics)


def _draw_detections(image, detected_objects):
    """Print every detection and draw its box and caption; return the image."""
    for box in detected_objects:
        label = COCOLabels(box.classID).name
        caption = f"{label}: {box.confidence:.2f}"
        print(f"{label}: {box.confidence}")
        # RAND_COLORS is indexed modulo 64, so class ids above 63 reuse colours.
        image = render_box(image, box.box(), color=tuple(RAND_COLORS[box.classID % 64].tolist()))
        # The caption background is measured at a larger scale (0.6) than the
        # text is drawn with (0.5).
        size = get_text_size(image, caption, normalised_scaling=0.6)
        image = render_filled_box(image, (box.x1 - 3, box.y1 - 3, box.x1 + size[0], box.y1 + size[1]), color=(220, 220, 220))
        image = render_text(image, caption, (box.x1, box.y1), color=(30, 30, 30), normalised_scaling=0.5)
    return image


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # NOTE: 'mode' is a required positional argument, so argparse never
    # applies the default given here.
    parser.add_argument('mode',
                        choices=['dummy', 'image', 'video'],
                        default='dummy',
                        help='Run mode. \'dummy\' will send an emtpy buffer to the server to test if inference works. \'image\' will process an image. \'video\' will process a video.')
    parser.add_argument('input',
                        type=str,
                        nargs='?',
                        help='Input file to load from in image or video mode')
    parser.add_argument('-m',
                        '--model',
                        type=str,
                        required=False,
                        default='yolov7',
                        help='Inference model name, default yolov7')
    parser.add_argument('--width',
                        type=int,
                        required=False,
                        default=640,
                        help='Inference model input width, default 640')
    parser.add_argument('--height',
                        type=int,
                        required=False,
                        default=640,
                        help='Inference model input height, default 640')
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8001',
                        help='Inference server URL, default localhost:8001')
    parser.add_argument('-o',
                        '--out',
                        type=str,
                        required=False,
                        default='',
                        help='Write output into file instead of displaying it')
    parser.add_argument('-f',
                        '--fps',
                        type=float,
                        required=False,
                        default=24.0,
                        help='Video output fps, default 24.0 FPS')
    parser.add_argument('-i',
                        '--model-info',
                        action="store_true",
                        required=False,
                        default=False,
                        help='Print model status, configuration and statistics')
    parser.add_argument('-v',
                        '--verbose',
                        action="store_true",
                        required=False,
                        default=False,
                        help='Enable verbose client output')
    parser.add_argument('-t',
                        '--client-timeout',
                        type=float,
                        required=False,
                        default=None,
                        help='Client timeout in seconds, default no timeout')
    parser.add_argument('-s',
                        '--ssl',
                        action="store_true",
                        required=False,
                        default=False,
                        help='Enable SSL encrypted channel to the server')
    parser.add_argument('-r',
                        '--root-certificates',
                        type=str,
                        required=False,
                        default=None,
                        help='File holding PEM-encoded root certificates, default none')
    parser.add_argument('-p',
                        '--private-key',
                        type=str,
                        required=False,
                        default=None,
                        help='File holding PEM-encoded private key, default is none')
    parser.add_argument('-x',
                        '--certificate-chain',
                        type=str,
                        required=False,
                        default=None,
                        help='File holding PEM-encoded certicate chain default is none')

    FLAGS = parser.parse_args()

    # The model input is NCHW, and preprocess() indexes its target shape as
    # (height, width), so height comes first in both.
    input_shape = [FLAGS.height, FLAGS.width]
    tensor_shape = [1, 3, FLAGS.height, FLAGS.width]

    # Create server context
    try:
        triton_client = grpcclient.InferenceServerClient(
            url=FLAGS.url,
            verbose=FLAGS.verbose,
            ssl=FLAGS.ssl,
            root_certificates=FLAGS.root_certificates,
            private_key=FLAGS.private_key,
            certificate_chain=FLAGS.certificate_chain)
    except Exception as e:
        print("context creation failed: " + str(e))
        # Report the failure through a non-zero exit status.
        sys.exit(1)

    # Health check
    if not triton_client.is_server_live():
        print("FAILED : is_server_live")
        sys.exit(1)

    if not triton_client.is_server_ready():
        print("FAILED : is_server_ready")
        sys.exit(1)

    if not triton_client.is_model_ready(FLAGS.model):
        print("FAILED : is_model_ready")
        sys.exit(1)

    if FLAGS.model_info:
        # Model metadata
        try:
            metadata = triton_client.get_model_metadata(FLAGS.model)
            print(metadata)
        except InferenceServerException as ex:
            print("FAILED : get_model_metadata")
            # The server message is only echoed for errors other than
            # "unknown model".
            if "Request for unknown model" not in ex.message():
                print("Got: {}".format(ex.message()))
            sys.exit(1)

        # Model configuration
        try:
            config = triton_client.get_model_config(FLAGS.model)
            if not (config.config.name == FLAGS.model):
                print("FAILED: get_model_config")
                sys.exit(1)
            print(config)
        except InferenceServerException as ex:
            print("FAILED : get_model_config")
            print("Got: {}".format(ex.message()))
            sys.exit(1)

    # DUMMY MODE
    if FLAGS.mode == 'dummy':
        print("Running in 'dummy' mode")
        print("Creating emtpy buffer filled with ones...")
        inputs = [grpcclient.InferInput(INPUT_NAMES[0], tensor_shape, "FP32")]
        inputs[0].set_data_from_numpy(np.ones(shape=tuple(tensor_shape), dtype=np.float32))
        outputs = _requested_outputs()

        print("Invoking inference...")
        results = triton_client.infer(model_name=FLAGS.model,
                                      inputs=inputs,
                                      outputs=outputs,
                                      client_timeout=FLAGS.client_timeout)
        if FLAGS.model_info:
            _print_inference_statistics(triton_client, FLAGS.model)
        print("Done")

        for output in OUTPUT_NAMES:
            result = results.as_numpy(output)
            print(f"Received result buffer \"{output}\" of size {result.shape}")
            print(f"Naive buffer sum: {np.sum(result)}")

    # IMAGE MODE
    if FLAGS.mode == 'image':
        print("Running in 'image' mode")
        if not FLAGS.input:
            print("FAILED: no input image")
            sys.exit(1)

        inputs = [grpcclient.InferInput(INPUT_NAMES[0], tensor_shape, "FP32")]
        outputs = _requested_outputs()

        print("Creating buffer from image file...")
        input_image = cv2.imread(str(FLAGS.input))
        if input_image is None:
            print(f"FAILED: could not load input image {str(FLAGS.input)}")
            sys.exit(1)
        input_image_buffer = preprocess(input_image, input_shape)
        # Add the leading batch dimension expected by the model.
        input_image_buffer = np.expand_dims(input_image_buffer, axis=0)

        inputs[0].set_data_from_numpy(input_image_buffer)

        print("Invoking inference...")
        results = triton_client.infer(model_name=FLAGS.model,
                                      inputs=inputs,
                                      outputs=outputs,
                                      client_timeout=FLAGS.client_timeout)
        if FLAGS.model_info:
            _print_inference_statistics(triton_client, FLAGS.model)
        print("Done")

        for output in OUTPUT_NAMES:
            result = results.as_numpy(output)
            print(f"Received result buffer \"{output}\" of size {result.shape}")
            print(f"Naive buffer sum: {np.sum(result)}")

        num_dets, det_boxes, det_scores, det_classes = (results.as_numpy(name) for name in OUTPUT_NAMES)
        detected_objects = postprocess(num_dets, det_boxes, det_scores, det_classes, input_image.shape[1], input_image.shape[0], input_shape)
        print(f"Detected objects: {len(detected_objects)}")

        input_image = _draw_detections(input_image, detected_objects)

        if FLAGS.out:
            cv2.imwrite(FLAGS.out, input_image)
            print(f"Saved result to {FLAGS.out}")
        else:
            cv2.imshow('image', input_image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

    # VIDEO MODE
    if FLAGS.mode == 'video':
        print("Running in 'video' mode")
        if not FLAGS.input:
            print("FAILED: no input video")
            sys.exit(1)

        inputs = [grpcclient.InferInput(INPUT_NAMES[0], tensor_shape, "FP32")]
        outputs = _requested_outputs()

        print("Opening input video stream...")
        cap = cv2.VideoCapture(FLAGS.input)
        if not cap.isOpened():
            print(f"FAILED: cannot open video {FLAGS.input}")
            sys.exit(1)

        counter = 0
        out = None
        print("Invoking inference...")
        while True:
            ret, frame = cap.read()
            if not ret:
                print("failed to fetch next frame")
                break

            # The writer is created lazily because it needs the frame size.
            if counter == 0 and FLAGS.out:
                print("Opening output video stream...")
                fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
                out = cv2.VideoWriter(FLAGS.out, fourcc, FLAGS.fps, (frame.shape[1], frame.shape[0]))

            input_image_buffer = preprocess(frame, input_shape)
            input_image_buffer = np.expand_dims(input_image_buffer, axis=0)

            inputs[0].set_data_from_numpy(input_image_buffer)

            results = triton_client.infer(model_name=FLAGS.model,
                                          inputs=inputs,
                                          outputs=outputs,
                                          client_timeout=FLAGS.client_timeout)

            num_dets, det_boxes, det_scores, det_classes = (results.as_numpy(name) for name in OUTPUT_NAMES)
            detected_objects = postprocess(num_dets, det_boxes, det_scores, det_classes, frame.shape[1], frame.shape[0], input_shape)
            print(f"Frame {counter}: {len(detected_objects)} objects")
            counter += 1

            frame = _draw_detections(frame, detected_objects)

            if FLAGS.out:
                out.write(frame)
            else:
                cv2.imshow('image', frame)
                if cv2.waitKey(1) == ord('q'):
                    break

        if FLAGS.model_info:
            _print_inference_statistics(triton_client, FLAGS.model)
        print("Done")

        cap.release()
        if FLAGS.out:
            # 'out' stays None when the stream produced no frame at all.
            if out is not None:
                out.release()
        else:
            cv2.destroyAllWindows()
deploy/triton-inference-server/labels.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+
3
# Class labels keyed by integer class id (0-79).
# Built with the functional Enum API: the names are listed in id order and
# ``start=0`` numbers them consecutively from zero, so a name's position in
# the tuple is its value.
COCOLabels = Enum(
    "COCOLabels",
    (
        # 0-9
        "PERSON", "BICYCLE", "CAR", "MOTORBIKE", "AEROPLANE",
        "BUS", "TRAIN", "TRUCK", "BOAT", "TRAFFIC_LIGHT",
        # 10-19
        "FIRE_HYDRANT", "STOP_SIGN", "PARKING_METER", "BENCH", "BIRD",
        "CAT", "DOG", "HORSE", "SHEEP", "COW",
        # 20-29
        "ELEPHANT", "BEAR", "ZEBRA", "GIRAFFE", "BACKPACK",
        "UMBRELLA", "HANDBAG", "TIE", "SUITCASE", "FRISBEE",
        # 30-39
        "SKIS", "SNOWBOARD", "SPORTS_BALL", "KITE", "BASEBALL_BAT",
        "BASEBALL_GLOVE", "SKATEBOARD", "SURFBOARD", "TENNIS_RACKET", "BOTTLE",
        # 40-49
        "WINE_GLASS", "CUP", "FORK", "KNIFE", "SPOON",
        "BOWL", "BANANA", "APPLE", "SANDWICH", "ORANGE",
        # 50-59
        "BROCCOLI", "CARROT", "HOT_DOG", "PIZZA", "DONUT",
        "CAKE", "CHAIR", "SOFA", "POTTEDPLANT", "BED",
        # 60-69
        "DININGTABLE", "TOILET", "TVMONITOR", "LAPTOP", "MOUSE",
        "REMOTE", "KEYBOARD", "CELL_PHONE", "MICROWAVE", "OVEN",
        # 70-79
        "TOASTER", "SINK", "REFRIGERATOR", "BOOK", "CLOCK",
        "VASE", "SCISSORS", "TEDDY_BEAR", "HAIR_DRIER", "TOOTHBRUSH",
    ),
    module=__name__,
    start=0,
)
deploy/triton-inference-server/processing.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from boundingbox import BoundingBox
2
+
3
+ import cv2
4
+ import numpy as np
5
+
6
def preprocess(img, input_shape, letter_box=True):
    """Convert an image into the float32 CHW buffer sent to the model.

    :param img: H x W x 3 image array; converted with ``COLOR_BGR2RGB``, so it
        is expected in BGR channel order
    :param input_shape: target size, indexed as ``(height, width)``
    :param letter_box: if True, keep the aspect ratio and centre the resized
        image on a grey (127) canvas; if False, stretch to the target size
    :return: float32 array of shape ``(3, height, width)`` scaled to [0, 1]
    """
    if letter_box:
        img_h, img_w, _ = img.shape
        new_h, new_w = input_shape[0], input_shape[1]
        offset_h, offset_w = 0, 0
        if (new_w / img_w) <= (new_h / img_h):
            # Width is the limiting side: shrink the height, pad top/bottom.
            # Clamp to 1 px so an extreme aspect ratio cannot truncate to 0,
            # which cv2.resize rejects.
            new_h = max(1, int(img_h * new_w / img_w))
            offset_h = (input_shape[0] - new_h) // 2
        else:
            # Height is the limiting side: shrink the width, pad left/right.
            new_w = max(1, int(img_w * new_h / img_h))
            offset_w = (input_shape[1] - new_w) // 2
        resized = cv2.resize(img, (new_w, new_h))
        img = np.full((input_shape[0], input_shape[1], 3), 127, dtype=np.uint8)
        img[offset_h:(offset_h + new_h), offset_w:(offset_w + new_w), :] = resized
    else:
        # cv2.resize takes the size as (width, height).
        img = cv2.resize(img, (input_shape[1], input_shape[0]))

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # HWC -> CHW, then scale 0..255 to 0..1.
    img = img.transpose((2, 0, 1)).astype(np.float32)
    img /= 255.0
    return img
27
+
28
def postprocess(num_dets, det_boxes, det_scores, det_classes, img_w, img_h, input_shape, letter_box=True):
    """Map raw model detections back onto the original image.

    Only the first batch entry (index 0) of every output is read.

    :param num_dets: detection counts; ``num_dets[0][0]`` is the number of
        valid detections
    :param det_boxes: boxes, indexed ``[batch, detection, 4]``; presumably
        ``(x1, y1, x2, y2)`` in model-input pixels - confirm against the
        exported model
    :param det_scores: confidences, indexed ``[batch, detection]``
    :param det_classes: class ids, indexed ``[batch, detection]``
    :param img_w: width of the original image
    :param img_h: height of the original image
    :param input_shape: model input size, indexed as ``(height, width)``
    :param letter_box: True if the input was letter-boxed (aspect ratio kept
        and padded) rather than stretched
    :return: list of BoundingBox in original-image pixel coordinates
    """
    count = int(num_dets[0][0])
    in_h, in_w = input_shape[0], input_shape[1]

    # Normalise to 0..1: x by the input width, y by the input height. This
    # matches the (height, width) indexing used by the letter-box maths below.
    boxes = det_boxes[0, :count] / np.array([in_w, in_h, in_w, in_h], dtype=np.float32)
    scores = det_scores[0, :count]
    # The np.int alias was removed in NumPy 1.24; the builtin int is the
    # same dtype.
    classes = det_classes[0, :count].astype(int)

    old_h, old_w = img_h, img_w
    offset_h, offset_w = 0, 0
    if letter_box:
        # Size of the padded canvas expressed in original-image pixels, and
        # the padding that was added on the limiting axis.
        if (img_w / in_w) >= (img_h / in_h):
            old_h = int(in_h * img_w / in_w)
            offset_h = (old_h - img_h) // 2
        else:
            old_w = int(in_w * img_h / in_h)
            offset_w = (old_w - img_w) // 2

    boxes = boxes * np.array([old_w, old_h, old_w, old_h], dtype=np.float32)
    if letter_box:
        boxes -= np.array([offset_w, offset_h, offset_w, offset_h], dtype=np.float32)
    boxes = boxes.astype(int)

    # BoundingBox takes both x values first, then both y values.
    return [
        BoundingBox(label, score, box[0], box[2], box[1], box[3], img_w, img_h)
        for box, score, label in zip(boxes, scores, classes)
    ]
deploy/triton-inference-server/render.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ import cv2
4
+
5
+ from math import sqrt
6
+
7
+ _LINE_THICKNESS_SCALING = 500.0
8
+
9
+ np.random.seed(0)
10
+ RAND_COLORS = np.random.randint(50, 255, (64, 3), "int") # used for class visu
11
+ RAND_COLORS[0] = [220, 220, 220]
12
+
13
def render_box(img, box, color=(200, 200, 200)):
    """
    Draw a box outline. The line thickness is derived from the image area.
    :param img: image to render into
    :param box: (x1, y1, x2, y2) - box coordinates
    :param color: (b, g, r) - box color
    :return: updated image
    """
    left, top, right, bottom = box
    rows, cols = img.shape[0], img.shape[1]
    # Thickness grows with the pixel count and is never thinner than 1 px.
    scaled = (rows * cols) / (_LINE_THICKNESS_SCALING * _LINE_THICKNESS_SCALING)
    line_px = max(1, int(round(scaled)))
    top_left = (int(left), int(top))
    bottom_right = (int(right), int(bottom))
    return cv2.rectangle(img, top_left, bottom_right, color, thickness=line_px)
37
+
38
def render_filled_box(img, box, color=(200, 200, 200)):
    """
    Draw a solid, filled box.
    :param img: image to render into
    :param box: (x1, y1, x2, y2) - box coordinates
    :param color: (b, g, r) - fill color
    :return: updated image
    """
    left, top, right, bottom = box
    top_left = (int(left), int(top))
    bottom_right = (int(right), int(bottom))
    # cv2.FILLED as the thickness fills the rectangle instead of outlining it.
    return cv2.rectangle(img, top_left, bottom_right, color, thickness=cv2.FILLED)
55
+
56
# Divisor (applied squared to the pixel count) used to derive text stroke thickness.
_TEXT_THICKNESS_SCALING = 700.0
# Image height at which the font scale equals normalised_scaling.
_TEXT_SCALING = 520.0


def _text_style(img, normalised_scaling):
    """Return ``(font_scale, thickness)`` for text drawn on ``img``.

    Shared by get_text_size and render_text so that measured and rendered
    text always use the same parameters.
    """
    thickness = int(
        round(
            (img.shape[0] * img.shape[1])
            / (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING)
        )
        * normalised_scaling
    )
    # Never thinner than 1 px.
    thickness = max(1, thickness)
    scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling
    return scaling, thickness


def get_text_size(img, text, normalised_scaling=1.0):
    """
    Get calculated text size (as box width and height)
    :param img: image reference, used to determine appropriate text scaling
    :param text: text to display
    :param normalised_scaling: additional normalised scaling. Default 1.0.
    :return: (width, height) - width and height of text box
    """
    scaling, thickness = _text_style(img, normalised_scaling)
    # getTextSize returns ((width, height), baseline); only the size is used.
    return cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scaling, thickness)[0]


def render_text(img, text, pos, color=(200, 200, 200), normalised_scaling=1.0):
    """
    Render a text into the image. Calculates scaling and thickness automatically.
    :param img: image to render into
    :param text: text to display
    :param pos: (x, y) - upper left coordinates of render position
    :param color: (b, g, r) - text color
    :param normalised_scaling: additional normalised scaling. Default 1.0.
    :return: updated image
    """
    x, y = pos
    scaling, thickness = _text_style(img, normalised_scaling)
    size = get_text_size(img, text, normalised_scaling)
    # putText anchors at the bottom-left of the text, so shift down by the
    # text height to honour the upper-left 'pos'.
    cv2.putText(
        img,
        text,
        (int(x), int(y + size[1])),
        cv2.FONT_HERSHEY_SIMPLEX,
        scaling,
        color,
        thickness=thickness,
    )
    return img
environment.yml ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: myominhtet
2
+ channels:
3
+ - conda-forge
4
+ - pytorch
5
+ - defaults
6
+ dependencies:
7
+ - _libgcc_mutex=0.1=main
8
+ - _openmp_mutex=5.1=1_gnu
9
+ - asttokens=3.0.0=py311h06a4308_0
10
+ - blas=1.0=mkl
11
+ - brotli-python=1.0.9=py311h6a678d5_9
12
+ - bzip2=1.0.8=h5eee18b_6
13
+ - ca-certificates=2025.2.25=h06a4308_0
14
+ - certifi=2025.1.31=py311h06a4308_0
15
+ - comm=0.2.1=py311h06a4308_0
16
+ - cpuonly=2.0=0
17
+ - debugpy=1.8.11=py311h6a678d5_0
18
+ - decorator=5.1.1=pyhd3eb1b0_0
19
+ - executing=0.8.3=pyhd3eb1b0_0
20
+ - filelock=3.13.1=py311h06a4308_0
21
+ - gmp=6.3.0=h6a678d5_0
22
+ - gmpy2=2.2.1=py311h5eee18b_0
23
+ - intel-openmp=2023.1.0=hdb19cb5_46306
24
+ - ipykernel=6.29.0=pyhd33586a_0
25
+ - ipython=8.30.0=py311h06a4308_0
26
+ - jedi=0.19.2=py311h06a4308_0
27
+ - jinja2=3.1.6=py311h06a4308_0
28
+ - jupyter_client=8.6.3=py311h06a4308_0
29
+ - jupyter_core=5.7.2=py311h06a4308_0
30
+ - ld_impl_linux-64=2.40=h12ee557_0
31
+ - libffi=3.4.4=h6a678d5_1
32
+ - libgcc-ng=11.2.0=h1234567_1
33
+ - libgomp=11.2.0=h1234567_1
34
+ - libllvm14=14.0.6=hecde1de_4
35
+ - libsodium=1.0.18=h7b6447c_0
36
+ - libstdcxx-ng=11.2.0=h1234567_1
37
+ - libuuid=1.41.5=h5eee18b_0
38
+ - llvm-openmp=14.0.6=h9e868ea_0
39
+ - llvmlite=0.41.0=py311he621ea3_0
40
+ - markupsafe=3.0.2=py311h5eee18b_0
41
+ - matplotlib-inline=0.1.6=py311h06a4308_0
42
+ - mkl=2023.1.0=h213fc3f_46344
43
+ - mkl-service=2.4.0=py311h5eee18b_2
44
+ - mkl_fft=1.3.11=py311h5eee18b_0
45
+ - mkl_random=1.2.8=py311ha02d727_0
46
+ - mpc=1.3.1=h5eee18b_0
47
+ - mpfr=4.2.1=h5eee18b_0
48
+ - mpmath=1.3.0=py311h06a4308_0
49
+ - ncurses=6.4=h6a678d5_0
50
+ - nest-asyncio=1.6.0=py311h06a4308_0
51
+ - networkx=3.4.2=py311h06a4308_0
52
+ - numba=0.58.1=py311ha02d727_0
53
+ - numpy=1.26.4=py311h08b1b3b_0
54
+ - numpy-base=1.26.4=py311hf175353_0
55
+ - openssl=3.0.16=h5eee18b_0
56
+ - packaging=24.2=py311h06a4308_0
57
+ - parso=0.8.4=py311h06a4308_0
58
+ - pexpect=4.8.0=pyhd3eb1b0_3
59
+ - pip=25.0=py311h06a4308_0
60
+ - platformdirs=3.10.0=py311h06a4308_0
61
+ - prompt-toolkit=3.0.43=py311h06a4308_0
62
+ - prompt_toolkit=3.0.43=hd3eb1b0_0
63
+ - psutil=5.9.0=py311h5eee18b_1
64
+ - ptyprocess=0.7.0=pyhd3eb1b0_2
65
+ - pure_eval=0.2.2=pyhd3eb1b0_0
66
+ - pygments=2.15.1=py311h06a4308_1
67
+ - pysocks=1.7.1=py311h06a4308_0
68
+ - python=3.11.11=he870216_0
69
+ - python-dateutil=2.9.0post0=py311h06a4308_2
70
+ - pytorch-mutex=1.0=cpu
71
+ - pyyaml=6.0.2=py311h5eee18b_0
72
+ - pyzmq=26.2.0=py311h6a678d5_0
73
+ - readline=8.2=h5eee18b_0
74
+ - setuptools=75.8.0=py311h06a4308_0
75
+ - six=1.16.0=pyhd3eb1b0_1
76
+ - sqlite=3.45.3=h5eee18b_0
77
+ - stack_data=0.2.0=pyhd3eb1b0_0
78
+ - tbb=2021.8.0=hdb19cb5_0
79
+ - tk=8.6.14=h39e8969_0
80
+ - torchaudio=2.1.2=py311_cpu
81
+ - tornado=6.4.2=py311h5eee18b_0
82
+ - traitlets=5.14.3=py311h06a4308_0
83
+ - typing_extensions=4.12.2=py311h06a4308_0
84
+ - urllib3=2.3.0=py311h06a4308_0
85
+ - wcwidth=0.2.5=pyhd3eb1b0_0
86
+ - wheel=0.45.1=py311h06a4308_0
87
+ - xz=5.6.4=h5eee18b_1
88
+ - yaml=0.2.5=h7b6447c_0
89
+ - zeromq=4.3.5=h6a678d5_0
90
+ - zlib=1.2.13=h5eee18b_1
91
+ - pip:
92
+ - absl-py==2.1.0
93
+ - accelerate==0.28.0
94
+ - aiofiles==23.2.1
95
+ - aiohttp==3.9.3
96
+ - aiosignal==1.3.1
97
+ - albumentations==1.4.0
98
+ - alembic==1.13.1
99
+ - amqp==5.2.0
100
+ - annotated-types==0.6.0
101
+ - antlr4-python3-runtime==4.9.3
102
+ - anyio==4.3.0
103
+ - appdirs==1.4.4
104
+ - attrs==23.2.0
105
+ - basemap==1.4.0
106
+ - basemap-data==1.3.2
107
+ - bcrypt==4.1.3
108
+ - beautifulsoup4==4.12.3
109
+ - black==24.3.0
110
+ - blinker==1.7.0
111
+ - blis==0.7.11
112
+ - bs4==0.0.2
113
+ - cachetools==5.3.3
114
+ - catalogue==2.0.10
115
+ - catboost==1.2.2
116
+ - charset-normalizer==3.4.1
117
+ - click==8.1.7
118
+ - click-plugins==1.1.1
119
+ - cligj==0.7.2
120
+ - cloudpathlib==0.18.1
121
+ - cloudpickle==3.0.0
122
+ - colorama==0.4.6
123
+ - coloredlogs==15.0.1
124
+ - colorlog==6.8.2
125
+ - confection==0.1.5
126
+ - configparser==6.0.1
127
+ - contourpy==1.2.0
128
+ - crayons==0.4.0
129
+ - croniter==2.0.5
130
+ - cssselect==1.2.0
131
+ - cycler==0.12.1
132
+ - cymem==2.0.8
133
+ - dataclasses-json==0.6.4
134
+ - datasets==2.18.0
135
+ - dateparser==1.2.0
136
+ - debtcollector==3.0.0
137
+ - demjson3==3.0.6
138
+ - detectron2==0.6
139
+ - dill==0.3.8
140
+ - distlib==0.3.8
141
+ - distro==1.9.0
142
+ - dnspython==2.6.1
143
+ - dogpile-cache==1.3.3
144
+ - efficientnet-pytorch==0.7.1
145
+ - emoji==2.11.1
146
+ - en-core-web-sm==3.7.1
147
+ - et-xmlfile==1.1.0
148
+ - evdev==1.7.0
149
+ - eventlet==0.36.1
150
+ - facebook-page-scraper==5.0.2
151
+ - facebook-scraper==0.2.59
152
+ - facebook-sdk==3.1.0
153
+ - fake-useragent==1.4.0
154
+ - fastapi==0.115.4
155
+ - fasteners==0.19
156
+ - fastjsonschema==2.19.1
157
+ - fasttext==0.9.2
158
+ - ffmpeg-python==0.2.0
159
+ - ffmpy==0.4.0
160
+ - fiona==1.9.5
161
+ - fire==0.6.0
162
+ - flatbuffers==24.3.25
163
+ - fonttools==4.47.2
164
+ - frozenlist==1.4.1
165
+ - fsspec==2023.12.2
166
+ - future==1.0.0
167
+ - futurist==3.0.0
168
+ - fvcore==0.1.5.post20221221
169
+ - gdown==5.2.0
170
+ - gensim==4.3.2
171
+ - geographiclib==2.0
172
+ - geopandas==0.14.3
173
+ - geopy==2.4.1
174
+ - gkeepapi==0.16.0
175
+ - gmplot==1.4.1
176
+ - google-api-core==2.22.0
177
+ - google-auth==2.35.0
178
+ - google-auth-oauthlib==1.2.1
179
+ - google-cloud-vision==3.8.0
180
+ - google-images-download==2.8.0
181
+ - googleapis-common-protos==1.65.0
182
+ - gpsoauth==1.1.1
183
+ - gradio==4.44.0
184
+ - gradio-client==1.3.0
185
+ - greenlet==3.0.3
186
+ - grpcio==1.67.1
187
+ - grpcio-status==1.67.1
188
+ - h11==0.14.0
189
+ - h2==4.1.0
190
+ - h5py==3.10.0
191
+ - hpack==4.0.0
192
+ - httpcore==1.0.5
193
+ - httptools==0.6.1
194
+ - httpx==0.27.0
195
+ - huggingface-hub==0.24.7
196
+ - humanfriendly==10.0
197
+ - hydra-core==1.3.2
198
+ - hyperframe==6.0.1
199
+ - idna==3.10
200
+ - imagecodecs==2024.12.30
201
+ - imageio==2.34.0
202
+ - imbalanced-learn==0.12.2
203
+ - imblearn==0.0
204
+ - imgviz==1.7.5
205
+ - importlib-resources==6.4.5
206
+ - iopath==0.1.9
207
+ - iso8601==2.1.0
208
+ - jax==0.5.2
209
+ - jaxlib==0.5.1
210
+ - joblib==1.3.2
211
+ - jsonpatch==1.33
212
+ - jsonpointer==2.4
213
+ - jsonschema==4.21.1
214
+ - jsonschema-specifications==2023.12.1
215
+ - kaitaistruct==0.10
216
+ - keyboard==0.13.5
217
+ - keystoneauth1==5.6.0
218
+ - keystonemiddleware==10.7.0
219
+ - kiwisolver==1.4.5
220
+ - kombu==5.3.7
221
+ - labelme==5.5.0
222
+ - labelme2coco==0.2.6
223
+ - langchain==0.1.12
224
+ - langchain-community==0.0.28
225
+ - langchain-core==0.1.32
226
+ - langchain-text-splitters==0.0.1
227
+ - langcodes==3.4.0
228
+ - langdetect==1.0.9
229
+ - langsmith==0.1.27
230
+ - language-data==1.2.0
231
+ - lazy-loader==0.3
232
+ - lightgbm==4.2.0
233
+ - logutils==0.3.5
234
+ - lxml==5.1.0
235
+ - mako==1.3.5
236
+ - marisa-trie==1.2.0
237
+ - markdown==3.6
238
+ - markdown-it-py==3.0.0
239
+ - marshmallow==3.21.1
240
+ - matplotlib==3.8.2
241
+ - mdurl==0.1.2
242
+ - mediapipe==0.10.21
243
+ - mistral==18.0.1
244
+ - mistral-lib==3.0.0
245
+ - mistralai==0.4.1
246
+ - ml-dtypes==0.5.1
247
+ - msgpack==1.0.8
248
+ - multidict==6.0.5
249
+ - multiprocess==0.70.16
250
+ - munch==4.0.0
251
+ - murmurhash==1.0.10
252
+ - myanmartools==1.2.1
253
+ - mypy-extensions==1.0.0
254
+ - natsort==8.4.0
255
+ - nbformat==5.9.2
256
+ - netaddr==1.3.0
257
+ - netifaces==0.11.0
258
+ - nltk==3.8.1
259
+ - nvidia-cublas-cu12==12.4.5.8
260
+ - nvidia-cuda-cupti-cu12==12.4.127
261
+ - nvidia-cuda-nvrtc-cu12==12.4.127
262
+ - nvidia-cuda-runtime-cu12==12.4.127
263
+ - nvidia-cudnn-cu12==9.1.0.70
264
+ - nvidia-cufft-cu12==11.2.1.3
265
+ - nvidia-curand-cu12==10.3.5.147
266
+ - nvidia-cusolver-cu12==11.6.1.9
267
+ - nvidia-cusparse-cu12==12.3.1.170
268
+ - nvidia-cusparselt-cu12==0.6.2
269
+ - nvidia-nccl-cu12==2.21.5
270
+ - nvidia-nvjitlink-cu12==12.4.127
271
+ - nvidia-nvtx-cu12==12.4.127
272
+ - oauthlib==3.2.2
273
+ - omegaconf==2.3.0
274
+ - onnxruntime==1.19.0
275
+ - openai==0.28.0
276
+ - opencv-contrib-python==4.11.0.86
277
+ - opencv-python==4.9.0.80
278
+ - openpyxl==3.1.2
279
+ - opt-einsum==3.4.0
280
+ - optuna==3.6.1
281
+ - orjson==3.9.15
282
+ - os-service-types==1.7.0
283
+ - oslo-cache==3.7.0
284
+ - oslo-concurrency==6.0.0
285
+ - oslo-config==9.4.0
286
+ - oslo-context==5.5.0
287
+ - oslo-db==15.1.0
288
+ - oslo-i18n==6.3.0
289
+ - oslo-log==6.0.0
290
+ - oslo-messaging==14.8.0
291
+ - oslo-metrics==0.8.0
292
+ - oslo-middleware==6.1.0
293
+ - oslo-policy==4.3.0
294
+ - oslo-serialization==5.4.0
295
+ - oslo-service==3.5.0
296
+ - oslo-utils==7.1.0
297
+ - osprofiler==4.1.0
298
+ - outcome==1.3.0.post0
299
+ - pandas==2.2.0
300
+ - paramiko==3.4.0
301
+ - parse==1.20.1
302
+ - paste==3.10.1
303
+ - pastedeploy==3.1.0
304
+ - pathspec==0.12.1
305
+ - pbr==6.0.0
306
+ - pdf2image==1.17.0
307
+ - pecan==1.5.1
308
+ - pillow==11.1.0
309
+ - plotly==5.18.0
310
+ - pluggy==0.3.1
311
+ - plum-dispatch==1.7.4
312
+ - ply==3.11
313
+ - polars==0.20.23
314
+ - portalocker==2.8.2
315
+ - preshed==3.0.9
316
+ - pretrainedmodels==0.7.4
317
+ - prettytable==3.10.0
318
+ - prometheus-client==0.20.0
319
+ - proto-plus==1.25.0
320
+ - protobuf==4.25.6
321
+ - py==1.11.0
322
+ - pyarrow==15.0.1
323
+ - pyarrow-hotfix==0.6
324
+ - pyasn1==0.5.1
325
+ - pyasn1-modules==0.4.1
326
+ - pybboxes==0.1.6
327
+ - pybind11==2.12.0
328
+ - pycadf==3.1.1
329
+ - pycocotools==2.0.7
330
+ - pycryptodomex==3.21.0
331
+ - pydantic==2.6.3
332
+ - pydantic-core==2.16.3
333
+ - pydub==0.25.1
334
+ - pyee==8.2.2
335
+ - pyheif==0.7.1
336
+ - pyicu==2.13.1
337
+ - pyidaungsu==0.1.4
338
+ - pyjwt==2.8.0
339
+ - pymupdf==1.24.5
340
+ - pymupdfb==1.24.3
341
+ - pynacl==1.5.0
342
+ - pynput==1.7.6
343
+ - pyparsing==3.1.1
344
+ - pypdf==4.1.0
345
+ - pypdf2==3.0.1
346
+ - pyppeteer==1.0.2
347
+ - pyproj==3.6.1
348
+ - pyqt5==5.15.11
349
+ - pyqt5-qt5==5.15.14
350
+ - pyqt5-sip==12.15.0
351
+ - pyquery==2.0.0
352
+ - pyshark==0.6
353
+ - pyshp==2.3.1
354
+ - pytesseract==0.3.10
355
+ - python-crfsuite==0.9.10
356
+ - python-docx==1.1.2
357
+ - python-dotenv==1.0.1
358
+ - python-graphviz==0.20.1
359
+ - python-keystoneclient==5.4.0
360
+ - python-multipart==0.0.12
361
+ - python-xlib==0.33
362
+ - pytz==2023.3.post1
363
+ - qtpy==2.4.1
364
+ - qudida==0.0.4
365
+ - rabbit==1.2.0
366
+ - referencing==0.33.0
367
+ - regex==2023.12.25
368
+ - repoze-lru==0.7
369
+ - requests==2.32.3
370
+ - requests-html==0.10.0
371
+ - requests-oauthlib==1.3.1
372
+ - rfc3986==2.0.0
373
+ - rich==13.7.1
374
+ - routes==2.5.1
375
+ - rpds-py==0.18.0
376
+ - rsa==4.9
377
+ - ruff==0.6.5
378
+ - safehttpx==0.1.1
379
+ - safetensors==0.4.1
380
+ - sahi==0.11.18
381
+ - scikit-image==0.22.0
382
+ - scikit-learn==1.4.0
383
+ - scipy==1.11.4
384
+ - seaborn==0.13.1
385
+ - segmentation-models-pytorch==0.3.3
386
+ - selenium==4.24.0
387
+ - selenium-wire==5.1.0
388
+ - semantic-version==2.10.0
389
+ - sentence-transformers==3.0.1
390
+ - sentencepiece==0.2.0
391
+ - shapely==2.0.2
392
+ - shellingham==1.5.4
393
+ - simplegeneric==0.8.1
394
+ - smart-open==7.0.4
395
+ - sniffio==1.3.0
396
+ - sortedcontainers==2.4.0
397
+ - sounddevice==0.5.1
398
+ - soupsieve==2.5
399
+ - spacy==3.7.5
400
+ - spacy-legacy==3.0.12
401
+ - spacy-loggers==1.0.5
402
+ - spire-doc==12.4.0
403
+ - sqlalchemy==2.0.28
404
+ - srsly==2.4.8
405
+ - starlette==0.41.2
406
+ - statsd==4.0.1
407
+ - stevedore==5.2.0
408
+ - super-image==0.1.7
409
+ - sympy==1.13.1
410
+ - tabpfn==0.1.10
411
+ - tabulate==0.9.0
412
+ - tenacity==8.2.3
413
+ - tensorboard==2.16.2
414
+ - tensorboard-data-server==0.7.2
415
+ - termcolor==2.4.0
416
+ - terminaltables==3.1.10
417
+ - testresources==2.0.1
418
+ - testscenarios==0.5.0
419
+ - testtools==2.7.2
420
+ - thinc==8.2.5
421
+ - thop==0.1.1-2209072238
422
+ - threadpoolctl==3.2.0
423
+ - tifffile==2024.2.12
424
+ - tika==2.6.0
425
+ - timm==0.9.2
426
+ - tokenizers==0.20.1
427
+ - tomlkit==0.12.0
428
+ - tooz==6.2.0
429
+ - torch==2.6.0
430
+ - torchvision==0.21.0
431
+ - tox==2.2.1
432
+ - tqdm==4.66.1
433
+ - transformers==4.46.1
434
+ - trio==0.24.0
435
+ - trio-websocket==0.11.1
436
+ - triton==3.2.0
437
+ - tweepy==4.14.0
438
+ - typer==0.12.3
439
+ - typing-inspect==0.9.0
440
+ - tzdata==2023.4
441
+ - tzlocal==5.2
442
+ - urllib3-secure-extra==0.1.0
443
+ - uvicorn==0.27.1
444
+ - uvloop==0.19.0
445
+ - vine==5.1.0
446
+ - virtualenv==20.26.1
447
+ - voluptuous==0.15.1
448
+ - w3lib==2.1.2
449
+ - warcio==1.7.4
450
+ - wasabi==1.1.3
451
+ - watchfiles==0.21.0
452
+ - weasel==0.4.1
453
+ - webdriver-manager==3.2.2
454
+ - webob==1.8.7
455
+ - websocket-client==1.8.0
456
+ - websockets==10.4
457
+ - werkzeug==3.0.1
458
+ - wikipedia==1.4.0
459
+ - wrapt==1.16.0
460
+ - wsme==0.12.1
461
+ - wsproto==1.2.0
462
+ - xgboost==2.0.3
463
+ - xxhash==3.4.1
464
+ - yacs==0.1.8
465
+ - yappi==1.6.0
466
+ - yaql==3.0.0
467
+ - yarl==1.9.4
468
+ - zstandard==0.22.0
469
+ prefix: /home/myominhtet/anaconda3/envs/myominhtet
export.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ import time
4
+ import warnings
5
+
6
+ sys.path.append('./') # to run '$ python *.py' files in subdirectories
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ from torch.utils.mobile_optimizer import optimize_for_mobile
11
+
12
+ import models
13
+ from models.experimental import attempt_load, End2End
14
+ from utils.activations import Hardswish, SiLU
15
+ from utils.general import set_logging, check_img_size
16
+ from utils.torch_utils import select_device
17
+ from utils.add_nms import RegisterNMS
18
+
19
if __name__ == '__main__':
    # Command-line export script: loads a PyTorch checkpoint and attempts, in order,
    # TorchScript, CoreML, TorchScript-Lite and ONNX exports. Each export is wrapped in
    # its own try/except so that one failing backend does not stop the others.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolor-csp-c.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
    parser.add_argument('--dynamic-batch', action='store_true', help='dynamic batch onnx for tensorrt and onnx-runtime')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--end2end', action='store_true', help='export end2end onnx')
    parser.add_argument('--max-wh', type=int, default=None, help='None for tensorrt nms, int value for onnx-runtime nms')
    parser.add_argument('--topk-all', type=int, default=100, help='topk objects for every images')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='iou threshold for NMS')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='conf threshold for NMS')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--simplify', action='store_true', help='simplify onnx model')
    # Help text previously duplicated the --end2end description.
    parser.add_argument('--include-nms', action='store_true', help='register the NMS plugin in the exported onnx')
    parser.add_argument('--fp16', action='store_true', help='CoreML FP16 half-precision export')
    parser.add_argument('--int8', action='store_true', help='CoreML INT8 quantization')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    # --end2end and --dynamic-batch each override fully dynamic axes.
    opt.dynamic = opt.dynamic and not opt.end2end
    opt.dynamic = False if opt.dynamic_batch else opt.dynamic
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run
    if opt.include_nms:
        model.model[-1].include_nms = True
        y = None  # y is None later selects the ['classes', 'boxes'] ONNX output names

    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript export failure: %s' % e)

    # CoreML export
    try:
        import coremltools as ct

        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
        # convert model from torchscript and apply pixel scaling as per detect.py
        # NOTE: `ts` is only bound if the TorchScript export above succeeded; otherwise the
        # resulting NameError is reported by the except clause below as a CoreML failure.
        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        bits, mode = (8, 'kmeans_lut') if opt.int8 else (16, 'linear') if opt.fp16 else (32, None)
        if bits < 32:
            if sys.platform.lower() == 'darwin':  # quantization only supported on macOS
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=DeprecationWarning)  # suppress numpy==1.20 float warning
                    ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
            else:
                print('quantization only supported on macOS, skipping...')

        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        ct_model.save(f)
        print('CoreML export success, saved as %s' % f)
    except Exception as e:
        print('CoreML export failure: %s' % e)

    # TorchScript-Lite export
    try:
        print('\nStarting TorchScript-Lite export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript.ptl')  # filename
        tsl = torch.jit.trace(model, img, strict=False)
        tsl = optimize_for_mobile(tsl)
        tsl._save_for_lite_interpreter(f)
        print('TorchScript-Lite export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript-Lite export failure: %s' % e)

    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        f = opt.weights.replace('.pt', '.onnx')  # filename
        model.eval()
        output_names = ['classes', 'boxes'] if y is None else ['output']
        dynamic_axes = None
        # Symbolic names for the end2end output dimensions; stays None unless the
        # TensorRT end2end wrapper is actually applied below.
        shapes = None
        if opt.dynamic:
            dynamic_axes = {'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                            'output': {0: 'batch', 2: 'y', 3: 'x'}}
        if opt.dynamic_batch:
            opt.batch_size = 'batch'
            dynamic_axes = {
                'images': {
                    0: 'batch',
                }, }
            if opt.end2end and opt.max_wh is None:
                output_axes = {
                    'num_dets': {0: 'batch'},
                    'det_boxes': {0: 'batch'},
                    'det_scores': {0: 'batch'},
                    'det_classes': {0: 'batch'},
                }
            else:
                output_axes = {
                    'output': {0: 'batch'},
                }
            dynamic_axes.update(output_axes)
        if opt.grid:
            if opt.end2end:
                # Pick the backend name first: `fmt % a if c else b` parses as
                # `(fmt % a) if c else b`, which printed only 'onnxruntime'.
                backend = 'TensorRT' if opt.max_wh is None else 'onnxruntime'
                print('\nStarting export end2end onnx model for %s...' % backend)
                model = End2End(model, opt.topk_all, opt.iou_thres, opt.conf_thres, opt.max_wh, device, len(labels))
                if opt.end2end and opt.max_wh is None:
                    output_names = ['num_dets', 'det_boxes', 'det_scores', 'det_classes']
                    shapes = [opt.batch_size, 1, opt.batch_size, opt.topk_all, 4,
                              opt.batch_size, opt.topk_all, opt.batch_size, opt.topk_all]
                else:
                    output_names = ['output']
            else:
                model.model[-1].concat = True

        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=output_names,
                          dynamic_axes=dynamic_axes)

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model

        # Only rename output dimensions when the end2end wrapper produced `shapes`;
        # previously --end2end without --grid hit an unbound `shapes` here.
        if shapes is not None:
            for i in onnx_model.graph.output:
                for j in i.type.tensor_type.shape.dim:
                    j.dim_param = str(shapes.pop(0))

        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model

        # # Metadata
        # d = {'stride': int(max(model.stride))}
        # for k, v in d.items():
        #     meta = onnx_model.metadata_props.add()
        #     meta.key, meta.value = k, str(v)
        # onnx.save(onnx_model, f)

        if opt.simplify:
            try:
                import onnxsim

                print('\nStarting to simplify ONNX...')
                onnx_model, check = onnxsim.simplify(onnx_model)
                assert check, 'assert check failed'
            except Exception as e:
                print(f'Simplifier failure: {e}')

        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        onnx.save(onnx_model, f)
        print('ONNX export success, saved as %s' % f)

        if opt.include_nms:
            print('Registering NMS plugin for ONNX...')
            mo = RegisterNMS(f)
            mo.register_nms()
            mo.save(f)

    except Exception as e:
        print('ONNX export failure: %s' % e)

    # Finish
    print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
hubconf.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """PyTorch Hub models
2
+
3
+ Usage:
4
+ import torch
5
+ model = torch.hub.load('repo', 'model')
6
+ """
7
+
8
+ from pathlib import Path
9
+
10
+ import torch
11
+
12
+ from models.yolo import Model
13
+ from utils.general import check_requirements, set_logging
14
+ from utils.google_utils import attempt_download
15
+ from utils.torch_utils import select_device
16
+
17
# Package names this hub module declares as required (module-level `dependencies` list;
# presumably consumed by torch.hub when loading entry points — confirm against torch.hub docs).
dependencies = ['torch', 'yaml']
# Run the project's requirement check against the requirements.txt next to this file,
# skipping the 'pycocotools' and 'thop' entries. Executed at import time.
check_requirements(Path(__file__).parent / 'requirements.txt', exclude=('pycocotools', 'thop'))
set_logging()
20
+
21
+
22
def create(name, pretrained, channels, classes, autoshape):
    """Creates a specified model

    Arguments:
        name (str): name of model, i.e. 'yolov7'; a '<name>.yaml' file is searched for
            recursively under the 'cfg' directory next to this file
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): wrap the model with autoshape() for file/URI/PIL/cv2/np inputs
            and NMS; only applied when pretrained is True

    Returns:
        pytorch model, moved to GPU '0' when CUDA is available, otherwise CPU

    Raises:
        Exception: any failure is re-raised with a hint to retry with force_reload=True;
            the original error is attached as the cause
    """
    try:
        cfg = list((Path(__file__).parent / 'cfg').rglob(f'{name}.yaml'))[0]  # model.yaml path
        model = Model(cfg, channels, classes)
        if pretrained:
            fname = f'{name}.pt'  # checkpoint filename
            attempt_download(fname)  # download if not found locally
            ckpt = torch.load(fname, map_location=torch.device('cpu'))  # load
            msd = model.state_dict()  # model state_dict
            csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
            # Keep only tensors the model actually has with a matching shape. The
            # `k in msd` guard avoids a KeyError for checkpoint-only keys, which the
            # strict=False load below is meant to tolerate.
            csd = {k: v for k, v in csd.items() if k in msd and msd[k].shape == v.shape}  # filter
            model.load_state_dict(csd, strict=False)  # load
            if len(ckpt['model'].names) == classes:
                model.names = ckpt['model'].names  # set class names attribute
            if autoshape:
                model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
        device = select_device('0' if torch.cuda.is_available() else 'cpu')  # default to GPU if available
        return model.to(device)

    except Exception as e:
        s = 'Cache maybe be out of date, try force_reload=True.'
        raise Exception(s) from e
55
+
56
+
57
def custom(path_or_model='path/to/model.pt', autoshape=True):
    """Build a hub model from a user-supplied checkpoint.

    Arguments (3 options):
        path_or_model (str): 'path/to/model.pt'
        path_or_model (dict): torch.load('path/to/model.pt')
        path_or_model (nn.Module): torch.load('path/to/model.pt')['model']
        autoshape (bool): wrap the result with autoshape() for file/URI/PIL/cv2/np
            inputs and NMS

    Returns:
        pytorch model, moved to GPU '0' when CUDA is available, otherwise CPU
    """
    source = path_or_model
    if isinstance(path_or_model, str):
        # A string is treated as a checkpoint path and loaded onto the CPU.
        source = torch.load(path_or_model, map_location=torch.device('cpu'))
    if isinstance(source, dict):
        # Checkpoint dict: use the 'ema' entry when it is present and truthy, else 'model'.
        entry = 'ema' if source.get('ema') else 'model'
        source = source[entry]

    # Rebuild the architecture from the checkpoint's yaml on the same device as its weights.
    hub_model = Model(source.yaml)
    hub_model = hub_model.to(next(source.parameters()).device)
    fp32_weights = source.float().state_dict()
    hub_model.load_state_dict(fp32_weights)
    hub_model.names = source.names  # class names
    if autoshape:
        hub_model = hub_model.autoshape()
    target = '0' if torch.cuda.is_available() else 'cpu'  # default to GPU if available
    device = select_device(target)
    return hub_model.to(device)
79
+
80
+
81
def yolov7(pretrained=True, channels=3, classes=80, autoshape=True):
    """Hub entry point for the 'yolov7' model; forwards all arguments to create()."""
    return create(
        name='yolov7',
        pretrained=pretrained,
        channels=channels,
        classes=classes,
        autoshape=autoshape,
    )
83
+
84
+
85
if __name__ == '__main__':
    # Manual smoke test: load a local checkpoint through custom() and run one inference.
    model = custom(path_or_model='yolov7.pt')  # custom example
    # model = create(name='yolov7', pretrained=True, channels=3, classes=80, autoshape=True)  # pretrained example

    # Verify inference
    import numpy as np
    from PIL import Image  # NOTE(review): not referenced in this block

    # Single all-zero array of shape (640, 480, 3) used as a dummy input image.
    imgs = [np.zeros((640, 480, 3))]

    results = model(imgs)  # batched inference
    results.print()
    results.save()
interfacetest2.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import time
3
+ from pathlib import Path
4
+ import os
5
+ import cv2
6
+ import torch
7
+ import torch.backends.cudnn as cudnn
8
+ from numpy import random
9
+ import numpy as np
10
+ from models.experimental import attempt_load
11
+ from utils.datasets import LoadImages
12
+ from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging, increment_path
13
+ from utils.plots import plot_one_box
14
+ from utils.torch_utils import select_device, time_synchronized
15
+ import gradio as gr
16
+ import ffmpeg
17
+ from fastapi import FastAPI, Request
18
+ from starlette.responses import HTMLResponse
19
+ import uvicorn
20
def convert_to_h264(input_path):
    """Re-encode a video as H.264/AAC MP4 (yuv420p) next to the input file.

    Args:
        input_path: path of the source video.

    Returns:
        Path of the converted file ('<input without extension>_h264.mp4') on success.
        If ffmpeg fails, the error is printed and ``input_path`` is returned unchanged
        so the caller can continue with the original file.
    """
    # Construct output path manually by appending '_h264' before the extension
    output_path = str(Path(input_path).with_suffix('')) + "_h264.mp4"
    try:
        stream = ffmpeg.input(input_path)
        stream = ffmpeg.output(stream, output_path, vcodec='libx264', acodec='aac', format='mp4', pix_fmt='yuv420p')
        # capture_stderr=True is required for ffmpeg.Error.stderr to be populated;
        # without it stderr is None and the handler below crashed on .decode().
        ffmpeg.run(stream, overwrite_output=True, capture_stderr=True)
        return output_path
    except ffmpeg.Error as e:
        # Still guard against a missing stderr so the fallback path can never raise.
        details = e.stderr.decode(errors='replace') if e.stderr else str(e)
        print(f"FFmpeg conversion error: {details}")
        return input_path
31
+
32
+ # IoU and scanner movement functions (unchanged)
33
def compute_iou(box1, box2):
    """Return the intersection-over-union of two boxes given as (x1, y1, x2, y2).

    Returns 0.0 when the union area is zero.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1))
    intersection = overlap_w * overlap_h

    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - intersection

    if union == 0:
        return 0.0
    return intersection / union
47
+
48
def is_scanner_moving(prev_centroids, curr_box, scanner_id, threshold=5.0):
    """Report whether a scanner's box centre moved more than ``threshold``.

    Args:
        prev_centroids: mapping of scanner id to its previous (x, y) centroid.
        curr_box: current box as (x1, y1, x2, y2).
        scanner_id: id to look up in ``prev_centroids``.
        threshold: distance that must be strictly exceeded to count as moving.

    Returns:
        False when ``scanner_id`` has no previous centroid; otherwise the result of
        comparing the Euclidean centroid displacement against ``threshold``.
    """
    left, top, right, bottom = curr_box
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2

    if scanner_id not in prev_centroids:
        return False

    last_x, last_y = prev_centroids[scanner_id]
    displacement = np.sqrt((center_x - last_x)**2 + (center_y - last_y)**2)
    return displacement > threshold
56
+
57
def detect_video(video_path, weights, conf_thres=0.25, iou_thres=0.45, img_size=640, device='', save_dir='runs/detect/exp'):
    """Run detection on a video, annotate scanner activity, and save an H.264 copy.

    Every detection is drawn on the frame. Detections whose class name is "scanner"
    are additionally tracked between frames by nearest centroid and labelled
    "Scanning" or "Idle"; a status line is drawn when a scanning scanner overlaps an
    "item" or "phone" box (IoU > 0.1).

    Args:
        video_path: source passed to LoadImages.
        weights: checkpoint path passed to attempt_load.
        conf_thres: confidence threshold for non_max_suppression.
        iou_thres: IoU threshold for non_max_suppression.
        img_size: requested inference size, adjusted by check_img_size to the model stride.
        device: device string passed to select_device.
        save_dir: output directory (created if missing).

    Returns:
        Path of the H.264 re-encoded output on success, the path of the raw 'mp4v'
        output if the ffmpeg conversion fails, or None if the loader produced no frames.
    """
    save_dir = Path(increment_path(Path(save_dir), exist_ok=True))
    save_dir.mkdir(parents=True, exist_ok=True)

    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision is used on any non-CPU device
    model = attempt_load(weights, map_location=device)
    stride = int(model.stride.max())
    imgsz = check_img_size(img_size, s=stride)
    if half:
        model.half()

    dataset = LoadImages(video_path, img_size=imgsz, stride=stride)
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    save_path = None  # stays None if the loader yields no frames
    vid_path, vid_writer = None, None
    prev_centroids = {}  # scanner id -> centroid from the last frame that had detections
    scanner_id_counter = 0  # next unused scanner id

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        with torch.no_grad():
            pred = model(img)[0]
        pred = non_max_suppression(pred, conf_thres, iou_thres)

        for i, det in enumerate(pred):
            p = Path(path)
            save_path = str(save_dir / p.name.replace('.mp4', '_output.mp4'))
            im0 = im0s

            if len(det):
                # Rescale boxes from the inference size back to the original frame size.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                item_boxes, scanner_data, phone_boxes = [], [], []
                curr_scanner_boxes = []

                for *xyxy, conf, cls in det:
                    x1, y1, x2, y2 = map(int, xyxy)
                    class_name = names[int(cls)]
                    color = colors[int(cls)]
                    if class_name.lower() == "item":
                        item_boxes.append([x1, y1, x2, y2])
                    elif class_name.lower() == "phone":
                        phone_boxes.append([x1, y1, x2, y2])
                    elif class_name.lower() == "scanner":
                        curr_scanner_boxes.append([x1, y1, x2, y2])
                    plot_one_box(xyxy, im0, label=class_name, color=color, line_thickness=2)

                new_prev_centroids = {}
                if prev_centroids and curr_scanner_boxes:
                    for curr_box in curr_scanner_boxes:
                        curr_centroid = ((curr_box[0] + curr_box[2]) / 2, (curr_box[1] + curr_box[3]) / 2)
                        # Nearest previously seen scanner by centroid distance.
                        best_match_id = min(prev_centroids.keys(),
                                            key=lambda k: np.sqrt((curr_centroid[0] - prev_centroids[k][0])**2 +
                                                                  (curr_centroid[1] - prev_centroids[k][1])**2),
                                            default=None)
                        # Reuse the id only when the nearest centroid is closer than 50;
                        # otherwise treat the box as a newly appeared scanner.
                        if best_match_id is not None and np.sqrt((curr_centroid[0] - prev_centroids[best_match_id][0])**2 +
                                                                 (curr_centroid[1] - prev_centroids[best_match_id][1])**2) < 50:
                            scanner_id = best_match_id
                        else:
                            scanner_id = scanner_id_counter
                            scanner_id_counter += 1
                        is_moving = is_scanner_moving(prev_centroids, curr_box, scanner_id)
                        movement_status = "Scanning" if is_moving else "Idle"
                        scanner_data.append([curr_box, movement_status, scanner_id])
                        new_prev_centroids[scanner_id] = curr_centroid
                elif curr_scanner_boxes:
                    # No history yet: every scanner gets a fresh id and starts as "Idle".
                    for curr_box in curr_scanner_boxes:
                        scanner_id = scanner_id_counter
                        scanner_id_counter += 1
                        movement_status = "Idle"
                        curr_centroid = ((curr_box[0] + curr_box[2]) / 2, (curr_box[1] + curr_box[3]) / 2)
                        scanner_data.append([curr_box, movement_status, scanner_id])
                        new_prev_centroids[scanner_id] = curr_centroid

                prev_centroids = new_prev_centroids

                for scanner_box, movement_status, scanner_id in scanner_data:
                    x1, y1, x2, y2 = scanner_box
                    label = f"scanner {movement_status} (ID: {scanner_id})"
                    plot_one_box([x1, y1, x2, y2], im0, label=label, color=colors[names.index("scanner")], line_thickness=2)

                product_scanning_status = ""
                payment_scanning_status = ""
                for scanner_box, movement_status, _ in scanner_data:
                    for item_box in item_boxes:
                        if movement_status == "Scanning" and compute_iou(scanner_box, item_box) > 0.1:
                            product_scanning_status = "Product scanning is finished"
                    for phone_box in phone_boxes:
                        if movement_status == "Scanning" and compute_iou(scanner_box, phone_box) > 0.1:
                            payment_scanning_status = "Payment scanning is finished"

                if product_scanning_status:
                    cv2.putText(im0, product_scanning_status, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[names.index("scanner")], 2)
                if payment_scanning_status:
                    cv2.putText(im0, payment_scanning_status, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[names.index("scanner")], 2)

            # Every frame is written, with or without detections. The writer is
            # (re)opened whenever the output path changes.
            if vid_path != save_path:
                vid_path = save_path
                if isinstance(vid_writer, cv2.VideoWriter):
                    vid_writer.release()
                fps = vid_cap.get(cv2.CAP_PROP_FPS) if vid_cap else 30
                w, h = im0.shape[1], im0.shape[0]
                vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
            vid_writer.write(im0)

    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_path is None:
        # Nothing was read from the source, so there is no file to convert.
        return None

    # Convert to H.264 for browser compatibility
    output_h264 = str(Path(save_path).with_name(f"{Path(save_path).stem}_h264.mp4"))
    try:
        stream = ffmpeg.input(save_path)
        stream = ffmpeg.output(stream, output_h264, vcodec='libx264', acodec='aac', format='mp4', pix_fmt='yuv420p')
        # capture_stderr=True is required for ffmpeg.Error.stderr to be populated.
        ffmpeg.run(stream, overwrite_output=True, capture_stderr=True)
        os.remove(save_path)  # Remove original
        return output_h264
    except ffmpeg.Error as e:
        details = e.stderr.decode(errors='replace') if e.stderr else str(e)
        print(f"FFmpeg error: {details}")
        return save_path
183
+
184
def gradio_interface(video, conf_thres, iou_thres):
    """Gradio callback: re-encode the uploaded video, run detection, return the result.

    Args:
        video: path of the uploaded video.
        conf_thres: confidence threshold forwarded to detect_video.
        iou_thres: IoU threshold forwarded to detect_video.

    Returns:
        The path returned by detect_video, or the string "Error processing video."
        when that result is falsy.
    """
    # NOTE(review): the checkpoint location is a machine-specific absolute path.
    weights = "/home/myominhtet/Desktop/deepsortfromscratch/yolov7/best.pt"
    img_size = 640

    converted_input = convert_to_h264(video)
    result = detect_video(converted_input, weights, conf_thres, iou_thres, img_size)
    if result:
        return result
    return "Error processing video."
190
+
191
+ # Create Gradio interface
192
# UI definition: one video upload plus two 0-1 sliders (defaults 0.25 and 0.45) whose
# values are passed, in this order, to gradio_interface(video, conf_thres, iou_thres).
interface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Slider(0, 1, value=0.25, step=0.05, label="Confidence Threshold"),
        gr.Slider(0, 1, value=0.45, step=0.05, label="IoU Threshold"),
    ],
    outputs=gr.Video(label="Processed Video"),
    title="YOLO Video Detection",
    description="Upload a video to run YOLO detection with custom parameters."
)

# Initialize FastAPI app
app = FastAPI()

# Mount Gradio interface to FastAPI
# `app` is rebound to the value returned by mount_gradio_app; the UI is served under /gradio.
app = gr.mount_gradio_app(app, interface, path="/gradio")
209
+
210
+ # Optional: Add a simple root endpoint
211
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a static landing page that links to the Gradio UI at /gradio."""
    return """
    <html>
        <body>
            <h1>Welcome to YOLO Video Detection API</h1>
            <p>Visit <a href="/gradio">/gradio</a> to access the interactive UI.</p>
        </body>
    </html>
    """
221
+
222
if __name__ == "__main__":
    # Start the server when run as a script. Host "0.0.0.0" listens on all network
    # interfaces, so the app is reachable from other machines on port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # init
models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (179 Bytes). View file