Shivdutta committed on
Commit
6e11613
1 Parent(s): 35360e6

Upload 242 files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +11 -0
  2. app.ipynb +138 -0
  3. yolov9/LICENSE.md +674 -0
  4. yolov9/README.md +329 -0
  5. yolov9/__pycache__/call_detection.cpython-310.pyc +0 -0
  6. yolov9/__pycache__/detect_dual.cpython-310.pyc +0 -0
  7. yolov9/__pycache__/detect_dual.cpython-311.pyc +0 -0
  8. yolov9/__pycache__/export.cpython-310.pyc +0 -0
  9. yolov9/__pycache__/export.cpython-311.pyc +0 -0
  10. yolov9/__pycache__/val.cpython-310.pyc +0 -0
  11. yolov9/__pycache__/val_dual.cpython-310.pyc +0 -0
  12. yolov9/app.py +58 -0
  13. yolov9/benchmarks.py +142 -0
  14. yolov9/call_detection.py +8 -0
  15. yolov9/classify/predict.py +224 -0
  16. yolov9/classify/train.py +333 -0
  17. yolov9/classify/val.py +170 -0
  18. yolov9/data/coco.yaml +125 -0
  19. yolov9/data/hyps/hyp.scratch-high.yaml +30 -0
  20. yolov9/data/images/a.mp4 +3 -0
  21. yolov9/data/images/b.mp4 +3 -0
  22. yolov9/data/images/horses.jpg +0 -0
  23. yolov9/data/images/lamborghini-aventador-2932196_1280.jpg +0 -0
  24. yolov9/data/vehicle_dataset/classes.txt +6 -0
  25. yolov9/data/vehicle_dataset/data.yaml +5 -0
  26. yolov9/detect.py +233 -0
  27. yolov9/detect_dual.py +279 -0
  28. yolov9/export.py +686 -0
  29. yolov9/figure/horses_prediction.jpg +0 -0
  30. yolov9/figure/multitask.png +3 -0
  31. yolov9/figure/performance.png +0 -0
  32. yolov9/hubconf.py +107 -0
  33. yolov9/models/__init__.py +1 -0
  34. yolov9/models/__pycache__/__init__.cpython-310.pyc +0 -0
  35. yolov9/models/__pycache__/__init__.cpython-311.pyc +0 -0
  36. yolov9/models/__pycache__/common.cpython-310.pyc +0 -0
  37. yolov9/models/__pycache__/common.cpython-311.pyc +0 -0
  38. yolov9/models/__pycache__/experimental.cpython-310.pyc +0 -0
  39. yolov9/models/__pycache__/experimental.cpython-311.pyc +0 -0
  40. yolov9/models/__pycache__/yolo.cpython-310.pyc +0 -0
  41. yolov9/models/__pycache__/yolo.cpython-311.pyc +0 -0
  42. yolov9/models/common.py +1212 -0
  43. yolov9/models/detect/gelan-c.yaml +80 -0
  44. yolov9/models/detect/gelan-e.yaml +121 -0
  45. yolov9/models/detect/gelan.yaml +80 -0
  46. yolov9/models/detect/yolov7-af.yaml +137 -0
  47. yolov9/models/detect/yolov9-c.yaml +124 -0
  48. yolov9/models/detect/yolov9-e.yaml +144 -0
  49. yolov9/models/detect/yolov9.yaml +117 -0
  50. yolov9/models/experimental.py +275 -0
.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ yolov9/data/images/a.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ yolov9/data/images/b.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ yolov9/figure/multitask.png filter=lfs diff=lfs merge=lfs -text
39
+ yolov9/runs/detect/50b8f560-0240-4176-9511-c3896ff4bce8/a.mp4 filter=lfs diff=lfs merge=lfs -text
40
+ yolov9/runs/detect/664a80cf-ce01-4bde-a090-59a93c91e364/cut_a_2.mp4 filter=lfs diff=lfs merge=lfs -text
41
+ yolov9/runs/detect/6e9c17bc-cd28-4b60-b336-04e918a062ef/a.mp4 filter=lfs diff=lfs merge=lfs -text
42
+ yolov9/runs/detect/a9aee41f-4238-4ba5-940b-7de1e91a5eae/cut_a_1.mp4 filter=lfs diff=lfs merge=lfs -text
43
+ yolov9/runs/detect/f807164a-496b-413c-bb47-f5daf8803dcd/cut_a_1.mp4 filter=lfs diff=lfs merge=lfs -text
44
+ yolov9/runs/detect/fd766784-e39b-4b4e-b439-881682683266/a.mp4 filter=lfs diff=lfs merge=lfs -text
45
+ yolov9/runs/detect/yolov9_c_640_detect2/a.mp4 filter=lfs diff=lfs merge=lfs -text
46
+ yolov9/runs/detect/yolov9_c_640_detect3/b.mp4 filter=lfs diff=lfs merge=lfs -text
app.ipynb ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/shiv-nlp-mldl-cv/anaconda3/envs/S15-Yolo1/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "Running on local URL: http://127.0.0.1:7860\n",
21
+ "Running on public URL: https://115f49f564465f60ec.gradio.live\n",
22
+ "\n",
23
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
24
+ ]
25
+ },
26
+ {
27
+ "data": {
28
+ "text/html": [
29
+ "<div><iframe src=\"https://115f49f564465f60ec.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
30
+ ],
31
+ "text/plain": [
32
+ "<IPython.core.display.HTML object>"
33
+ ]
34
+ },
35
+ "metadata": {},
36
+ "output_type": "display_data"
37
+ },
38
+ {
39
+ "data": {
40
+ "text/plain": []
41
+ },
42
+ "execution_count": 1,
43
+ "metadata": {},
44
+ "output_type": "execute_result"
45
+ }
46
+ ],
47
+ "source": [
48
+ "import gradio as gr\n",
49
+ "from PIL import Image\n",
50
+ "import numpy as np\n",
51
+ "import os\n",
52
+ "import uuid\n",
53
+ "\n",
54
+ "def inference(input_img):\n",
55
+ " temp = uuid.uuid4()\n",
56
+ " shell = f\"python yolov9/detect.py --source {input_img} --img 640 --device cpu --weights yolov9/runs/train/exp/weights/best.pt --name {temp}\"\n",
57
+ " os.system(shell)\n",
58
+ " return f\"yolov9/runs/detect/{temp}/{input_img.split('/')[-1]}\"\n",
59
+ " #return \"yolov9/runs/detect/f807164a-496b-413c-bb47-f5daf8803dcd/cut_a_1.mp4\"\n",
60
+ "\n",
61
+ "def inference_video(input_img):\n",
62
+ " org_img = input_img\n",
63
+ " return input_img\n",
64
+ "\n",
65
+ "with gr.Blocks() as demo:\n",
66
+ " gr.Markdown(\n",
67
+ " \"\"\"\n",
68
+ " # Vehicle detection using Yolo-v9\n",
69
+ " Upload the vehicle image or video for detection\n",
70
+ " \"\"\"\n",
71
+ " )\n",
72
+ "\n",
73
+ " with gr.Tab(\"Video\"):\n",
74
+ " gr.Markdown(\n",
75
+ " \"\"\"\n",
76
+ " Upload video mp4 file and detect the count of vehicles passing by\n",
77
+ " \"\"\"\n",
78
+ " )\n",
79
+ " gr.Markdown(\n",
80
+ " \"\"\"\n",
81
+ " Upload image file and detect vehicles present in the image\n",
82
+ " \"\"\"\n",
83
+ " )\n",
84
+ " with gr.Row():\n",
85
+ " img_input = [gr.PlayableVideo(label=\"Input Image\", autoplay=True, width=300, height=300)]\n",
86
+ " pred_outputs = [gr.PlayableVideo(label=\"Output Image\",width=640, autoplay=True, height=640)]\n",
87
+ " \n",
88
+ " image_button = gr.Button(\"Predict\")\n",
89
+ " image_button.click(inference, inputs=img_input, outputs=pred_outputs)\n",
90
+ "\n",
91
+ " with gr.Tab(\"Image\"):\n",
92
+ " gr.Markdown(\n",
93
+ " \"\"\"\n",
94
+ " Upload image file and detect vehicles present in the image\n",
95
+ " \"\"\"\n",
96
+ " )\n",
97
+ " with gr.Row():\n",
98
+ " img_input = [gr.Image(type=\"filepath\",label=\"Input Image\",width=300, height=300)]\n",
99
+ " pred_outputs = [gr.Image(label=\"Output Image\",width=640, height=640)]\n",
100
+ " \n",
101
+ " image_button = gr.Button(\"Predict\")\n",
102
+ " image_button.click(inference, inputs=img_input, outputs=pred_outputs)\n",
103
+ "\n",
104
+ "\n",
105
+ "\n",
106
+ "demo.launch(share=True)\n"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": null,
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": []
115
+ }
116
+ ],
117
+ "metadata": {
118
+ "kernelspec": {
119
+ "display_name": "S6-VSCode",
120
+ "language": "python",
121
+ "name": "python3"
122
+ },
123
+ "language_info": {
124
+ "codemirror_mode": {
125
+ "name": "ipython",
126
+ "version": 3
127
+ },
128
+ "file_extension": ".py",
129
+ "mimetype": "text/x-python",
130
+ "name": "python",
131
+ "nbconvert_exporter": "python",
132
+ "pygments_lexer": "ipython3",
133
+ "version": "3.10.14"
134
+ }
135
+ },
136
+ "nbformat": 4,
137
+ "nbformat_minor": 2
138
+ }
yolov9/LICENSE.md ADDED
@@ -0,0 +1,674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU General Public License is a free, copyleft license for
11
+ software and other kinds of works.
12
+
13
+ The licenses for most software and other practical works are designed
14
+ to take away your freedom to share and change the works. By contrast,
15
+ the GNU General Public License is intended to guarantee your freedom to
16
+ share and change all versions of a program--to make sure it remains free
17
+ software for all its users. We, the Free Software Foundation, use the
18
+ GNU General Public License for most of our software; it applies also to
19
+ any other work released this way by its authors. You can apply it to
20
+ your programs, too.
21
+
22
+ When we speak of free software, we are referring to freedom, not
23
+ price. Our General Public Licenses are designed to make sure that you
24
+ have the freedom to distribute copies of free software (and charge for
25
+ them if you wish), that you receive source code or can get it if you
26
+ want it, that you can change the software or use pieces of it in new
27
+ free programs, and that you know you can do these things.
28
+
29
+ To protect your rights, we need to prevent others from denying you
30
+ these rights or asking you to surrender the rights. Therefore, you have
31
+ certain responsibilities if you distribute copies of the software, or if
32
+ you modify it: responsibilities to respect the freedom of others.
33
+
34
+ For example, if you distribute copies of such a program, whether
35
+ gratis or for a fee, you must pass on to the recipients the same
36
+ freedoms that you received. You must make sure that they, too, receive
37
+ or can get the source code. And you must show them these terms so they
38
+ know their rights.
39
+
40
+ Developers that use the GNU GPL protect your rights with two steps:
41
+ (1) assert copyright on the software, and (2) offer you this License
42
+ giving you legal permission to copy, distribute and/or modify it.
43
+
44
+ For the developers' and authors' protection, the GPL clearly explains
45
+ that there is no warranty for this free software. For both users' and
46
+ authors' sake, the GPL requires that modified versions be marked as
47
+ changed, so that their problems will not be attributed erroneously to
48
+ authors of previous versions.
49
+
50
+ Some devices are designed to deny users access to install or run
51
+ modified versions of the software inside them, although the manufacturer
52
+ can do so. This is fundamentally incompatible with the aim of
53
+ protecting users' freedom to change the software. The systematic
54
+ pattern of such abuse occurs in the area of products for individuals to
55
+ use, which is precisely where it is most unacceptable. Therefore, we
56
+ have designed this version of the GPL to prohibit the practice for those
57
+ products. If such problems arise substantially in other domains, we
58
+ stand ready to extend this provision to those domains in future versions
59
+ of the GPL, as needed to protect the freedom of users.
60
+
61
+ Finally, every program is threatened constantly by software patents.
62
+ States should not allow patents to restrict development and use of
63
+ software on general-purpose computers, but in those that do, we wish to
64
+ avoid the special danger that patents applied to a free program could
65
+ make it effectively proprietary. To prevent this, the GPL assures that
66
+ patents cannot be used to render the program non-free.
67
+
68
+ The precise terms and conditions for copying, distribution and
69
+ modification follow.
70
+
71
+ TERMS AND CONDITIONS
72
+
73
+ 0. Definitions.
74
+
75
+ "This License" refers to version 3 of the GNU General Public License.
76
+
77
+ "Copyright" also means copyright-like laws that apply to other kinds of
78
+ works, such as semiconductor masks.
79
+
80
+ "The Program" refers to any copyrightable work licensed under this
81
+ License. Each licensee is addressed as "you". "Licensees" and
82
+ "recipients" may be individuals or organizations.
83
+
84
+ To "modify" a work means to copy from or adapt all or part of the work
85
+ in a fashion requiring copyright permission, other than the making of an
86
+ exact copy. The resulting work is called a "modified version" of the
87
+ earlier work or a work "based on" the earlier work.
88
+
89
+ A "covered work" means either the unmodified Program or a work based
90
+ on the Program.
91
+
92
+ To "propagate" a work means to do anything with it that, without
93
+ permission, would make you directly or secondarily liable for
94
+ infringement under applicable copyright law, except executing it on a
95
+ computer or modifying a private copy. Propagation includes copying,
96
+ distribution (with or without modification), making available to the
97
+ public, and in some countries other activities as well.
98
+
99
+ To "convey" a work means any kind of propagation that enables other
100
+ parties to make or receive copies. Mere interaction with a user through
101
+ a computer network, with no transfer of a copy, is not conveying.
102
+
103
+ An interactive user interface displays "Appropriate Legal Notices"
104
+ to the extent that it includes a convenient and prominently visible
105
+ feature that (1) displays an appropriate copyright notice, and (2)
106
+ tells the user that there is no warranty for the work (except to the
107
+ extent that warranties are provided), that licensees may convey the
108
+ work under this License, and how to view a copy of this License. If
109
+ the interface presents a list of user commands or options, such as a
110
+ menu, a prominent item in the list meets this criterion.
111
+
112
+ 1. Source Code.
113
+
114
+ The "source code" for a work means the preferred form of the work
115
+ for making modifications to it. "Object code" means any non-source
116
+ form of a work.
117
+
118
+ A "Standard Interface" means an interface that either is an official
119
+ standard defined by a recognized standards body, or, in the case of
120
+ interfaces specified for a particular programming language, one that
121
+ is widely used among developers working in that language.
122
+
123
+ The "System Libraries" of an executable work include anything, other
124
+ than the work as a whole, that (a) is included in the normal form of
125
+ packaging a Major Component, but which is not part of that Major
126
+ Component, and (b) serves only to enable use of the work with that
127
+ Major Component, or to implement a Standard Interface for which an
128
+ implementation is available to the public in source code form. A
129
+ "Major Component", in this context, means a major essential component
130
+ (kernel, window system, and so on) of the specific operating system
131
+ (if any) on which the executable work runs, or a compiler used to
132
+ produce the work, or an object code interpreter used to run it.
133
+
134
+ The "Corresponding Source" for a work in object code form means all
135
+ the source code needed to generate, install, and (for an executable
136
+ work) run the object code and to modify the work, including scripts to
137
+ control those activities. However, it does not include the work's
138
+ System Libraries, or general-purpose tools or generally available free
139
+ programs which are used unmodified in performing those activities but
140
+ which are not part of the work. For example, Corresponding Source
141
+ includes interface definition files associated with source files for
142
+ the work, and the source code for shared libraries and dynamically
143
+ linked subprograms that the work is specifically designed to require,
144
+ such as by intimate data communication or control flow between those
145
+ subprograms and other parts of the work.
146
+
147
+ The Corresponding Source need not include anything that users
148
+ can regenerate automatically from other parts of the Corresponding
149
+ Source.
150
+
151
+ The Corresponding Source for a work in source code form is that
152
+ same work.
153
+
154
+ 2. Basic Permissions.
155
+
156
+ All rights granted under this License are granted for the term of
157
+ copyright on the Program, and are irrevocable provided the stated
158
+ conditions are met. This License explicitly affirms your unlimited
159
+ permission to run the unmodified Program. The output from running a
160
+ covered work is covered by this License only if the output, given its
161
+ content, constitutes a covered work. This License acknowledges your
162
+ rights of fair use or other equivalent, as provided by copyright law.
163
+
164
+ You may make, run and propagate covered works that you do not
165
+ convey, without conditions so long as your license otherwise remains
166
+ in force. You may convey covered works to others for the sole purpose
167
+ of having them make modifications exclusively for you, or provide you
168
+ with facilities for running those works, provided that you comply with
169
+ the terms of this License in conveying all material for which you do
170
+ not control copyright. Those thus making or running the covered works
171
+ for you must do so exclusively on your behalf, under your direction
172
+ and control, on terms that prohibit them from making any copies of
173
+ your copyrighted material outside their relationship with you.
174
+
175
+ Conveying under any other circumstances is permitted solely under
176
+ the conditions stated below. Sublicensing is not allowed; section 10
177
+ makes it unnecessary.
178
+
179
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180
+
181
+ No covered work shall be deemed part of an effective technological
182
+ measure under any applicable law fulfilling obligations under article
183
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184
+ similar laws prohibiting or restricting circumvention of such
185
+ measures.
186
+
187
+ When you convey a covered work, you waive any legal power to forbid
188
+ circumvention of technological measures to the extent such circumvention
189
+ is effected by exercising rights under this License with respect to
190
+ the covered work, and you disclaim any intention to limit operation or
191
+ modification of the work as a means of enforcing, against the work's
192
+ users, your or third parties' legal rights to forbid circumvention of
193
+ technological measures.
194
+
195
+ 4. Conveying Verbatim Copies.
196
+
197
+ You may convey verbatim copies of the Program's source code as you
198
+ receive it, in any medium, provided that you conspicuously and
199
+ appropriately publish on each copy an appropriate copyright notice;
200
+ keep intact all notices stating that this License and any
201
+ non-permissive terms added in accord with section 7 apply to the code;
202
+ keep intact all notices of the absence of any warranty; and give all
203
+ recipients a copy of this License along with the Program.
204
+
205
+ You may charge any price or no price for each copy that you convey,
206
+ and you may offer support or warranty protection for a fee.
207
+
208
+ 5. Conveying Modified Source Versions.
209
+
210
+ You may convey a work based on the Program, or the modifications to
211
+ produce it from the Program, in the form of source code under the
212
+ terms of section 4, provided that you also meet all of these conditions:
213
+
214
+ a) The work must carry prominent notices stating that you modified
215
+ it, and giving a relevant date.
216
+
217
+ b) The work must carry prominent notices stating that it is
218
+ released under this License and any conditions added under section
219
+ 7. This requirement modifies the requirement in section 4 to
220
+ "keep intact all notices".
221
+
222
+ c) You must license the entire work, as a whole, under this
223
+ License to anyone who comes into possession of a copy. This
224
+ License will therefore apply, along with any applicable section 7
225
+ additional terms, to the whole of the work, and all its parts,
226
+ regardless of how they are packaged. This License gives no
227
+ permission to license the work in any other way, but it does not
228
+ invalidate such permission if you have separately received it.
229
+
230
+ d) If the work has interactive user interfaces, each must display
231
+ Appropriate Legal Notices; however, if the Program has interactive
232
+ interfaces that do not display Appropriate Legal Notices, your
233
+ work need not make them do so.
234
+
235
+ A compilation of a covered work with other separate and independent
236
+ works, which are not by their nature extensions of the covered work,
237
+ and which are not combined with it such as to form a larger program,
238
+ in or on a volume of a storage or distribution medium, is called an
239
+ "aggregate" if the compilation and its resulting copyright are not
240
+ used to limit the access or legal rights of the compilation's users
241
+ beyond what the individual works permit. Inclusion of a covered work
242
+ in an aggregate does not cause this License to apply to the other
243
+ parts of the aggregate.
244
+
245
+ 6. Conveying Non-Source Forms.
246
+
247
+ You may convey a covered work in object code form under the terms
248
+ of sections 4 and 5, provided that you also convey the
249
+ machine-readable Corresponding Source under the terms of this License,
250
+ in one of these ways:
251
+
252
+ a) Convey the object code in, or embodied in, a physical product
253
+ (including a physical distribution medium), accompanied by the
254
+ Corresponding Source fixed on a durable physical medium
255
+ customarily used for software interchange.
256
+
257
+ b) Convey the object code in, or embodied in, a physical product
258
+ (including a physical distribution medium), accompanied by a
259
+ written offer, valid for at least three years and valid for as
260
+ long as you offer spare parts or customer support for that product
261
+ model, to give anyone who possesses the object code either (1) a
262
+ copy of the Corresponding Source for all the software in the
263
+ product that is covered by this License, on a durable physical
264
+ medium customarily used for software interchange, for a price no
265
+ more than your reasonable cost of physically performing this
266
+ conveying of source, or (2) access to copy the
267
+ Corresponding Source from a network server at no charge.
268
+
269
+ c) Convey individual copies of the object code with a copy of the
270
+ written offer to provide the Corresponding Source. This
271
+ alternative is allowed only occasionally and noncommercially, and
272
+ only if you received the object code with such an offer, in accord
273
+ with subsection 6b.
274
+
275
+ d) Convey the object code by offering access from a designated
276
+ place (gratis or for a charge), and offer equivalent access to the
277
+ Corresponding Source in the same way through the same place at no
278
+ further charge. You need not require recipients to copy the
279
+ Corresponding Source along with the object code. If the place to
280
+ copy the object code is a network server, the Corresponding Source
281
+ may be on a different server (operated by you or a third party)
282
+ that supports equivalent copying facilities, provided you maintain
283
+ clear directions next to the object code saying where to find the
284
+ Corresponding Source. Regardless of what server hosts the
285
+ Corresponding Source, you remain obligated to ensure that it is
286
+ available for as long as needed to satisfy these requirements.
287
+
288
+ e) Convey the object code using peer-to-peer transmission, provided
289
+ you inform other peers where the object code and Corresponding
290
+ Source of the work are being offered to the general public at no
291
+ charge under subsection 6d.
292
+
293
+ A separable portion of the object code, whose source code is excluded
294
+ from the Corresponding Source as a System Library, need not be
295
+ included in conveying the object code work.
296
+
297
+ A "User Product" is either (1) a "consumer product", which means any
298
+ tangible personal property which is normally used for personal, family,
299
+ or household purposes, or (2) anything designed or sold for incorporation
300
+ into a dwelling. In determining whether a product is a consumer product,
301
+ doubtful cases shall be resolved in favor of coverage. For a particular
302
+ product received by a particular user, "normally used" refers to a
303
+ typical or common use of that class of product, regardless of the status
304
+ of the particular user or of the way in which the particular user
305
+ actually uses, or expects or is expected to use, the product. A product
306
+ is a consumer product regardless of whether the product has substantial
307
+ commercial, industrial or non-consumer uses, unless such uses represent
308
+ the only significant mode of use of the product.
309
+
310
+ "Installation Information" for a User Product means any methods,
311
+ procedures, authorization keys, or other information required to install
312
+ and execute modified versions of a covered work in that User Product from
313
+ a modified version of its Corresponding Source. The information must
314
+ suffice to ensure that the continued functioning of the modified object
315
+ code is in no case prevented or interfered with solely because
316
+ modification has been made.
317
+
318
+ If you convey an object code work under this section in, or with, or
319
+ specifically for use in, a User Product, and the conveying occurs as
320
+ part of a transaction in which the right of possession and use of the
321
+ User Product is transferred to the recipient in perpetuity or for a
322
+ fixed term (regardless of how the transaction is characterized), the
323
+ Corresponding Source conveyed under this section must be accompanied
324
+ by the Installation Information. But this requirement does not apply
325
+ if neither you nor any third party retains the ability to install
326
+ modified object code on the User Product (for example, the work has
327
+ been installed in ROM).
328
+
329
+ The requirement to provide Installation Information does not include a
330
+ requirement to continue to provide support service, warranty, or updates
331
+ for a work that has been modified or installed by the recipient, or for
332
+ the User Product in which it has been modified or installed. Access to a
333
+ network may be denied when the modification itself materially and
334
+ adversely affects the operation of the network or violates the rules and
335
+ protocols for communication across the network.
336
+
337
+ Corresponding Source conveyed, and Installation Information provided,
338
+ in accord with this section must be in a format that is publicly
339
+ documented (and with an implementation available to the public in
340
+ source code form), and must require no special password or key for
341
+ unpacking, reading or copying.
342
+
343
+ 7. Additional Terms.
344
+
345
+ "Additional permissions" are terms that supplement the terms of this
346
+ License by making exceptions from one or more of its conditions.
347
+ Additional permissions that are applicable to the entire Program shall
348
+ be treated as though they were included in this License, to the extent
349
+ that they are valid under applicable law. If additional permissions
350
+ apply only to part of the Program, that part may be used separately
351
+ under those permissions, but the entire Program remains governed by
352
+ this License without regard to the additional permissions.
353
+
354
+ When you convey a copy of a covered work, you may at your option
355
+ remove any additional permissions from that copy, or from any part of
356
+ it. (Additional permissions may be written to require their own
357
+ removal in certain cases when you modify the work.) You may place
358
+ additional permissions on material, added by you to a covered work,
359
+ for which you have or can give appropriate copyright permission.
360
+
361
+ Notwithstanding any other provision of this License, for material you
362
+ add to a covered work, you may (if authorized by the copyright holders of
363
+ that material) supplement the terms of this License with terms:
364
+
365
+ a) Disclaiming warranty or limiting liability differently from the
366
+ terms of sections 15 and 16 of this License; or
367
+
368
+ b) Requiring preservation of specified reasonable legal notices or
369
+ author attributions in that material or in the Appropriate Legal
370
+ Notices displayed by works containing it; or
371
+
372
+ c) Prohibiting misrepresentation of the origin of that material, or
373
+ requiring that modified versions of such material be marked in
374
+ reasonable ways as different from the original version; or
375
+
376
+ d) Limiting the use for publicity purposes of names of licensors or
377
+ authors of the material; or
378
+
379
+ e) Declining to grant rights under trademark law for use of some
380
+ trade names, trademarks, or service marks; or
381
+
382
+ f) Requiring indemnification of licensors and authors of that
383
+ material by anyone who conveys the material (or modified versions of
384
+ it) with contractual assumptions of liability to the recipient, for
385
+ any liability that these contractual assumptions directly impose on
386
+ those licensors and authors.
387
+
388
+ All other non-permissive additional terms are considered "further
389
+ restrictions" within the meaning of section 10. If the Program as you
390
+ received it, or any part of it, contains a notice stating that it is
391
+ governed by this License along with a term that is a further
392
+ restriction, you may remove that term. If a license document contains
393
+ a further restriction but permits relicensing or conveying under this
394
+ License, you may add to a covered work material governed by the terms
395
+ of that license document, provided that the further restriction does
396
+ not survive such relicensing or conveying.
397
+
398
+ If you add terms to a covered work in accord with this section, you
399
+ must place, in the relevant source files, a statement of the
400
+ additional terms that apply to those files, or a notice indicating
401
+ where to find the applicable terms.
402
+
403
+ Additional terms, permissive or non-permissive, may be stated in the
404
+ form of a separately written license, or stated as exceptions;
405
+ the above requirements apply either way.
406
+
407
+ 8. Termination.
408
+
409
+ You may not propagate or modify a covered work except as expressly
410
+ provided under this License. Any attempt otherwise to propagate or
411
+ modify it is void, and will automatically terminate your rights under
412
+ this License (including any patent licenses granted under the third
413
+ paragraph of section 11).
414
+
415
+ However, if you cease all violation of this License, then your
416
+ license from a particular copyright holder is reinstated (a)
417
+ provisionally, unless and until the copyright holder explicitly and
418
+ finally terminates your license, and (b) permanently, if the copyright
419
+ holder fails to notify you of the violation by some reasonable means
420
+ prior to 60 days after the cessation.
421
+
422
+ Moreover, your license from a particular copyright holder is
423
+ reinstated permanently if the copyright holder notifies you of the
424
+ violation by some reasonable means, this is the first time you have
425
+ received notice of violation of this License (for any work) from that
426
+ copyright holder, and you cure the violation prior to 30 days after
427
+ your receipt of the notice.
428
+
429
+ Termination of your rights under this section does not terminate the
430
+ licenses of parties who have received copies or rights from you under
431
+ this License. If your rights have been terminated and not permanently
432
+ reinstated, you do not qualify to receive new licenses for the same
433
+ material under section 10.
434
+
435
+ 9. Acceptance Not Required for Having Copies.
436
+
437
+ You are not required to accept this License in order to receive or
438
+ run a copy of the Program. Ancillary propagation of a covered work
439
+ occurring solely as a consequence of using peer-to-peer transmission
440
+ to receive a copy likewise does not require acceptance. However,
441
+ nothing other than this License grants you permission to propagate or
442
+ modify any covered work. These actions infringe copyright if you do
443
+ not accept this License. Therefore, by modifying or propagating a
444
+ covered work, you indicate your acceptance of this License to do so.
445
+
446
+ 10. Automatic Licensing of Downstream Recipients.
447
+
448
+ Each time you convey a covered work, the recipient automatically
449
+ receives a license from the original licensors, to run, modify and
450
+ propagate that work, subject to this License. You are not responsible
451
+ for enforcing compliance by third parties with this License.
452
+
453
+ An "entity transaction" is a transaction transferring control of an
454
+ organization, or substantially all assets of one, or subdividing an
455
+ organization, or merging organizations. If propagation of a covered
456
+ work results from an entity transaction, each party to that
457
+ transaction who receives a copy of the work also receives whatever
458
+ licenses to the work the party's predecessor in interest had or could
459
+ give under the previous paragraph, plus a right to possession of the
460
+ Corresponding Source of the work from the predecessor in interest, if
461
+ the predecessor has it or can get it with reasonable efforts.
462
+
463
+ You may not impose any further restrictions on the exercise of the
464
+ rights granted or affirmed under this License. For example, you may
465
+ not impose a license fee, royalty, or other charge for exercise of
466
+ rights granted under this License, and you may not initiate litigation
467
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
468
+ any patent claim is infringed by making, using, selling, offering for
469
+ sale, or importing the Program or any portion of it.
470
+
471
+ 11. Patents.
472
+
473
+ A "contributor" is a copyright holder who authorizes use under this
474
+ License of the Program or a work on which the Program is based. The
475
+ work thus licensed is called the contributor's "contributor version".
476
+
477
+ A contributor's "essential patent claims" are all patent claims
478
+ owned or controlled by the contributor, whether already acquired or
479
+ hereafter acquired, that would be infringed by some manner, permitted
480
+ by this License, of making, using, or selling its contributor version,
481
+ but do not include claims that would be infringed only as a
482
+ consequence of further modification of the contributor version. For
483
+ purposes of this definition, "control" includes the right to grant
484
+ patent sublicenses in a manner consistent with the requirements of
485
+ this License.
486
+
487
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
488
+ patent license under the contributor's essential patent claims, to
489
+ make, use, sell, offer for sale, import and otherwise run, modify and
490
+ propagate the contents of its contributor version.
491
+
492
+ In the following three paragraphs, a "patent license" is any express
493
+ agreement or commitment, however denominated, not to enforce a patent
494
+ (such as an express permission to practice a patent or covenant not to
495
+ sue for patent infringement). To "grant" such a patent license to a
496
+ party means to make such an agreement or commitment not to enforce a
497
+ patent against the party.
498
+
499
+ If you convey a covered work, knowingly relying on a patent license,
500
+ and the Corresponding Source of the work is not available for anyone
501
+ to copy, free of charge and under the terms of this License, through a
502
+ publicly available network server or other readily accessible means,
503
+ then you must either (1) cause the Corresponding Source to be so
504
+ available, or (2) arrange to deprive yourself of the benefit of the
505
+ patent license for this particular work, or (3) arrange, in a manner
506
+ consistent with the requirements of this License, to extend the patent
507
+ license to downstream recipients. "Knowingly relying" means you have
508
+ actual knowledge that, but for the patent license, your conveying the
509
+ covered work in a country, or your recipient's use of the covered work
510
+ in a country, would infringe one or more identifiable patents in that
511
+ country that you have reason to believe are valid.
512
+
513
+ If, pursuant to or in connection with a single transaction or
514
+ arrangement, you convey, or propagate by procuring conveyance of, a
515
+ covered work, and grant a patent license to some of the parties
516
+ receiving the covered work authorizing them to use, propagate, modify
517
+ or convey a specific copy of the covered work, then the patent license
518
+ you grant is automatically extended to all recipients of the covered
519
+ work and works based on it.
520
+
521
+ A patent license is "discriminatory" if it does not include within
522
+ the scope of its coverage, prohibits the exercise of, or is
523
+ conditioned on the non-exercise of one or more of the rights that are
524
+ specifically granted under this License. You may not convey a covered
525
+ work if you are a party to an arrangement with a third party that is
526
+ in the business of distributing software, under which you make payment
527
+ to the third party based on the extent of your activity of conveying
528
+ the work, and under which the third party grants, to any of the
529
+ parties who would receive the covered work from you, a discriminatory
530
+ patent license (a) in connection with copies of the covered work
531
+ conveyed by you (or copies made from those copies), or (b) primarily
532
+ for and in connection with specific products or compilations that
533
+ contain the covered work, unless you entered into that arrangement,
534
+ or that patent license was granted, prior to 28 March 2007.
535
+
536
+ Nothing in this License shall be construed as excluding or limiting
537
+ any implied license or other defenses to infringement that may
538
+ otherwise be available to you under applicable patent law.
539
+
540
+ 12. No Surrender of Others' Freedom.
541
+
542
+ If conditions are imposed on you (whether by court order, agreement or
543
+ otherwise) that contradict the conditions of this License, they do not
544
+ excuse you from the conditions of this License. If you cannot convey a
545
+ covered work so as to satisfy simultaneously your obligations under this
546
+ License and any other pertinent obligations, then as a consequence you may
547
+ not convey it at all. For example, if you agree to terms that obligate you
548
+ to collect a royalty for further conveying from those to whom you convey
549
+ the Program, the only way you could satisfy both those terms and this
550
+ License would be to refrain entirely from conveying the Program.
551
+
552
+ 13. Use with the GNU Affero General Public License.
553
+
554
+ Notwithstanding any other provision of this License, you have
555
+ permission to link or combine any covered work with a work licensed
556
+ under version 3 of the GNU Affero General Public License into a single
557
+ combined work, and to convey the resulting work. The terms of this
558
+ License will continue to apply to the part which is the covered work,
559
+ but the special requirements of the GNU Affero General Public License,
560
+ section 13, concerning interaction through a network will apply to the
561
+ combination as such.
562
+
563
+ 14. Revised Versions of this License.
564
+
565
+ The Free Software Foundation may publish revised and/or new versions of
566
+ the GNU General Public License from time to time. Such new versions will
567
+ be similar in spirit to the present version, but may differ in detail to
568
+ address new problems or concerns.
569
+
570
+ Each version is given a distinguishing version number. If the
571
+ Program specifies that a certain numbered version of the GNU General
572
+ Public License "or any later version" applies to it, you have the
573
+ option of following the terms and conditions either of that numbered
574
+ version or of any later version published by the Free Software
575
+ Foundation. If the Program does not specify a version number of the
576
+ GNU General Public License, you may choose any version ever published
577
+ by the Free Software Foundation.
578
+
579
+ If the Program specifies that a proxy can decide which future
580
+ versions of the GNU General Public License can be used, that proxy's
581
+ public statement of acceptance of a version permanently authorizes you
582
+ to choose that version for the Program.
583
+
584
+ Later license versions may give you additional or different
585
+ permissions. However, no additional obligations are imposed on any
586
+ author or copyright holder as a result of your choosing to follow a
587
+ later version.
588
+
589
+ 15. Disclaimer of Warranty.
590
+
591
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599
+
600
+ 16. Limitation of Liability.
601
+
602
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610
+ SUCH DAMAGES.
611
+
612
+ 17. Interpretation of Sections 15 and 16.
613
+
614
+ If the disclaimer of warranty and limitation of liability provided
615
+ above cannot be given local legal effect according to their terms,
616
+ reviewing courts shall apply local law that most closely approximates
617
+ an absolute waiver of all civil liability in connection with the
618
+ Program, unless a warranty or assumption of liability accompanies a
619
+ copy of the Program in return for a fee.
620
+
621
+ END OF TERMS AND CONDITIONS
622
+
623
+ How to Apply These Terms to Your New Programs
624
+
625
+ If you develop a new program, and you want it to be of the greatest
626
+ possible use to the public, the best way to achieve this is to make it
627
+ free software which everyone can redistribute and change under these terms.
628
+
629
+ To do so, attach the following notices to the program. It is safest
630
+ to attach them to the start of each source file to most effectively
631
+ state the exclusion of warranty; and each file should have at least
632
+ the "copyright" line and a pointer to where the full notice is found.
633
+
634
+ <one line to give the program's name and a brief idea of what it does.>
635
+ Copyright (C) <year> <name of author>
636
+
637
+ This program is free software: you can redistribute it and/or modify
638
+ it under the terms of the GNU General Public License as published by
639
+ the Free Software Foundation, either version 3 of the License, or
640
+ (at your option) any later version.
641
+
642
+ This program is distributed in the hope that it will be useful,
643
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
644
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645
+ GNU General Public License for more details.
646
+
647
+ You should have received a copy of the GNU General Public License
648
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
649
+
650
+ Also add information on how to contact you by electronic and paper mail.
651
+
652
+ If the program does terminal interaction, make it output a short
653
+ notice like this when it starts in an interactive mode:
654
+
655
+ <program> Copyright (C) <year> <name of author>
656
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657
+ This is free software, and you are welcome to redistribute it
658
+ under certain conditions; type `show c' for details.
659
+
660
+ The hypothetical commands `show w' and `show c' should show the appropriate
661
+ parts of the General Public License. Of course, your program's commands
662
+ might be different; for a GUI interface, you would use an "about box".
663
+
664
+ You should also get your employer (if you work as a programmer) or school,
665
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
666
+ For more information on this, and how to apply and follow the GNU GPL, see
667
+ <https://www.gnu.org/licenses/>.
668
+
669
+ The GNU General Public License does not permit incorporating your program
670
+ into proprietary programs. If your program is a subroutine library, you
671
+ may consider it more useful to permit linking proprietary applications with
672
+ the library. If this is what you want to do, use the GNU Lesser General
673
+ Public License instead of this License. But first, please read
674
+ <https://www.gnu.org/licenses/why-not-lgpl.html>.
yolov9/README.md ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ Implementation of paper - [YOLOv9: Learning What You Want to Learn Using Programmable Gradient Information](https://arxiv.org/abs/2402.13616)
4
+
5
+ [![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2402.13616-B31B1B.svg)](https://arxiv.org/abs/2402.13616)
6
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/kadirnar/Yolov9)
7
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/merve/yolov9)
8
+ [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/train-yolov9-object-detection-on-custom-dataset.ipynb)
9
+ [![OpenCV](https://img.shields.io/badge/OpenCV-BlogPost-black?logo=opencv&labelColor=blue&color=black)](https://learnopencv.com/yolov9-advancing-the-yolo-legacy/)
10
+
11
+ <div align="center">
12
+ <a href="./">
13
+ <img src="./figure/performance.png" width="79%"/>
14
+ </a>
15
+ </div>
16
+
17
+
18
+ ## Performance
19
+
20
+ MS COCO
21
+
22
+ | Model | Test Size | AP<sup>val</sup> | AP<sub>50</sub><sup>val</sup> | AP<sub>75</sub><sup>val</sup> | Param. | FLOPs |
23
+ | :-- | :-: | :-: | :-: | :-: | :-: | :-: |
24
+ | [**YOLOv9-T**]() | 640 | **38.3%** | **53.1%** | **41.3%** | **2.0M** | **7.7G** |
25
+ | [**YOLOv9-S**]() | 640 | **46.8%** | **63.4%** | **50.7%** | **7.1M** | **26.4G** |
26
+ | [**YOLOv9-M**]() | 640 | **51.4%** | **68.1%** | **56.1%** | **20.0M** | **76.3G** |
27
+ | [**YOLOv9-C**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c-converted.pt) | 640 | **53.0%** | **70.2%** | **57.8%** | **25.3M** | **102.1G** |
28
+ | [**YOLOv9-E**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-e-converted.pt) | 640 | **55.6%** | **72.8%** | **60.6%** | **57.3M** | **189.0G** |
29
+ <!-- | [**YOLOv9 (ReLU)**]() | 640 | **51.9%** | **69.1%** | **56.5%** | **25.3M** | **102.1G** | -->
30
+
31
+ <!-- The tiny, small, and medium models will be released after the paper is accepted and published. -->
32
+
33
+ ## Useful Links
34
+
35
+ <details><summary> <b>Expand</b> </summary>
36
+
37
+ Custom training: https://github.com/WongKinYiu/yolov9/issues/30#issuecomment-1960955297
38
+
39
+ ONNX export: https://github.com/WongKinYiu/yolov9/issues/2#issuecomment-1960519506 https://github.com/WongKinYiu/yolov9/issues/40#issue-2150697688 https://github.com/WongKinYiu/yolov9/issues/130#issue-2162045461
40
+
41
+ ONNX export for segmentation: https://github.com/WongKinYiu/yolov9/issues/260#issue-2191162150
42
+
43
+ TensorRT inference: https://github.com/WongKinYiu/yolov9/issues/143#issuecomment-1975049660 https://github.com/WongKinYiu/yolov9/issues/34#issue-2150393690 https://github.com/WongKinYiu/yolov9/issues/79#issue-2153547004 https://github.com/WongKinYiu/yolov9/issues/143#issue-2164002309
44
+
45
+ QAT TensorRT: https://github.com/WongKinYiu/yolov9/issues/327#issue-2229284136 https://github.com/WongKinYiu/yolov9/issues/253#issue-2189520073
46
+
47
+ TFLite: https://github.com/WongKinYiu/yolov9/issues/374#issuecomment-2065751706
48
+
49
+ OpenVINO: https://github.com/WongKinYiu/yolov9/issues/164#issue-2168540003
50
+
51
+ C# ONNX inference: https://github.com/WongKinYiu/yolov9/issues/95#issue-2155974619
52
+
53
+ C# OpenVINO inference: https://github.com/WongKinYiu/yolov9/issues/95#issuecomment-1968131244
54
+
55
+ OpenCV: https://github.com/WongKinYiu/yolov9/issues/113#issuecomment-1971327672
56
+
57
+ Hugging Face demo: https://github.com/WongKinYiu/yolov9/issues/45#issuecomment-1961496943
58
+
59
+ CoLab demo: https://github.com/WongKinYiu/yolov9/pull/18
60
+
61
+ ONNXSlim export: https://github.com/WongKinYiu/yolov9/pull/37
62
+
63
+ YOLOv9 ROS: https://github.com/WongKinYiu/yolov9/issues/144#issue-2164210644
64
+
65
+ YOLOv9 ROS TensorRT: https://github.com/WongKinYiu/yolov9/issues/145#issue-2164218595
66
+
67
+ YOLOv9 Julia: https://github.com/WongKinYiu/yolov9/issues/141#issuecomment-1973710107
68
+
69
+ YOLOv9 MLX: https://github.com/WongKinYiu/yolov9/issues/258#issue-2190586540
70
+
71
+ YOLOv9 StrongSORT with OSNet: https://github.com/WongKinYiu/yolov9/issues/299#issue-2212093340
72
+
73
+ YOLOv9 ByteTrack: https://github.com/WongKinYiu/yolov9/issues/78#issue-2153512879
74
+
75
+ YOLOv9 DeepSORT: https://github.com/WongKinYiu/yolov9/issues/98#issue-2156172319
76
+
77
+ YOLOv9 counting: https://github.com/WongKinYiu/yolov9/issues/84#issue-2153904804
78
+
79
+ YOLOv9 face detection: https://github.com/WongKinYiu/yolov9/issues/121#issue-2160218766
80
+
81
+ YOLOv9 segmentation onnxruntime: https://github.com/WongKinYiu/yolov9/issues/151#issue-2165667350
82
+
83
+ Comet logging: https://github.com/WongKinYiu/yolov9/pull/110
84
+
85
+ MLflow logging: https://github.com/WongKinYiu/yolov9/pull/87
86
+
87
+ AnyLabeling tool: https://github.com/WongKinYiu/yolov9/issues/48#issue-2152139662
88
+
89
+ AX650N deploy: https://github.com/WongKinYiu/yolov9/issues/96#issue-2156115760
90
+
91
+ Conda environment: https://github.com/WongKinYiu/yolov9/pull/93
92
+
93
+ AutoDL docker environment: https://github.com/WongKinYiu/yolov9/issues/112#issue-2158203480
94
+
95
+ </details>
96
+
97
+
98
+ ## Installation
99
+
100
+ Docker environment (recommended)
101
+ <details><summary> <b>Expand</b> </summary>
102
+
103
+ ``` shell
104
+ # create the docker container; you can increase the shared memory size if you have more memory available.
105
+ nvidia-docker run --name yolov9 -it -v your_coco_path/:/coco/ -v your_code_path/:/yolov9 --shm-size=64g nvcr.io/nvidia/pytorch:21.11-py3
106
+
107
+ # apt install required packages
108
+ apt update
109
+ apt install -y zip htop screen libgl1-mesa-glx
110
+
111
+ # pip install required packages
112
+ pip install seaborn thop
113
+
114
+ # go to code folder
115
+ cd /yolov9
116
+ ```
117
+
118
+ </details>
119
+
120
+
121
+ ## Evaluation
122
+
123
+ [`yolov9-c-converted.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c-converted.pt) [`yolov9-e-converted.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-e-converted.pt) [`yolov9-c.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c.pt) [`yolov9-e.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-e.pt) [`gelan-c.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c.pt) [`gelan-e.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-e.pt)
124
+
125
+ ``` shell
126
+ # evaluate converted yolov9 models
127
+ python val.py --data data/coco.yaml --img 640 --batch 32 --conf 0.001 --iou 0.7 --device 0 --weights './yolov9-c-converted.pt' --save-json --name yolov9_c_c_640_val
128
+
129
+ # evaluate yolov9 models
130
+ # python val_dual.py --data data/coco.yaml --img 640 --batch 32 --conf 0.001 --iou 0.7 --device 0 --weights './yolov9-c.pt' --save-json --name yolov9_c_640_val
131
+
132
+ # evaluate gelan models
133
+ # python val.py --data data/coco.yaml --img 640 --batch 32 --conf 0.001 --iou 0.7 --device 0 --weights './gelan-c.pt' --save-json --name gelan_c_640_val
134
+ ```
135
+
136
+ You will get the results:
137
+
138
+ ```
139
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.530
140
+ Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.702
141
+ Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.578
142
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.362
143
+ Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.585
144
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.693
145
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.392
146
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.652
147
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.702
148
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.541
149
+ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.760
150
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.844
151
+ ```
152
+
153
+
154
+ ## Training
155
+
156
+ Data preparation
157
+
158
+ ``` shell
159
+ bash scripts/get_coco.sh
160
+ ```
161
+
162
+ * Download MS COCO dataset images ([train](http://images.cocodataset.org/zips/train2017.zip), [val](http://images.cocodataset.org/zips/val2017.zip), [test](http://images.cocodataset.org/zips/test2017.zip)) and [labels](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/coco2017labels-segments.zip). If you have previously used a different version of YOLO, we strongly recommend that you delete `train2017.cache` and `val2017.cache` files, and redownload [labels](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/coco2017labels-segments.zip)
163
+
164
+ Single GPU training
165
+
166
+ ``` shell
167
+ # train yolov9 models
168
+ python train_dual.py --workers 8 --device 0 --batch 16 --data data/coco.yaml --img 640 --cfg models/detect/yolov9-c.yaml --weights '' --name yolov9-c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 500 --close-mosaic 15
169
+
170
+ # train gelan models
171
+ # python train.py --workers 8 --device 0 --batch 32 --data data/coco.yaml --img 640 --cfg models/detect/gelan-c.yaml --weights '' --name gelan-c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 500 --close-mosaic 15
172
+ ```
173
+
174
+ Multiple GPU training
175
+
176
+ ``` shell
177
+ # train yolov9 models
178
+ python -m torch.distributed.launch --nproc_per_node 8 --master_port 9527 train_dual.py --workers 8 --device 0,1,2,3,4,5,6,7 --sync-bn --batch 128 --data data/coco.yaml --img 640 --cfg models/detect/yolov9-c.yaml --weights '' --name yolov9-c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 500 --close-mosaic 15
179
+
180
+ # train gelan models
181
+ # python -m torch.distributed.launch --nproc_per_node 4 --master_port 9527 train.py --workers 8 --device 0,1,2,3 --sync-bn --batch 128 --data data/coco.yaml --img 640 --cfg models/detect/gelan-c.yaml --weights '' --name gelan-c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 500 --close-mosaic 15
182
+ ```
183
+
184
+
185
+ ## Re-parameterization
186
+
187
+ See [reparameterization.ipynb](https://github.com/WongKinYiu/yolov9/blob/main/tools/reparameterization.ipynb).
188
+
189
+
190
+ ## Inference
191
+
192
+ <div align="center">
193
+ <a href="./">
194
+ <img src="./figure/horses_prediction.jpg" width="49%"/>
195
+ </a>
196
+ </div>
197
+
198
+ ``` shell
199
+ # inference converted yolov9 models
200
+ python detect.py --source './data/images/horses.jpg' --img 640 --device 0 --weights './yolov9-c-converted.pt' --name yolov9_c_c_640_detect
201
+
202
+ # inference yolov9 models
203
+ # python detect_dual.py --source './data/images/horses.jpg' --img 640 --device 0 --weights './yolov9-c.pt' --name yolov9_c_640_detect
204
+
205
+ # inference gelan models
206
+ # python detect.py --source './data/images/horses.jpg' --img 640 --device 0 --weights './gelan-c.pt' --name gelan_c_c_640_detect
207
+ ```
208
+
209
+
210
+ ## Citation
211
+
212
+ ```
213
+ @article{wang2024yolov9,
214
+ title={{YOLOv9}: Learning What You Want to Learn Using Programmable Gradient Information},
215
+ author={Wang, Chien-Yao and Liao, Hong-Yuan Mark},
216
+ journal={arXiv preprint arXiv:2402.13616},
217
+ year={2024}
218
+ }
219
+ ```
220
+
221
+ ```
222
+ @article{chang2023yolor,
223
+ title={{YOLOR}-Based Multi-Task Learning},
224
+ author={Chang, Hung-Shuo and Wang, Chien-Yao and Wang, Richard Robert and Chou, Gene and Liao, Hong-Yuan Mark},
225
+ journal={arXiv preprint arXiv:2309.16921},
226
+ year={2023}
227
+ }
228
+ ```
229
+
230
+
231
+ ## Teaser
232
+
233
+ Parts of the code for [YOLOR-Based Multi-Task Learning](https://arxiv.org/abs/2309.16921) are released in this repository.
234
+
235
+ <div align="center">
236
+ <a href="./">
237
+ <img src="./figure/multitask.png" width="99%"/>
238
+ </a>
239
+ </div>
240
+
241
+ #### Object Detection
242
+
243
+ [`gelan-c-det.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-det.pt)
244
+
245
+ `object detection`
246
+
247
+ ``` shell
248
+ # coco/labels/{split}/*.txt
249
+ # bbox or polygon (1 instance 1 line)
250
+ python train.py --workers 8 --device 0 --batch 32 --data data/coco.yaml --img 640 --cfg models/detect/gelan-c.yaml --weights '' --name gelan-c-det --hyp hyp.scratch-high.yaml --min-items 0 --epochs 300 --close-mosaic 10
251
+ ```
252
+
253
+ | Model | Test Size | Param. | FLOPs | AP<sup>box</sup> |
254
+ | :-- | :-: | :-: | :-: | :-: |
255
+ | [**GELAN-C-DET**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-det.pt) | 640 | 25.3M | 102.1G |**52.3%** |
256
+ | [**YOLOv9-C-DET**]() | 640 | 25.3M | 102.1G | **53.0%** |
257
+
258
+ #### Instance Segmentation
259
+
260
+ [`gelan-c-seg.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-seg.pt)
261
+
262
+ `object detection` `instance segmentation`
263
+
264
+ ``` shell
265
+ # coco/labels/{split}/*.txt
266
+ # polygon (1 instance 1 line)
267
+ python segment/train.py --workers 8 --device 0 --batch 32 --data coco.yaml --img 640 --cfg models/segment/gelan-c-seg.yaml --weights '' --name gelan-c-seg --hyp hyp.scratch-high.yaml --no-overlap --epochs 300 --close-mosaic 10
268
+ ```
269
+
270
+ | Model | Test Size | Param. | FLOPs | AP<sup>box</sup> | AP<sup>mask</sup> |
271
+ | :-- | :-: | :-: | :-: | :-: | :-: |
272
+ | [**GELAN-C-SEG**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-seg.pt) | 640 | 27.4M | 144.6G | **52.3%** | **42.4%** |
273
+ | [**YOLOv9-C-SEG**]() | 640 | 27.4M | 145.5G | **53.3%** | **43.5%** |
274
+
275
+ #### Panoptic Segmentation
276
+
277
+ [`gelan-c-pan.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-pan.pt)
278
+
279
+ `object detection` `instance segmentation` `semantic segmentation` `stuff segmentation` `panoptic segmentation`
280
+
281
+ ``` shell
282
+ # coco/labels/{split}/*.txt
283
+ # polygon (1 instance 1 line)
284
+ # coco/stuff/{split}/*.txt
285
+ # polygon (1 semantic 1 line)
286
+ python panoptic/train.py --workers 8 --device 0 --batch 32 --data coco.yaml --img 640 --cfg models/panoptic/gelan-c-pan.yaml --weights '' --name gelan-c-pan --hyp hyp.scratch-high.yaml --no-overlap --epochs 300 --close-mosaic 10
287
+ ```
288
+
289
+ | Model | Test Size | Param. | FLOPs | AP<sup>box</sup> | AP<sup>mask</sup> | mIoU<sub>164k/10k</sub><sup>semantic</sup> | mIoU<sup>stuff</sup> | PQ<sup>panoptic</sup> |
290
+ | :-- | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
291
+ | [**GELAN-C-PAN**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-pan.pt) | 640 | 27.6M | 146.7G | **52.6%** | **42.5%** | **39.0%/48.3%** | **52.7%** | **39.4%** |
292
+ | [**YOLOv9-C-PAN**]() | 640 | 28.8M | 187.0G | **52.7%** | **43.0%** | **39.8%/-** | **52.2%** | **40.5%** |
293
+
294
+ #### Image Captioning (not yet released)
295
+
296
+ <!--[`gelan-c-cap.pt`]()-->
297
+
298
+ `object detection` `instance segmentation` `semantic segmentation` `stuff segmentation` `panoptic segmentation` `image captioning`
299
+
300
+ ``` shell
301
+ # coco/labels/{split}/*.txt
302
+ # polygon (1 instance 1 line)
303
+ # coco/stuff/{split}/*.txt
304
+ # polygon (1 semantic 1 line)
305
+ # coco/annotations/*.json
306
+ # json (1 split 1 file)
307
+ python caption/train.py --workers 8 --device 0 --batch 32 --data coco.yaml --img 640 --cfg models/caption/gelan-c-cap.yaml --weights '' --name gelan-c-cap --hyp hyp.scratch-high.yaml --no-overlap --epochs 300 --close-mosaic 10
308
+ ```
309
+
310
+ | Model | Test Size | Param. | FLOPs | AP<sup>box</sup> | AP<sup>mask</sup> | mIoU<sub>164k/10k</sub><sup>semantic</sup> | mIoU<sup>stuff</sup> | PQ<sup>panoptic</sup> | BLEU@4<sup>caption</sup> | CIDEr<sup>caption</sup> |
311
+ | :-- | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
312
+ | [**GELAN-C-CAP**]() | 640 | 47.5M | - | **51.9%** | **42.6%** | **42.5%/-** | **56.5%** | **41.7%** | **38.8** | **122.3** |
313
+ | [**YOLOv9-C-CAP**]() | 640 | 47.5M | - | **52.1%** | **42.6%** | **43.0%/-** | **56.4%** | **42.1%** | **39.1** | **122.0** |
314
+ <!--| [**YOLOR-MT**]() | 640 | 79.3M | - | **51.0%** | **41.7%** | **-/49.6%** | **55.9%** | **40.5%** | **35.7** | **112.7** |-->
315
+
316
+
317
+ ## Acknowledgements
318
+
319
+ <details><summary> <b>Expand</b> </summary>
320
+
321
+ * [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet)
322
+ * [https://github.com/WongKinYiu/yolor](https://github.com/WongKinYiu/yolor)
323
+ * [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7)
324
+ * [https://github.com/VDIGPKU/DynamicDet](https://github.com/VDIGPKU/DynamicDet)
325
+ * [https://github.com/DingXiaoH/RepVGG](https://github.com/DingXiaoH/RepVGG)
326
+ * [https://github.com/ultralytics/yolov5](https://github.com/ultralytics/yolov5)
327
+ * [https://github.com/meituan/YOLOv6](https://github.com/meituan/YOLOv6)
328
+
329
+ </details>
yolov9/__pycache__/call_detection.cpython-310.pyc ADDED
Binary file (582 Bytes). View file
 
yolov9/__pycache__/detect_dual.cpython-310.pyc ADDED
Binary file (7.86 kB). View file
 
yolov9/__pycache__/detect_dual.cpython-311.pyc ADDED
Binary file (17 kB). View file
 
yolov9/__pycache__/export.cpython-310.pyc ADDED
Binary file (24.3 kB). View file
 
yolov9/__pycache__/export.cpython-311.pyc ADDED
Binary file (47.9 kB). View file
 
yolov9/__pycache__/val.cpython-310.pyc ADDED
Binary file (13 kB). View file
 
yolov9/__pycache__/val_dual.cpython-310.pyc ADDED
Binary file (13 kB). View file
 
yolov9/app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ import numpy as np
4
+ import os
5
+ import uuid
6
+
7
def inference(input_img):
    """Run YOLOv9 detection on one uploaded file and return the result path.

    Args:
        input_img: Filesystem path of the uploaded image or video.

    Returns:
        The path ``yolov9/runs/detect/<run-id>/<input file name>`` where the
        detector is expected to have written the annotated copy of the input.
    """
    import subprocess
    import sys

    # A fresh run name per request keeps every request in its own output folder.
    run_name = str(uuid.uuid4())

    # The command is passed as an argument list with no shell involved, so a
    # file name containing spaces or shell metacharacters is handed to
    # detect.py verbatim instead of being interpreted by a shell.
    command = [
        sys.executable, "yolov9/detect.py",
        "--source", str(input_img),
        "--img", "640",
        "--device", "cpu",
        "--weights", "yolov9/runs/train/exp/weights/best.pt",
        "--name", run_name,
    ]
    # Best effort, like the previous os.system call: a non-zero exit status of
    # the detector is not turned into an exception here.
    subprocess.run(command, check=False)

    return f"yolov9/runs/detect/{run_name}/{os.path.basename(str(input_img))}"
12
+
13
def inference_video(input_img):
    """Placeholder video handler: return the received value unchanged."""
    return input_img
16
+
17
# Gradio UI: one tab for video input and one for image input. Both tabs send
# the uploaded file path to `inference` and display the file it returns.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Vehicle detection using Yolo-v9
        Upload the vehicle image or video for detection
        """
    )

    with gr.Tab("Video"):
        gr.Markdown(
            """
            Upload video mp4 file and detect the count of vehicles passing by
            """
        )
        # The image-upload blurb that used to be repeated here belongs to the
        # "Image" tab only, and the video widgets are now labelled as videos.
        with gr.Row():
            video_input = [gr.Video(label="Input Video", width=300, height=300)]
            video_outputs = [gr.Video(label="Output Video", width=300, height=300)]

        video_button = gr.Button("Predict")
        video_button.click(inference, inputs=video_input, outputs=video_outputs)

    with gr.Tab("Image"):
        gr.Markdown(
            """
            Upload image file and detect vehicles present in the image
            """
        )
        with gr.Row():
            # type="filepath" makes Gradio pass a path string to `inference`.
            img_input = [gr.Image(type="filepath", label="Input Image", width=300, height=300)]
            pred_outputs = [gr.Image(label="Output Image", width=640, height=640)]

        image_button = gr.Button("Predict")
        image_button.click(inference, inputs=img_input, outputs=pred_outputs)


# share=True additionally requests a public share link for the demo.
demo.launch(share=True)
yolov9/benchmarks.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import platform
3
+ import sys
4
+ import time
5
+ from pathlib import Path
6
+
7
+ import pandas as pd
8
+
9
+ FILE = Path(__file__).resolve()
10
+ ROOT = FILE.parents[0] # YOLO root directory
11
+ if str(ROOT) not in sys.path:
12
+ sys.path.append(str(ROOT)) # add ROOT to PATH
13
+ # ROOT = ROOT.relative_to(Path.cwd()) # relative
14
+
15
+ import export
16
+ from models.experimental import attempt_load
17
+ from models.yolo import SegmentationModel
18
+ from segment.val import run as val_seg
19
+ from utils import notebook_init
20
+ from utils.general import LOGGER, check_yaml, file_size, print_args
21
+ from utils.torch_utils import select_device
22
+ from val import run as val_det
23
+
24
+
25
def run(
        weights=ROOT / 'yolo.pt',  # weights path
        imgsz=640,  # inference size (pixels)
        batch_size=1,  # batch size
        data=ROOT / 'data/coco.yaml',  # dataset.yaml path
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        half=False,  # use FP16 half-precision inference
        test=False,  # test exports only
        pt_only=False,  # test PyTorch only
        hard_fail=False,  # throw error on benchmark failure
):
    """Export the model to every format and benchmark size, mAP and inference time.

    For each row of ``export.export_formats()`` the weights are exported
    (the '-' row uses the PyTorch weights directly) and validated with the
    segmentation or detection validator, depending on the loaded model type.
    A format that fails is logged and recorded with ``None`` values.

    Args:
        weights: Path of the model weights.
        imgsz: Inference size in pixels.
        batch_size: Batch size handed to the validator.
        data: Dataset yaml path handed to the validator.
        device: Device string forwarded to ``select_device``.
        half: Use FP16 half-precision for export and validation.
        test: Unused here; accepted so ``run(**vars(opt))`` works.
        pt_only: Stop after the first (PyTorch) format.
        hard_fail: If truthy, any non-assertion failure is re-raised as an
            AssertionError. If it is a string, it is additionally evaluated as
            the minimum mAP50-95 every benchmarked format must exceed.

    Returns:
        pandas.DataFrame with columns Format, Size (MB), mAP50-95 and
        Inference time (ms).
    """
    y, t = [], time.time()
    device = select_device(device)
    model_type = type(attempt_load(weights, fuse=False))  # DetectionModel, SegmentationModel, etc.
    for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows():  # index, (name, file, suffix, CPU, GPU)
        try:
            # Asserts are used as "skip this format" signals: they land in the
            # except branch below and are never escalated by --hard-fail.
            assert i not in (9, 10), 'inference not supported'  # Edge TPU and TF.js are unsupported
            assert i != 5 or platform.system() == 'Darwin', 'inference only supported on macOS>=10.13'  # CoreML
            if 'cpu' in device.type:
                assert cpu, 'inference not supported on CPU'
            if 'cuda' in device.type:
                assert gpu, 'inference not supported on GPU'

            # Export
            if f == '-':
                w = weights  # PyTorch format
            else:
                w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1]  # all others
            assert suffix in str(w), 'export failed'

            # Validate
            if model_type == SegmentationModel:
                result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half)
                metric = result[0][7]  # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls))
            else:  # DetectionModel:
                result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half)
                metric = result[0][3]  # (p, r, map50, map, *loss(box, obj, cls))
            speed = result[2][1]  # times (preprocess, inference, postprocess)
            y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)])  # MB, mAP, t_inference
        except Exception as e:
            if hard_fail:
                assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}'
            LOGGER.warning(f'WARNING ⚠️ Benchmark failure for {name}: {e}')
            y.append([name, None, None, None])  # mAP, t_inference
        if pt_only and i == 0:
            break  # break after PyTorch

    # Print results
    LOGGER.info('\n')
    # The command line is no longer re-parsed here: doing so only re-printed
    # the arguments on CLI runs and could abort a programmatic call whose
    # process argv does not match this script's options.
    notebook_init()  # print system info
    # The previous `... if map else ...` tested the builtin `map`, which is
    # always truthy, so the full column set was the only reachable branch.
    c = ['Format', 'Size (MB)', 'mAP50-95', 'Inference time (ms)']
    py = pd.DataFrame(y, columns=c)
    LOGGER.info(f'\nBenchmarks complete ({time.time() - t:.2f}s)')
    LOGGER.info(str(py))
    if hard_fail and isinstance(hard_fail, str):
        metrics = py['mAP50-95'].array  # values to compare to floor
        # NOTE(review): eval() runs arbitrary code taken from --hard-fail;
        # consider float(hard_fail) if only numeric floors are intended.
        floor = eval(hard_fail)  # minimum metric floor to pass
        assert all(x > floor for x in metrics if pd.notna(x)), f'HARD FAIL: mAP50-95 < floor {floor}'
    return py
85
+
86
+
87
def test(
        weights=ROOT / 'yolo.pt',  # weights path
        imgsz=640,  # inference size (pixels)
        batch_size=1,  # batch size
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        half=False,  # use FP16 half-precision inference
        test=False,  # test exports only
        pt_only=False,  # test PyTorch only
        hard_fail=False,  # throw error on benchmark failure
):
    """Try to export the model to every format and report which exports succeed.

    Args:
        weights: Path of the model weights.
        imgsz: Export image size in pixels.
        batch_size, data, test, pt_only, hard_fail: Unused here; accepted so
            ``test(**vars(opt))`` works.
        device: Device string forwarded to ``select_device``.
        half: Export with FP16 half-precision.

    Returns:
        pandas.DataFrame with columns Format and Export (True/False).
    """
    y, t = [], time.time()
    device = select_device(device)
    # Only the first three fields of a format row are needed. The remaining
    # capability flags are swallowed by *_ so the loop header no longer raises
    # ValueError on rows that carry both a CPU and a GPU column (as unpacked
    # by run() in this file).
    for i, (name, f, suffix, *_) in export.export_formats().iterrows():  # index, (name, file, suffix, ...)
        try:
            w = weights if f == '-' else \
                export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1]  # weights
            assert suffix in str(w), 'export failed'
            y.append([name, True])
        except Exception:
            # Any failure (including the assert above) marks the format as not exportable.
            y.append([name, False])  # mAP, t_inference

    # Print results
    LOGGER.info('\n')
    # The command line is no longer re-parsed here; see the CLI entry point
    # for argument parsing and printing.
    notebook_init()  # print system info
    py = pd.DataFrame(y, columns=['Format', 'Export'])
    LOGGER.info(f'\nExports complete ({time.time() - t:.2f}s)')
    LOGGER.info(str(py))
    return py
117
+
118
+
119
def parse_opt():
    """Parse the benchmark command line, validate --data and print the options.

    Returns:
        argparse.Namespace with the parsed options; ``data`` has been passed
        through ``check_yaml``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--weights', type=str, default=ROOT / 'yolo.pt', help='weights path')
    cli.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
    cli.add_argument('--batch-size', type=int, default=1, help='batch size')
    cli.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
    cli.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    cli.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    cli.add_argument('--test', action='store_true', help='test exports only')
    cli.add_argument('--pt-only', action='store_true', help='test PyTorch only')
    # Bare --hard-fail yields True; --hard-fail VALUE yields the string VALUE.
    cli.add_argument('--hard-fail', nargs='?', const=True, default=False, help='Exception on error or < min metric')
    options = cli.parse_args()
    options.data = check_yaml(options.data)  # check YAML
    print_args(vars(options))
    return options
134
+
135
+
136
def main(opt):
    """Dispatch to the export-only test or the full benchmark, based on ``opt.test``."""
    settings = vars(opt)
    if opt.test:
        test(**settings)
    else:
        run(**settings)
138
+
139
+
140
+ if __name__ == "__main__":
141
+ opt = parse_opt()
142
+ main(opt)
yolov9/call_detection.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from detect_dual import parse_opt,main
2
+
3
# Sample invocation of the dual-detection entry point with fixed inputs.
source = '/home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/data/images/lamborghini-aventador-2932196_1280.jpg'
# NOTE(review): the trailing comma in the original (`img=640,`) already made
# this a 1-tuple; it is spelled out here. Confirm parse_opt expects a sequence.
img = (640,)
weights = '/home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/runs/train/exp/weights/best.pt'

opt = parse_opt(source, img, weights)
path = main(opt)
print(path)
yolov9/classify/predict.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
+ """
3
+ Run YOLOv5 classification inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
4
+
5
+ Usage - sources:
6
+ $ python classify/predict.py --weights yolov5s-cls.pt --source 0 # webcam
7
+ img.jpg # image
8
+ vid.mp4 # video
9
+ screen # screenshot
10
+ path/ # directory
11
+ 'path/*.jpg' # glob
12
+ 'https://youtu.be/Zgi9g1ksQHc' # YouTube
13
+ 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
14
+
15
+ Usage - formats:
16
+ $ python classify/predict.py --weights yolov5s-cls.pt # PyTorch
17
+ yolov5s-cls.torchscript # TorchScript
18
+ yolov5s-cls.onnx # ONNX Runtime or OpenCV DNN with --dnn
19
+ yolov5s-cls_openvino_model # OpenVINO
20
+ yolov5s-cls.engine # TensorRT
21
+ yolov5s-cls.mlmodel # CoreML (macOS-only)
22
+ yolov5s-cls_saved_model # TensorFlow SavedModel
23
+ yolov5s-cls.pb # TensorFlow GraphDef
24
+ yolov5s-cls.tflite # TensorFlow Lite
25
+ yolov5s-cls_edgetpu.tflite # TensorFlow Edge TPU
26
+ yolov5s-cls_paddle_model # PaddlePaddle
27
+ """
28
+
29
+ import argparse
30
+ import os
31
+ import platform
32
+ import sys
33
+ from pathlib import Path
34
+
35
+ import torch
36
+ import torch.nn.functional as F
37
+
38
+ FILE = Path(__file__).resolve()
39
+ ROOT = FILE.parents[1] # YOLOv5 root directory
40
+ if str(ROOT) not in sys.path:
41
+ sys.path.append(str(ROOT)) # add ROOT to PATH
42
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
43
+
44
+ from models.common import DetectMultiBackend
45
+ from utils.augmentations import classify_transforms
46
+ from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
47
+ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
48
+ increment_path, print_args, strip_optimizer)
49
+ from utils.plots import Annotator
50
+ from utils.torch_utils import select_device, smart_inference_mode
51
+
52
+
53
@smart_inference_mode()
def run(
        weights=ROOT / 'yolov5s-cls.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(224, 224),  # inference size (height, width)
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        nosave=False,  # do not save images/videos
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/predict-cls',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
):
    """Run classification inference on a source and save/show the top-5 classes.

    The source is classified as stream (numeric id, *.txt list or non-file
    URL), screenshot ('screen...') or image/video files, and the matching
    loader is used. For every frame the softmax probabilities are computed and
    the five most probable class names are logged, optionally drawn on the
    image, written to ``labels/*.txt`` and saved as an image or mp4 video.

    ``augment`` and ``visualize`` are accepted but not used in this function.
    """
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.Tensor(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            results = model(im)

        # Post-process
        with dt[2]:
            pred = F.softmax(results, dim=1)  # probabilities

        # Process predictions
        for i, prob in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt

            s += '%gx%g ' % im.shape[2:]  # print string
            annotator = Annotator(im0, example=str(names), pil=True)

            # Print results
            top5i = prob.argsort(0, descending=True)[:5].tolist()  # top 5 indices
            s += f"{', '.join(f'{names[j]} {prob[j]:.2f}' for j in top5i)}, "

            # Write results
            text = '\n'.join(f'{prob[j]:.2f} {names[j]}' for j in top5i)
            if save_img or view_img:  # Add bbox to image
                annotator.text((32, 32), text, txt_color=(255, 255, 255))
            if save_txt:  # Write to file
                with open(f'{txt_path}.txt', 'a') as f:
                    f.write(text + '\n')

            # Stream results
            im0 = annotator.result()
            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{dt[1].dt * 1E3:.1f}ms")

    # Release the writers that are still open so the last video of every slot
    # is closed explicitly instead of whenever the object is garbage-collected.
    for writer in vid_writer:
        if isinstance(writer, cv2.VideoWriter):
            writer.release()

    # Print results
    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # `weights` is a list when it comes from the CLI (nargs='+') but a
        # single path when run() is called with its default or a plain path;
        # indexing a single path with [0] would fail or pick one character.
        first_weights = weights[0] if isinstance(weights, (list, tuple)) else weights
        strip_optimizer(first_weights)  # update model (to fix SourceChangeWarning)
190
+
191
+
192
def parse_opt():
    """Parse the classification-predict command line and print the options.

    A single ``--imgsz`` value is expanded to ``[size, size]``.

    Returns:
        argparse.Namespace with the parsed options.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-cls.pt', help='model path(s)')
    cli.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
    cli.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
    cli.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[224], help='inference size h,w')
    cli.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    cli.add_argument('--view-img', action='store_true', help='show results')
    cli.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    cli.add_argument('--nosave', action='store_true', help='do not save images/videos')
    cli.add_argument('--augment', action='store_true', help='augmented inference')
    cli.add_argument('--visualize', action='store_true', help='visualize features')
    cli.add_argument('--update', action='store_true', help='update all models')
    cli.add_argument('--project', default=ROOT / 'runs/predict-cls', help='save results to project/name')
    cli.add_argument('--name', default='exp', help='save results to project/name')
    cli.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    cli.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    cli.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    cli.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    options = cli.parse_args()
    if len(options.imgsz) == 1:
        options.imgsz *= 2  # expand
    print_args(vars(options))
    return options
215
+
216
+
217
def main(opt):
    """Check the requirements (except tensorboard and thop), then run inference with ``opt``."""
    skipped = ('tensorboard', 'thop')
    check_requirements(exclude=skipped)
    settings = vars(opt)
    run(**settings)
220
+
221
+
222
+ if __name__ == "__main__":
223
+ opt = parse_opt()
224
+ main(opt)
yolov9/classify/train.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
+ """
3
+ Train a YOLOv5 classifier model on a classification dataset
4
+
5
+ Usage - Single-GPU training:
6
+ $ python classify/train.py --model yolov5s-cls.pt --data imagenette160 --epochs 5 --img 224
7
+
8
+ Usage - Multi-GPU DDP training:
9
+ $ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 classify/train.py --model yolov5s-cls.pt --data imagenet --epochs 5 --img 224 --device 0,1,2,3
10
+
11
+ Datasets: --data mnist, fashion-mnist, cifar10, cifar100, imagenette, imagewoof, imagenet, or 'path/to/data'
12
+ YOLOv5-cls models: --model yolov5n-cls.pt, yolov5s-cls.pt, yolov5m-cls.pt, yolov5l-cls.pt, yolov5x-cls.pt
13
+ Torchvision models: --model resnet50, efficientnet_b0, etc. See https://pytorch.org/vision/stable/models.html
14
+ """
15
+
16
+ import argparse
17
+ import os
18
+ import subprocess
19
+ import sys
20
+ import time
21
+ from copy import deepcopy
22
+ from datetime import datetime
23
+ from pathlib import Path
24
+
25
+ import torch
26
+ import torch.distributed as dist
27
+ import torch.hub as hub
28
+ import torch.optim.lr_scheduler as lr_scheduler
29
+ import torchvision
30
+ from torch.cuda import amp
31
+ from tqdm import tqdm
32
+
33
+ FILE = Path(__file__).resolve()
34
+ ROOT = FILE.parents[1] # YOLOv5 root directory
35
+ if str(ROOT) not in sys.path:
36
+ sys.path.append(str(ROOT)) # add ROOT to PATH
37
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
38
+
39
+ from classify import val as validate
40
+ from models.experimental import attempt_load
41
+ from models.yolo import ClassificationModel, DetectionModel
42
+ from utils.dataloaders import create_classification_dataloader
43
+ from utils.general import (DATASETS_DIR, LOGGER, TQDM_BAR_FORMAT, WorkingDirectory, check_git_info, check_git_status,
44
+ check_requirements, colorstr, download, increment_path, init_seeds, print_args, yaml_save)
45
+ from utils.loggers import GenericLogger
46
+ from utils.plots import imshow_cls
47
+ from utils.torch_utils import (ModelEMA, model_info, reshape_classifier_output, select_device, smart_DDP,
48
+ smart_optimizer, smartCrossEntropyLoss, torch_distributed_zero_first)
49
+
50
+ LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
51
+ RANK = int(os.getenv('RANK', -1))
52
+ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
53
+ GIT_INFO = check_git_info()
54
+
55
+
56
def train(opt, device):
    """Train a classification model as configured by ``opt`` on ``device``.

    Steps, in order: seed, create the weights directory and save the options,
    locate (or download) the dataset, build train/test dataloaders, load the
    model (checkpoint file or torchvision model name), then run the epoch loop
    with AMP, gradient clipping and EMA. On ranks -1/0 the EMA model is
    validated on the last batch of every epoch, metrics are logged and
    ``last.pt`` / ``best.pt`` checkpoints are written.

    Args:
        opt: Namespace of training options; ``opt.save_dir`` must already be set.
        device: torch device to train on.

    Raises:
        ModuleNotFoundError: if ``opt.model`` is neither a checkpoint path nor
            a torchvision model name.
    """
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    # os.cpu_count() may return None when the count cannot be determined;
    # treat that as a single CPU instead of failing on `None - 1`.
    save_dir, data, bs, epochs, nw, imgsz, pretrained = \
        opt.save_dir, Path(opt.data), opt.batch_size, opt.epochs, min((os.cpu_count() or 1) - 1, opt.workers), \
        opt.imgsz, str(opt.pretrained).lower() == 'true'
    cuda = device.type != 'cpu'

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = wdir / 'last.pt', wdir / 'best.pt'

    # Save run settings
    yaml_save(save_dir / 'opt.yaml', vars(opt))

    # Logger
    logger = GenericLogger(opt=opt, console_logger=LOGGER) if RANK in {-1, 0} else None

    # Download Dataset
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        data_dir = data if data.is_dir() else (DATASETS_DIR / data)
        if not data_dir.is_dir():
            LOGGER.info(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...')
            t = time.time()
            if str(data) == 'imagenet':
                subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True)
            else:
                url = f'https://github.com/ultralytics/yolov5/releases/download/v1.0/{data}.zip'
                download(url, dir=data_dir.parent)
            s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
            LOGGER.info(s)

    # Dataloaders
    nc = len([x for x in (data_dir / 'train').glob('*') if x.is_dir()])  # number of classes
    trainloader = create_classification_dataloader(path=data_dir / 'train',
                                                   imgsz=imgsz,
                                                   batch_size=bs // WORLD_SIZE,
                                                   augment=True,
                                                   cache=opt.cache,
                                                   rank=LOCAL_RANK,
                                                   workers=nw)

    test_dir = data_dir / 'test' if (data_dir / 'test').exists() else data_dir / 'val'  # data/test or data/val
    if RANK in {-1, 0}:
        testloader = create_classification_dataloader(path=test_dir,
                                                      imgsz=imgsz,
                                                      batch_size=bs // WORLD_SIZE * 2,
                                                      augment=False,
                                                      cache=opt.cache,
                                                      rank=-1,
                                                      workers=nw)

    # Model
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        if Path(opt.model).is_file() or opt.model.endswith('.pt'):
            model = attempt_load(opt.model, device='cpu', fuse=False)
        elif opt.model in torchvision.models.__dict__:  # TorchVision models i.e. resnet50, efficientnet_b0
            model = torchvision.models.__dict__[opt.model](weights='IMAGENET1K_V1' if pretrained else None)
        else:
            m = hub.list('ultralytics/yolov5')  # + hub.list('pytorch/vision')  # models
            raise ModuleNotFoundError(f'--model {opt.model} not found. Available models are: \n' + '\n'.join(m))
        if isinstance(model, DetectionModel):
            LOGGER.warning("WARNING ⚠️ pass YOLOv5 classifier model with '-cls' suffix, i.e. '--model yolov5s-cls.pt'")
            model = ClassificationModel(model=model, nc=nc, cutoff=opt.cutoff or 10)  # convert to classification model
        reshape_classifier_output(model, nc)  # update class count
    for m in model.modules():
        if not pretrained and hasattr(m, 'reset_parameters'):
            m.reset_parameters()
        if isinstance(m, torch.nn.Dropout) and opt.dropout is not None:
            m.p = opt.dropout  # set dropout
    for p in model.parameters():
        p.requires_grad = True  # for training
    model = model.to(device)

    # Info
    if RANK in {-1, 0}:
        model.names = trainloader.dataset.classes  # attach class names
        model.transforms = testloader.dataset.torch_transforms  # attach inference transforms
        model_info(model)
        if opt.verbose:
            LOGGER.info(model)
        images, labels = next(iter(trainloader))
        file = imshow_cls(images[:25], labels[:25], names=model.names, f=save_dir / 'train_images.jpg')
        logger.log_images(file, name='Train Examples')
        logger.log_graph(model, imgsz)  # log model

    # Optimizer
    optimizer = smart_optimizer(model, opt.optimizer, opt.lr0, momentum=0.9, decay=opt.decay)

    # Scheduler
    lrf = 0.01  # final lr (fraction of lr0)
    # lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf  # cosine
    lf = lambda x: (1 - x / epochs) * (1 - lrf) + lrf  # linear
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=lr0, total_steps=epochs, pct_start=0.1,
    #                                    final_div_factor=1 / 25 / lrf)

    # EMA
    ema = ModelEMA(model) if RANK in {-1, 0} else None

    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Train
    t0 = time.time()
    criterion = smartCrossEntropyLoss(label_smoothing=opt.label_smoothing)  # loss function
    best_fitness = 0.0
    scaler = amp.GradScaler(enabled=cuda)
    val = test_dir.stem  # 'val' or 'test'
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} test\n'
                f'Using {nw * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting {opt.model} training on {data} dataset with {nc} classes for {epochs} epochs...\n\n'
                f"{'Epoch':>10}{'GPU_mem':>10}{'train_loss':>12}{f'{val}_loss':>12}{'top1_acc':>12}{'top5_acc':>12}")
    for epoch in range(epochs):  # loop over the dataset multiple times
        tloss, vloss, fitness = 0.0, 0.0, 0.0  # train loss, val loss, fitness
        model.train()
        if RANK != -1:
            trainloader.sampler.set_epoch(epoch)
        pbar = enumerate(trainloader)
        if RANK in {-1, 0}:
            pbar = tqdm(enumerate(trainloader), total=len(trainloader), bar_format=TQDM_BAR_FORMAT)
        for i, (images, labels) in pbar:  # progress bar
            images, labels = images.to(device, non_blocking=True), labels.to(device)

            # Forward
            with amp.autocast(enabled=cuda):  # stability issues when enabled
                loss = criterion(model(images), labels)

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            scaler.unscale_(optimizer)  # unscale gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if ema:
                ema.update(model)

            if RANK in {-1, 0}:
                # Print
                tloss = (tloss * i + loss.item()) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                pbar.desc = f"{f'{epoch + 1}/{epochs}':>10}{mem:>10}{tloss:>12.3g}" + ' ' * 36

                # Test
                if i == len(pbar) - 1:  # last batch
                    top1, top5, vloss = validate.run(model=ema.ema,
                                                     dataloader=testloader,
                                                     criterion=criterion,
                                                     pbar=pbar)  # test accuracy, loss
                    fitness = top1  # define fitness as top1 accuracy

        # Scheduler
        scheduler.step()

        # Log metrics
        if RANK in {-1, 0}:
            # Best fitness
            if fitness > best_fitness:
                best_fitness = fitness

            # Log
            metrics = {
                "train/loss": tloss,
                f"{val}/loss": vloss,
                "metrics/accuracy_top1": top1,
                "metrics/accuracy_top5": top5,
                "lr/0": optimizer.param_groups[0]['lr']}  # learning rate
            logger.log_metrics(metrics, epoch)

            # Save model
            final_epoch = epoch + 1 == epochs
            if (not opt.nosave) or final_epoch:
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'model': deepcopy(ema.ema).half(),  # deepcopy(de_parallel(model)).half(),
                    'ema': None,  # deepcopy(ema.ema).half(),
                    'updates': ema.updates,
                    'optimizer': None,  # optimizer.state_dict(),
                    'opt': vars(opt),
                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
                    'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fitness:
                    torch.save(ckpt, best)
                del ckpt

    # Train complete
    if RANK in {-1, 0} and final_epoch:
        LOGGER.info(f'\nTraining complete ({(time.time() - t0) / 3600:.3f} hours)'
                    f"\nResults saved to {colorstr('bold', save_dir)}"
                    f"\nPredict:         python classify/predict.py --weights {best} --source im.jpg"
                    f"\nValidate:        python classify/val.py --weights {best} --data {data_dir}"
                    f"\nExport:          python export.py --weights {best} --include onnx"
                    f"\nPyTorch Hub:     model = torch.hub.load('ultralytics/yolov5', 'custom', '{best}')"
                    f"\nVisualize:       https://netron.app\n")

        # Plot examples
        images, labels = (x[:25] for x in next(iter(testloader)))  # first 25 images and labels
        pred = torch.max(ema.ema(images.to(device)), 1)[1]
        file = imshow_cls(images, labels, pred, model.names, verbose=False, f=save_dir / 'test_images.jpg')

        # Log results
        meta = {"epochs": epochs, "top1_acc": best_fitness, "date": datetime.now().isoformat()}
        logger.log_images(file, name='Test Examples (true-predicted)', epoch=epoch)
        logger.log_model(best, epochs, metadata=meta)
269
+
270
+
271
def parse_opt(known=False):
    """Build the classification-training argument parser and parse the command line.

    Args:
        known: When true, unknown command-line arguments are ignored
            (``parse_known_args``); otherwise they are an error.

    Returns:
        argparse.Namespace with the parsed options.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--model', type=str, default='yolov5s-cls.pt', help='initial weights path')
    cli.add_argument('--data', type=str, default='imagenette160', help='cifar10, cifar100, mnist, imagenet, ...')
    cli.add_argument('--epochs', type=int, default=10, help='total training epochs')
    cli.add_argument('--batch-size', type=int, default=64, help='total batch size for all GPUs')
    cli.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='train, val image size (pixels)')
    cli.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    cli.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    cli.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    cli.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    cli.add_argument('--project', default=ROOT / 'runs/train-cls', help='save to project/name')
    cli.add_argument('--name', default='exp', help='save to project/name')
    cli.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    cli.add_argument('--pretrained', nargs='?', const=True, default=True, help='start from i.e. --pretrained False')
    cli.add_argument('--optimizer', choices=['SGD', 'Adam', 'AdamW', 'RMSProp'], default='Adam', help='optimizer')
    cli.add_argument('--lr0', type=float, default=0.001, help='initial learning rate')
    cli.add_argument('--decay', type=float, default=5e-5, help='weight decay')
    cli.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing epsilon')
    cli.add_argument('--cutoff', type=int, default=None, help='Model layer cutoff index for Classify() head')
    cli.add_argument('--dropout', type=float, default=None, help='Dropout (fraction)')
    cli.add_argument('--verbose', action='store_true', help='Verbose mode')
    cli.add_argument('--seed', type=int, default=0, help='Global training seed')
    cli.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
    if known:
        recognised, _unrecognised = cli.parse_known_args()
        return recognised
    return cli.parse_args()
296
+
297
+
298
def main(opt):
    """Run pre-flight checks, set up the device (and DDP if launched that way), then train.

    Args:
        opt: Namespace of training options; ``opt.save_dir`` is set here
            before ``train`` is called.
    """
    # Checks
    # Only the main process (RANK -1 or 0) prints the options and runs the checks.
    if RANK in {-1, 0}:
        print_args(vars(opt))
        check_git_status()
        check_requirements()

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    # LOCAL_RANK != -1 means the script was started by a distributed launcher.
    if LOCAL_RANK != -1:
        assert opt.batch_size != -1, 'AutoBatch is coming soon for classification, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        # Bind this process to its own GPU before the process group is created.
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        # Prefer NCCL when available, otherwise fall back to gloo.
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

    # Parameters
    opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run

    # Train
    train(opt, device)
320
+
321
+
322
def run(**kwargs):
    """Programmatic entry point: parse defaults, override them with ``kwargs`` and train.

    Usage: from yolov5 import classify; classify.train.run(data=mnist, imgsz=320, model='yolov5m')

    Returns:
        The option namespace that was used for the run.
    """
    options = parse_opt(True)
    for key, value in kwargs.items():
        setattr(options, key, value)
    main(options)
    return options
329
+
330
+
331
+ if __name__ == "__main__":
332
+ opt = parse_opt()
333
+ main(opt)
yolov9/classify/val.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
+ """
3
+ Validate a trained YOLOv5 classification model on a classification dataset
4
+
5
+ Usage:
6
+ $ bash data/scripts/get_imagenet.sh --val # download ImageNet val split (6.3G, 50000 images)
7
+ $ python classify/val.py --weights yolov5m-cls.pt --data ../datasets/imagenet --img 224 # validate ImageNet
8
+
9
+ Usage - formats:
10
+ $ python classify/val.py --weights yolov5s-cls.pt # PyTorch
11
+ yolov5s-cls.torchscript # TorchScript
12
+ yolov5s-cls.onnx # ONNX Runtime or OpenCV DNN with --dnn
13
+ yolov5s-cls_openvino_model # OpenVINO
14
+ yolov5s-cls.engine # TensorRT
15
+ yolov5s-cls.mlmodel # CoreML (macOS-only)
16
+ yolov5s-cls_saved_model # TensorFlow SavedModel
17
+ yolov5s-cls.pb # TensorFlow GraphDef
18
+ yolov5s-cls.tflite # TensorFlow Lite
19
+ yolov5s-cls_edgetpu.tflite # TensorFlow Edge TPU
20
+ yolov5s-cls_paddle_model # PaddlePaddle
21
+ """
22
+
23
+ import argparse
24
+ import os
25
+ import sys
26
+ from pathlib import Path
27
+
28
+ import torch
29
+ from tqdm import tqdm
30
+
31
+ FILE = Path(__file__).resolve()
32
+ ROOT = FILE.parents[1] # YOLOv5 root directory
33
+ if str(ROOT) not in sys.path:
34
+ sys.path.append(str(ROOT)) # add ROOT to PATH
35
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
36
+
37
+ from models.common import DetectMultiBackend
38
+ from utils.dataloaders import create_classification_dataloader
39
+ from utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_img_size, check_requirements, colorstr,
40
+ increment_path, print_args)
41
+ from utils.torch_utils import select_device, smart_inference_mode
42
+
43
+
44
@smart_inference_mode()
def run(
    data=ROOT / '../datasets/mnist',  # dataset dir
    weights=ROOT / 'yolov5s-cls.pt',  # model.pt path(s)
    batch_size=128,  # batch size
    imgsz=224,  # inference size (pixels)
    device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    workers=8,  # max dataloader workers (per RANK in DDP mode)
    verbose=False,  # verbose output
    project=ROOT / 'runs/val-cls',  # save to project/name
    name='exp',  # save to project/name
    exist_ok=False,  # existing project/name ok, do not increment
    half=False,  # use FP16 half-precision inference
    dnn=False,  # use OpenCV DNN for ONNX inference
    model=None,
    dataloader=None,
    criterion=None,
    pbar=None,
):
    """Evaluate a classification model and report top-1 / top-5 accuracy.

    Two calling modes are supported:

    * ``model`` given (called by train.py): the supplied model and
      ``dataloader`` are used as-is; no output directory is created.
    * ``model`` is None (called directly): a run directory is created, the
      model is loaded from ``weights`` through DetectMultiBackend and a
      dataloader is built from ``data/test`` if it exists, else ``data/val``.

    Args:
        model: Already-built model; its presence selects the training mode.
        dataloader: Dataloader to iterate when ``model`` is given.
        criterion: Optional loss callable applied to (outputs, labels) per batch.
        pbar: Optional progress bar whose ``desc`` is rewritten with the results.
        (remaining arguments are described inline in the signature)

    Returns:
        Tuple ``(top1, top5, loss)`` where ``loss`` is the accumulated
        criterion value divided by the number of batches (0 when no criterion).
    """
    # Initialize/load model and set device
    training = model is not None
    save_dir = None  # stays None in training mode, where no run directory is created
    if training:  # called by train.py
        device, pt, jit, engine = next(model.parameters()).device, True, False, False  # get model device, PyTorch model
        half &= device.type != 'cpu'  # half precision only supported on CUDA
        model.half() if half else model.float()
    else:  # called directly
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        save_dir.mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half)
        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
        imgsz = check_img_size(imgsz, s=stride)  # check image size
        half = model.fp16  # FP16 supported on limited backends with CUDA
        if engine:
            batch_size = model.batch_size
        else:
            device = model.device
            if not (pt or jit):
                batch_size = 1  # export.py models default to batch-size 1
                LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')

        # Dataloader
        data = Path(data)
        test_dir = data / 'test' if (data / 'test').exists() else data / 'val'  # data/test or data/val
        dataloader = create_classification_dataloader(path=test_dir,
                                                      imgsz=imgsz,
                                                      batch_size=batch_size,
                                                      augment=False,
                                                      rank=-1,
                                                      workers=workers)

    model.eval()
    pred, targets, loss, dt = [], [], 0, (Profile(), Profile(), Profile())
    n = len(dataloader)  # number of batches
    action = 'validating' if dataloader.dataset.root.stem == 'val' else 'testing'
    desc = f"{pbar.desc[:-36]}{action:>36}" if pbar else f"{action}"
    bar = tqdm(dataloader, desc, n, not training, bar_format=TQDM_BAR_FORMAT, position=0)
    with torch.cuda.amp.autocast(enabled=device.type != 'cpu'):
        for images, labels in bar:
            with dt[0]:  # host-to-device transfer
                images, labels = images.to(device, non_blocking=True), labels.to(device)

            with dt[1]:  # forward pass
                y = model(images)

            with dt[2]:  # keep the five highest-scoring class indices per sample
                pred.append(y.argsort(1, descending=True)[:, :5])
                targets.append(labels)
                if criterion:
                    loss += criterion(y, labels)

    loss /= n
    pred, targets = torch.cat(pred), torch.cat(targets)
    correct = (targets[:, None] == pred).float()
    acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1)  # (top1, top5) accuracy
    top1, top5 = acc.mean(0).tolist()

    if pbar:
        pbar.desc = f"{pbar.desc[:-36]}{loss:>12.3g}{top1:>12.3g}{top5:>12.3g}"
    if verbose:  # all classes
        LOGGER.info(f"{'Class':>24}{'Images':>12}{'top1_acc':>12}{'top5_acc':>12}")
        LOGGER.info(f"{'all':>24}{targets.shape[0]:>12}{top1:>12.3g}{top5:>12.3g}")
        for i, c in model.names.items():
            aci = acc[targets == i]
            top1i, top5i = aci.mean(0).tolist()
            LOGGER.info(f"{c:>24}{aci.shape[0]:>12}{top1i:>12.3g}{top5i:>12.3g}")

        # Print results
        t = tuple(x.t / len(dataloader.dataset.samples) * 1E3 for x in dt)  # speeds per image
        shape = (1, 3, imgsz, imgsz)
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms post-process per image at shape {shape}' % t)
        # save_dir only exists for standalone runs; previously this line raised
        # NameError when a caller passed both a model and verbose=True.
        if save_dir is not None:
            LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    return top1, top5, loss
142
+
143
+
144
def _parse_bool_flag(value):
    """Convert a command-line token such as 'False' or '0' into a real bool.

    Booleans pass through unchanged. Strings count as False only when they are
    one of the usual negative spellings (case-insensitive); anything else is True.
    """
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() not in {'0', 'false', 'f', 'no', 'n', 'off'}


def parse_opt():
    """Parse the command-line options for classification validation.

    The parsed options are printed with print_args() before being returned.

    Returns:
        argparse.Namespace whose attribute names match the keyword arguments
        of ``run`` (data, weights, batch_size, imgsz, device, workers, verbose,
        project, name, exist_ok, half, dnn).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / '../datasets/mnist', help='dataset path')
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-cls.pt', help='model.pt path(s)')
    parser.add_argument('--batch-size', type=int, default=128, help='batch size')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='inference size (pixels)')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    # Without a type converter an explicit value such as `--verbose False` was
    # stored as the non-empty (truthy) string 'False', so verbose output could
    # never be disabled. The default and the bare `--verbose` form stay True.
    parser.add_argument('--verbose',
                        nargs='?',
                        const=True,
                        default=True,
                        type=_parse_bool_flag,
                        help='verbose output')
    parser.add_argument('--project', default=ROOT / 'runs/val-cls', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    print_args(vars(opt))
    return opt
161
+
162
+
163
def main(opt):
    """Check requirements (excluding tensorboard and thop), then validate.

    Args:
        opt: Parsed option namespace; its attributes are forwarded to ``run``
            as keyword arguments.
    """
    check_requirements(exclude=('tensorboard', 'thop'))
    run_kwargs = vars(opt)
    run(**run_kwargs)
166
+
167
+
168
# Script entry point: parse the command-line options and run validation.
if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
yolov9/data/coco.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ path: ../datasets/coco # dataset root dir
2
+ train: train2017.txt # train images (relative to 'path') 118287 images
3
+ val: val2017.txt # val images (relative to 'path') 5000 images
4
+ test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
5
+
6
+ # Classes
7
+ names:
8
+ 0: person
9
+ 1: bicycle
10
+ 2: car
11
+ 3: motorcycle
12
+ 4: airplane
13
+ 5: bus
14
+ 6: train
15
+ 7: truck
16
+ 8: boat
17
+ 9: traffic light
18
+ 10: fire hydrant
19
+ 11: stop sign
20
+ 12: parking meter
21
+ 13: bench
22
+ 14: bird
23
+ 15: cat
24
+ 16: dog
25
+ 17: horse
26
+ 18: sheep
27
+ 19: cow
28
+ 20: elephant
29
+ 21: bear
30
+ 22: zebra
31
+ 23: giraffe
32
+ 24: backpack
33
+ 25: umbrella
34
+ 26: handbag
35
+ 27: tie
36
+ 28: suitcase
37
+ 29: frisbee
38
+ 30: skis
39
+ 31: snowboard
40
+ 32: sports ball
41
+ 33: kite
42
+ 34: baseball bat
43
+ 35: baseball glove
44
+ 36: skateboard
45
+ 37: surfboard
46
+ 38: tennis racket
47
+ 39: bottle
48
+ 40: wine glass
49
+ 41: cup
50
+ 42: fork
51
+ 43: knife
52
+ 44: spoon
53
+ 45: bowl
54
+ 46: banana
55
+ 47: apple
56
+ 48: sandwich
57
+ 49: orange
58
+ 50: broccoli
59
+ 51: carrot
60
+ 52: hot dog
61
+ 53: pizza
62
+ 54: donut
63
+ 55: cake
64
+ 56: chair
65
+ 57: couch
66
+ 58: potted plant
67
+ 59: bed
68
+ 60: dining table
69
+ 61: toilet
70
+ 62: tv
71
+ 63: laptop
72
+ 64: mouse
73
+ 65: remote
74
+ 66: keyboard
75
+ 67: cell phone
76
+ 68: microwave
77
+ 69: oven
78
+ 70: toaster
79
+ 71: sink
80
+ 72: refrigerator
81
+ 73: book
82
+ 74: clock
83
+ 75: vase
84
+ 76: scissors
85
+ 77: teddy bear
86
+ 78: hair drier
87
+ 79: toothbrush
88
+
89
+
90
+ # stuff names
91
+ stuff_names: [
92
+ 'banner', 'blanket', 'branch', 'bridge', 'building-other', 'bush', 'cabinet', 'cage',
93
+ 'cardboard', 'carpet', 'ceiling-other', 'ceiling-tile', 'cloth', 'clothes', 'clouds', 'counter', 'cupboard',
94
+ 'curtain', 'desk-stuff', 'dirt', 'door-stuff', 'fence', 'floor-marble', 'floor-other', 'floor-stone', 'floor-tile',
95
+ 'floor-wood', 'flower', 'fog', 'food-other', 'fruit', 'furniture-other', 'grass', 'gravel', 'ground-other', 'hill',
96
+ 'house', 'leaves', 'light', 'mat', 'metal', 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net', 'paper',
97
+ 'pavement', 'pillow', 'plant-other', 'plastic', 'platform', 'playingfield', 'railing', 'railroad', 'river', 'road',
98
+ 'rock', 'roof', 'rug', 'salad', 'sand', 'sea', 'shelf', 'sky-other', 'skyscraper', 'snow', 'solid-other', 'stairs',
99
+ 'stone', 'straw', 'structural-other', 'table', 'tent', 'textile-other', 'towel', 'tree', 'vegetable', 'wall-brick',
100
+ 'wall-concrete', 'wall-other', 'wall-panel', 'wall-stone', 'wall-tile', 'wall-wood', 'water-other', 'waterdrops',
101
+ 'window-blind', 'window-other', 'wood',
102
+ # other
103
+ 'other',
104
+ # unlabeled
105
+ 'unlabeled'
106
+ ]
107
+
108
+
109
+ # Download script/URL (optional)
110
+ download: |
111
+ from utils.general import download, Path
112
+
113
+
114
+ # Download labels
115
+ #segments = True # segment or box labels
116
+ #dir = Path(yaml['path']) # dataset root dir
117
+ #url = 'https://github.com/WongKinYiu/yolov7/releases/download/v0.1/'
118
+ #urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
119
+ #download(urls, dir=dir.parent)
120
+
121
+ # Download data
122
+ #urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
123
+ # 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
124
+ # 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
125
+ #download(urls, dir=dir / 'images', threads=3)
yolov9/data/hyps/hyp.scratch-high.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 7.5 # box loss gain
9
+ cls: 0.5 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 0.7 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ dfl: 1.5 # dfl loss gain
14
+ iou_t: 0.20 # IoU training threshold
15
+ anchor_t: 5.0 # anchor-multiple threshold
16
+ # anchors: 3 # anchors per output layer (0 to ignore)
17
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
18
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
19
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
20
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
21
+ degrees: 0.0 # image rotation (+/- deg)
22
+ translate: 0.1 # image translation (+/- fraction)
23
+ scale: 0.9 # image scale (+/- gain)
24
+ shear: 0.0 # image shear (+/- deg)
25
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
26
+ flipud: 0.0 # image flip up-down (probability)
27
+ fliplr: 0.5 # image flip left-right (probability)
28
+ mosaic: 1.0 # image mosaic (probability)
29
+ mixup: 0.15 # image mixup (probability)
30
+ copy_paste: 0.3 # segment copy-paste (probability)
yolov9/data/images/a.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e707d2ae1c912beb3e661a4c9f2b1587250e0abaa34bd524a7ceef0cdd26e93d
3
+ size 9563349
yolov9/data/images/b.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28d808ec647d78b7adaae4748bd35fb6405331b6586aab78185a0a76bffa81b0
3
+ size 66360367
yolov9/data/images/horses.jpg ADDED
yolov9/data/images/lamborghini-aventador-2932196_1280.jpg ADDED
yolov9/data/vehicle_dataset/classes.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ car
2
+ threewheel
3
+ bus
4
+ truck
5
+ motorbike
6
+ van
yolov9/data/vehicle_dataset/data.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ train: ../data/vehicle_dataset/train
2
+ val: ../data/vehicle_dataset/valid
3
+
4
+ nc: 6
5
+ names: [car,threewheel,bus,truck,motorbike,van]
yolov9/detect.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import platform
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ import torch
8
+
9
+ FILE = Path(__file__).resolve()
10
+ ROOT = FILE.parents[0] # YOLO root directory
11
+ if str(ROOT) not in sys.path:
12
+ sys.path.append(str(ROOT)) # add ROOT to PATH
13
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
14
+
15
+ from models.common import DetectMultiBackend
16
+ from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
17
+ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
18
+ increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
19
+ from utils.plots import Annotator, colors, save_one_box
20
+ from utils.torch_utils import select_device, smart_inference_mode
21
+
22
+
23
@smart_inference_mode()
def run(
        weights=ROOT / 'yolo.pt',  # model path or triton URL
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
):
    """Run detection on images, videos, streams or screenshots and save annotated results.

    The source string decides the loader: numeric strings, ``*.txt`` lists and
    non-file URLs use LoadStreams, strings starting with ``screen`` use
    LoadScreenshots, everything else uses LoadImages. For every frame the model
    output is filtered with non_max_suppression, boxes are rescaled to the
    original frame, and, depending on the flags, labels are appended to
    ``labels/*.txt``, crops are saved, the frame is shown, and the annotated
    image or video is written under ``project/name``.

    Returns:
        Path (str) of the output for the last processed frame: the image path,
        or for videos/streams the ``*.mp4`` file actually written. When saving
        is disabled the would-be output path is returned, and ``None`` is
        returned if the source produced no frames.
    """
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs
    vid_out = [None] * bs  # file each writer really writes to (suffix forced to .mp4)
    out_path = None  # value returned to the caller; stays None if no frame is processed

    # Run inference
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(im, augment=augment, visualize=visualize)

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                    if save_crop:
                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            out_path = save_path  # also the fallback when nothing is written (nosave)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_out[i] = save_path
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)
                    # save_path is rebuilt with the source suffix on every frame, so
                    # report the *.mp4 file the writer was opened on instead.
                    out_path = vid_out[i]

        # Print time (inference-only)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")

    # Close video files so the returned path is complete when the caller opens it
    for writer in vid_writer:
        if isinstance(writer, cv2.VideoWriter):
            writer.release()

    # Print results
    t = tuple(x.t / max(seen, 1) * 1E3 for x in dt)  # speeds per image (guard against zero frames)
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # weights may be a list (CLI, nargs='+') or a single str/Path (Python callers, default)
        first_weights = weights[0] if isinstance(weights, (list, tuple)) else weights
        strip_optimizer(first_weights)  # update model (to fix SourceChangeWarning)

    print(out_path)
    return out_path
190
+
191
def parse_opt():
    """Parse the command-line options for detection.

    A single ``--imgsz`` value is duplicated so that the result always has two
    entries, and the parsed options are printed with print_args().

    Returns:
        argparse.Namespace whose attribute names match the keyword arguments
        of ``run``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model path or triton URL')
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
    # Default aligned with run(): it previously pointed at data/coco128.yaml,
    # while run() itself defaults to data/coco.yaml.
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt
224
+
225
+
226
def main(opt):
    """Run detection with the parsed options.

    The requirements check is disabled in this copy of the script:
    check_requirements(exclude=('tensorboard', 'thop'))

    Args:
        opt: Parsed option namespace; its attributes are forwarded to ``run``
            as keyword arguments.
    """
    run_kwargs = vars(opt)
    run(**run_kwargs)
229
+
230
+
231
# Script entry point: parse the command-line options and run detection.
if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
yolov9/detect_dual.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import platform
4
+ import sys
5
+ from pathlib import Path
6
+ from PIL import Image
7
+ import torch
8
+
9
+ FILE = Path(__file__).resolve()
10
+ ROOT = FILE.parents[0] # YOLO root directory
11
+ if str(ROOT) not in sys.path:
12
+ sys.path.append(str(ROOT)) # add ROOT to PATH
13
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
14
+
15
+ from models.common import DetectMultiBackend
16
+ from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
17
+ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
18
+ increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
19
+ from utils.plots import Annotator, colors, save_one_box
20
+ from utils.torch_utils import select_device, smart_inference_mode
21
+
22
+
23
+ @smart_inference_mode()
24
+ def run(
25
+ weights=ROOT / 'yolo.pt', # model path or triton URL
26
+ source=ROOT / 'data/images', # file/dir/URL/glob/screen/0(webcam)
27
+ data=ROOT / 'data/coco.yaml', # dataset.yaml path
28
+ imgsz=(640, 640), # inference size (height, width)
29
+ conf_thres=0.25, # confidence threshold
30
+ iou_thres=0.45, # NMS IOU threshold
31
+ max_det=1000, # maximum detections per image
32
+ device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
33
+ view_img=False, # show results
34
+ save_txt=False, # save results to *.txt
35
+ save_conf=False, # save confidences in --save-txt labels
36
+ save_crop=False, # save cropped prediction boxes
37
+ nosave=False, # do not save images/videos
38
+ classes=None, # filter by class: --class 0, or --class 0 2 3
39
+ agnostic_nms=False, # class-agnostic NMS
40
+ augment=False, # augmented inference
41
+ visualize=False, # visualize features
42
+ update=False, # update all models
43
+ project=ROOT / 'runs/detect', # save results to project/name
44
+ name='exp', # save results to project/name
45
+ exist_ok=False, # existing project/name ok, do not increment
46
+ line_thickness=3, # bounding box thickness (pixels)
47
+ hide_labels=False, # hide labels
48
+ hide_conf=False, # hide confidences
49
+ half=False, # use FP16 half-precision inference
50
+ dnn=False, # use OpenCV DNN for ONNX inference
51
+ vid_stride=1, # video frame-rate stride
52
+ ):
53
+ source = str(source)
54
+ Image.open(source)
55
+ save_img = not nosave and not source.endswith('.txt') # save inference images
56
+ is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
57
+ is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
58
+ webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
59
+ screenshot = source.lower().startswith('screen')
60
+ if is_url and is_file:
61
+ source = check_file(source) # download
62
+
63
+ # Directories
64
+ save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
65
+ (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
66
+
67
+ # Load model
68
+ device = select_device(device)
69
+ model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
70
+ stride, names, pt = model.stride, model.names, model.pt
71
+ imgsz = check_img_size(imgsz, s=stride) # check image size
72
+
73
+ # Dataloader
74
+ bs = 1 # batch_size
75
+ if webcam:
76
+ view_img = check_imshow(warn=True)
77
+ dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
78
+ bs = len(dataset)
79
+ elif screenshot:
80
+ dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
81
+ else:
82
+ dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
83
+ vid_path, vid_writer = [None] * bs, [None] * bs
84
+
85
+ # Run inference
86
+ model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
87
+ seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
88
+ for path, im, im0s, vid_cap, s in dataset:
89
+ with dt[0]:
90
+ im = torch.from_numpy(im).to(model.device)
91
+ im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
92
+ im /= 255 # 0 - 255 to 0.0 - 1.0
93
+ if len(im.shape) == 3:
94
+ im = im[None] # expand for batch dim
95
+
96
+ # Inference
97
+ with dt[1]:
98
+ visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
99
+ pred = model(im, augment=augment, visualize=visualize)
100
+ pred = pred[0][1]
101
+
102
+ # NMS
103
+ with dt[2]:
104
+ pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
105
+
106
+ # Second-stage classifier (optional)
107
+ # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
108
+
109
+ # Process predictions
110
+ for i, det in enumerate(pred): # per image
111
+ seen += 1
112
+ if webcam: # batch_size >= 1
113
+ p, im0, frame = path[i], im0s[i].copy(), dataset.count
114
+ s += f'{i}: '
115
+ else:
116
+ p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
117
+
118
+ p = Path(p) # to Path
119
+ save_path = str(save_dir / p.name) # im.jpg
120
+ txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt
121
+ s += '%gx%g ' % im.shape[2:] # print string
122
+ gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
123
+ imc = im0.copy() if save_crop else im0 # for save_crop
124
+ annotator = Annotator(im0, line_width=line_thickness, example=str(names))
125
+ if len(det):
126
+ # Rescale boxes from img_size to im0 size
127
+ det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
128
+
129
+ # Print results
130
+ for c in det[:, 5].unique():
131
+ n = (det[:, 5] == c).sum() # detections per class
132
+ s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
133
+
134
+ # Write results
135
+ for *xyxy, conf, cls in reversed(det):
136
+ if save_txt: # Write to file
137
+ xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
138
+ line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
139
+ with open(f'{txt_path}.txt', 'a') as f:
140
+ f.write(('%g ' * len(line)).rstrip() % line + '\n')
141
+
142
+ if save_img or save_crop or view_img: # Add bbox to image
143
+ c = int(cls) # integer class
144
+ label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
145
+ annotator.box_label(xyxy, label, color=colors(c, True))
146
+ if save_crop:
147
+ save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
148
+
149
+ # Stream results
150
+ im0 = annotator.result()
151
+ if view_img:
152
+ if platform.system() == 'Linux' and p not in windows:
153
+ windows.append(p)
154
+ cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
155
+ cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
156
+ cv2.imshow(str(p), im0)
157
+ cv2.waitKey(1) # 1 millisecond
158
+
159
+ # Save results (image with detections)
160
+ if save_img:
161
+ if dataset.mode == 'image':
162
+ cv2.imwrite(save_path, im0)
163
+ else: # 'video' or 'stream'
164
+ if vid_path[i] != save_path: # new video
165
+ vid_path[i] = save_path
166
+ if isinstance(vid_writer[i], cv2.VideoWriter):
167
+ vid_writer[i].release() # release previous video writer
168
+ if vid_cap: # video
169
+ fps = vid_cap.get(cv2.CAP_PROP_FPS)
170
+ w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
171
+ h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
172
+ else: # stream
173
+ fps, w, h = 30, im0.shape[1], im0.shape[0]
174
+ save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
175
+ vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
176
+ vid_writer[i].write(im0)
177
+
178
+ # Print time (inference-only)
179
+ LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
180
+
181
+ # Print results
182
+ t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image
183
+ LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
184
+ if save_txt or save_img:
185
+ s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
186
+ LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
187
+ if update:
188
+ strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)
189
+
190
+ return save_path
191
+
192
def parse_opt(source,img,weights):
    """Build the detection argument parser and return the parsed options.

    NOTE(review): ``source``, ``img`` and ``weights`` are accepted but never
    read in this function. The commented-out ``add_argument`` lines below show
    them being used as argparse defaults; as written, the hard-coded defaults
    are used instead. Confirm which behaviour callers expect.

    Options are read from the process command line (``parser.parse_args()`` is
    called without an explicit argument list).

    Returns:
        argparse.Namespace with ``imgsz`` expanded to two values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model path or triton URL')
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
    # Disabled variants that would have taken their defaults from the function arguments:
    #parser.add_argument('--weights', nargs='+', type=str, default=weights, help='model path or triton URL')
    #parser.add_argument('--source', type=str, default=source, help='file/dir/URL/glob/screen/0(webcam)')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    #parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=img, help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    opt = parser.parse_args()
    # A single size is duplicated so imgsz always has two entries, e.g. [640] -> [640, 640].
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt
228
+
229
+
230
def main(opt):
    """Run detection with the parsed options, echo the returned path and return it."""
    # check_requirements(exclude=('tensorboard', 'thop'))
    run_kwargs = vars(opt)
    saved_path = run(**run_kwargs)
    print(saved_path)
    return saved_path
235
+
236
+
237
+ #if __name__ == "__main__":
238
+ """
239
+ !python detect_dual.py
240
+ --source /home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/data/images/lamborghini-aventador-2932196_1280.jpg
241
+ --img 640 --device cpu
242
+ --weights /home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/runs/train/exp/weights/best.pt
243
+ --name yolov9_c_640_detect1
244
+
245
+ """
246
+
247
+ """
248
+ detect_dual:
249
+ weights=['/home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/runs/train/exp/weights/best.pt'],
250
+ source=/home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/data/images/lamborghini-aventador-2932196_1280.jpg,
251
+ data=data/coco128.yaml,
252
+ imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=cpu, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=yolov9_c_640_detect15, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
253
+
254
+ """
255
+ """
256
+ print("Hi")
257
+ source ='/home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/data/images/lamborghini-aventador-2932196_1280.jpg'
258
+ img=640,
259
+ weights='/home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/runs/train/exp/weights/best.pt'
260
+ opt = parse_opt(source,img,weights)
261
+ main(opt)
262
+ """
263
+ # import subprocess
264
+ # import os
265
+ # HOME = os.getcwd()
266
+ # print(HOME)
267
+ # # # Construct a list of strings containing the arguments and their values
268
+ # args_list = [
269
+ # "yolov9/detect_dual.py",
270
+ # "--source", "/home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/data/images/lamborghini-aventador-2932196_1280.jpg",
271
+ # "--img", "640",
272
+ # "--device", "cpu",
273
+ # "--weights", "/home/shiv-nlp-mldl-cv/Documents/ERA2Code/ERA2-Session-15_Inference_Code/yolov9/runs/train/exp/weights/best.pt",
274
+ # "--name", "yolov9_c_640_detect15"
275
+ # ]
276
+
277
+ # # # Execute the script with subprocess
278
+ # subprocess.run(["python"] + args_list)
279
+
yolov9/export.py ADDED
@@ -0,0 +1,686 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import contextlib
3
+ import json
4
+ import os
5
+ import platform
6
+ import re
7
+ import subprocess
8
+ import sys
9
+ import time
10
+ import warnings
11
+ from pathlib import Path
12
+
13
+ import pandas as pd
14
+ import torch
15
+ from torch.utils.mobile_optimizer import optimize_for_mobile
16
+
17
# Locate this script and make its directory importable before the
# project-local `models` / `utils` imports that follow.
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # YOLO root directory (the directory containing this file)
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
if platform.system() != 'Windows':
    # On non-Windows systems ROOT is rewritten relative to the current working directory.
    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
23
+
24
+ from models.experimental import attempt_load, End2End
25
+ from models.yolo import ClassificationModel, Detect, DDetect, DualDetect, DualDDetect, DetectionModel, SegmentationModel
26
+ from utils.dataloaders import LoadImages
27
+ from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_version,
28
+ check_yaml, colorstr, file_size, get_default_args, print_args, url2file, yaml_save)
29
+ from utils.torch_utils import select_device, smart_inference_mode
30
+
31
+ MACOS = platform.system() == 'Darwin' # macOS environment
32
+
33
+
34
def export_formats():
    """Return a DataFrame describing every export format known to this script.

    Columns are ``Format`` (display name), ``Argument`` (value accepted by
    ``--include``), ``Suffix`` (output file/directory suffix) and the boolean
    ``CPU`` / ``GPU`` columns.
    """
    columns = ['Format', 'Argument', 'Suffix', 'CPU', 'GPU']
    rows = (
        ('PyTorch', '-', '.pt', True, True),
        ('TorchScript', 'torchscript', '.torchscript', True, True),
        ('ONNX', 'onnx', '.onnx', True, True),
        ('ONNX END2END', 'onnx_end2end', '_end2end.onnx', True, True),
        ('OpenVINO', 'openvino', '_openvino_model', True, False),
        ('TensorRT', 'engine', '.engine', False, True),
        ('CoreML', 'coreml', '.mlmodel', True, False),
        ('TensorFlow SavedModel', 'saved_model', '_saved_model', True, True),
        ('TensorFlow GraphDef', 'pb', '.pb', True, True),
        ('TensorFlow Lite', 'tflite', '.tflite', True, False),
        ('TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False),
        ('TensorFlow.js', 'tfjs', '_web_model', False, False),
        ('PaddlePaddle', 'paddle', '_paddle_model', True, True),
    )
    return pd.DataFrame([list(row) for row in rows], columns=columns)
51
+
52
+
53
def try_export(inner_func):
    """Decorator for export functions, used as ``@try_export``.

    The wrapped function must declare a ``prefix`` default argument and return
    a ``(file, model)`` pair. The wrapper times the call, logs success or
    failure with that prefix, and returns ``(None, None)`` instead of raising
    when anything inside the export (or the success log itself) fails.
    """
    defaults = get_default_args(inner_func)

    def outer_func(*args, **kwargs):
        tag = defaults['prefix']
        try:
            with Profile() as timer:
                exported_file, exported_model = inner_func(*args, **kwargs)
            # Logged inside the try: a failure while measuring the file size counts as an export failure.
            LOGGER.info(
                f'{tag} export success ✅ {timer.t:.1f}s, saved as {exported_file} ({file_size(exported_file):.1f} MB)')
            return exported_file, exported_model
        except Exception as err:
            LOGGER.info(f'{tag} export failure ❌ {timer.t:.1f}s: {err}')
            return None, None

    return outer_func
69
+
70
+
71
@try_export
def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
    """Trace ``model`` with example input ``im`` and save it as ``<file>.torchscript``.

    A JSON ``config.txt`` holding the input shape, the maximum stride and the
    class names is stored alongside the traced module. With ``optimize`` set
    the module is saved for the lite interpreter after mobile optimisation.

    Returns:
        (path of the saved file, None)
    """
    LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
    out_path = file.with_suffix('.torchscript')

    traced = torch.jit.trace(model, im, strict=False)
    config = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
    extra_files = {'config.txt': json.dumps(config)}  # torch._C.ExtraFilesMap()
    if not optimize:
        traced.save(str(out_path), _extra_files=extra_files)
    else:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
        mobile_module = optimize_for_mobile(traced)
        mobile_module._save_for_lite_interpreter(str(out_path), _extra_files=extra_files)
    return out_path, None
85
+
86
+
87
@try_export
def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')):
    """Export ``model`` to ``<file>.onnx`` and return ``(path, onnx_model)``.

    Args:
        model: model to export; segmentation models get a second output name.
        im: example input passed to ``torch.onnx.export``.
        file: path whose suffix is replaced with ``.onnx``.
        opset: ONNX opset version.
        dynamic: truthy to export with dynamic axes (model and input are moved to CPU).
        simplify: truthy to run onnx-simplifier on the result (best effort).
        prefix: log prefix.
    """
    # YOLO ONNX export
    check_requirements('onnx')
    import onnx

    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
    f = file.with_suffix('.onnx')

    output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0']
    if dynamic:
        # `dynamic` is rebound from a flag to the dynamic_axes mapping used below.
        dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}}  # shape(1,3,640,640)
        if isinstance(model, SegmentationModel):
            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
            dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'}  # shape(1,32,160,160)
        elif isinstance(model, DetectionModel):
            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)

    torch.onnx.export(
        model.cpu() if dynamic else model,  # --dynamic only compatible with cpu
        im.cpu() if dynamic else im,
        f,
        verbose=False,
        opset_version=opset,
        do_constant_folding=True,
        input_names=['images'],
        output_names=output_names,
        dynamic_axes=dynamic or None)  # a falsy flag becomes None (no dynamic axes)

    # Checks
    model_onnx = onnx.load(f)  # load onnx model
    onnx.checker.check_model(model_onnx)  # check onnx model

    # Metadata: stride and class names are stored as string-valued metadata properties
    d = {'stride': int(max(model.stride)), 'names': model.names}
    for k, v in d.items():
        meta = model_onnx.metadata_props.add()
        meta.key, meta.value = k, str(v)
    onnx.save(model_onnx, f)

    # Simplify (a failure here is logged and the unsimplified model is kept)
    if simplify:
        try:
            cuda = torch.cuda.is_available()
            check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
            import onnxsim

            LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
            model_onnx, check = onnxsim.simplify(model_onnx)
            assert check, 'assert check failed'
            onnx.save(model_onnx, f)
        except Exception as e:
            LOGGER.info(f'{prefix} simplifier failure: {e}')
    return f, model_onnx
141
+
142
+
143
@try_export
def export_onnx_end2end(model, im, file, simplify, topk_all, iou_thres, conf_thres, device, labels, prefix=colorstr('ONNX END2END:')):
    """Wrap ``model`` in ``End2End`` and export it to ``<file stem>-end2end.onnx``.

    The exported graph has four named outputs (``num_dets``, ``det_boxes``,
    ``det_scores``, ``det_classes``) with a dynamic batch axis. The opset is
    fixed at 12 in this function.

    Returns:
        (output path as a string, loaded/simplified onnx model)
    """
    # YOLO ONNX export
    check_requirements('onnx')
    import onnx
    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
    f = os.path.splitext(file)[0] + "-end2end.onnx"
    batch_size = 'batch'

    dynamic_axes = {'images': {0 : 'batch', 2: 'height', 3:'width'}, }  # variable length axes

    output_axes = {
        'num_dets': {0: 'batch'},
        'det_boxes': {0: 'batch'},
        'det_scores': {0: 'batch'},
        'det_classes': {0: 'batch'},
    }
    dynamic_axes.update(output_axes)
    model = End2End(model, topk_all, iou_thres, conf_thres, None ,device, labels)

    output_names = ['num_dets', 'det_boxes', 'det_scores', 'det_classes']
    # Flat list of dimension labels, consumed in order while walking every
    # dimension of every graph output below.
    shapes = [ batch_size, 1, batch_size, topk_all, 4,
               batch_size, topk_all, batch_size, topk_all]

    torch.onnx.export(model,
                      im,
                      f,
                      verbose=False,
                      export_params=True,  # store the trained parameter weights inside the model file
                      opset_version=12,
                      do_constant_folding=True,  # whether to execute constant folding for optimization
                      input_names=['images'],
                      output_names=output_names,
                      dynamic_axes=dynamic_axes)

    # Checks
    model_onnx = onnx.load(f)  # load onnx model
    onnx.checker.check_model(model_onnx)  # check onnx model
    for i in model_onnx.graph.output:
        for j in i.type.tensor_type.shape.dim:
            j.dim_param = str(shapes.pop(0))  # label each output dimension from the list above

    if simplify:
        try:
            import onnxsim

            print('\nStarting to simplify ONNX...')
            model_onnx, check = onnxsim.simplify(model_onnx)
            assert check, 'assert check failed'
        except Exception as e:
            # Best effort: on failure the unsimplified model is saved.
            print(f'Simplifier failure: {e}')

    # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
    onnx.save(model_onnx,f)
    print('ONNX export success, saved as %s' % f)
    return f, model_onnx
199
+
200
+
201
@try_export
def export_openvino(file, metadata, half, prefix=colorstr('OpenVINO:')):
    """Convert the already-exported ``<file>.onnx`` to OpenVINO with the ``mo`` tool.

    Args:
        file: Path of the source weights; ``.pt`` in it is replaced to form the output directory.
        metadata: mapping written as ``<file stem>.yaml`` into the output directory.
        half: when truthy, ``--compress_to_fp16`` is passed to ``mo``.
        prefix: log prefix.

    Returns:
        (output directory as a string, None)

    Raises:
        subprocess.CalledProcessError: if ``mo`` exits with a non-zero status.
    """
    # YOLO OpenVINO export
    check_requirements('openvino-dev')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
    import openvino.inference_engine as ie

    LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
    f = str(file).replace('.pt', f'_openvino_model{os.sep}')

    # Build the argv list directly instead of splitting a formatted string,
    # so paths containing whitespace are passed to `mo` intact.
    args = ['mo', '--input_model', str(file.with_suffix('.onnx')), '--output_dir', f]
    if half:
        args.append('--compress_to_fp16')
    subprocess.run(args, check=True, env=os.environ)  # export
    yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata)  # add metadata.yaml
    return f, None
217
+
218
+
219
@try_export
def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')):
    """Convert ``model`` to a PaddlePaddle model directory via X2Paddle tracing.

    ``metadata`` is saved as ``<file stem>.yaml`` inside the output directory.

    Returns:
        (output directory as a string, None)
    """
    check_requirements(('paddlepaddle', 'x2paddle'))
    import x2paddle
    from x2paddle.convert import pytorch2paddle

    LOGGER.info(f'\n{prefix} starting export with X2Paddle {x2paddle.__version__}...')
    out_dir = str(file).replace('.pt', f'_paddle_model{os.sep}')

    pytorch2paddle(module=model, save_dir=out_dir, jit_type='trace', input_examples=[im])  # export
    metadata_name = file.with_suffix('.yaml').name
    yaml_save(Path(out_dir) / metadata_name, metadata)  # add metadata.yaml
    return out_dir, None
232
+
233
+
234
@try_export
def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
    """Trace ``model`` and convert it to a CoreML ``.mlmodel`` file.

    ``int8`` selects 8-bit ``kmeans_lut`` weight quantization, otherwise
    ``half`` selects 16-bit ``linear``; quantization is only applied on macOS
    and is skipped with a message elsewhere.

    Returns:
        (path of the saved model, the coremltools model)
    """
    check_requirements('coremltools')
    import coremltools as ct

    LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
    out_path = file.with_suffix('.mlmodel')

    traced = torch.jit.trace(model, im, strict=False)  # TorchScript model
    image_input = ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])
    ct_model = ct.convert(traced, inputs=[image_input])

    if int8:
        bits, mode = 8, 'kmeans_lut'
    elif half:
        bits, mode = 16, 'linear'
    else:
        bits, mode = 32, None

    if bits < 32:
        if not MACOS:
            print(f'{prefix} quantization only supported on macOS, skipping...')
        else:  # quantization only supported on macOS
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)  # suppress numpy==1.20 float warning
                ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
    ct_model.save(out_path)
    return out_path, ct_model
255
+
256
+
257
@try_export
def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
    """Build a TensorRT engine from ``model`` by way of an intermediate ONNX export.

    Args:
        model: model to export (passed through to ``export_onnx``).
        im: example input; must live on a non-CPU device.
        file: path whose suffix is replaced with ``.onnx`` / ``.engine``.
        half: request FP16 (only honoured when the builder reports fast FP16 support).
        dynamic: add an optimization profile with batch sizes from 1 up to ``im.shape[0]``.
        simplify: forwarded to ``export_onnx``.
        workspace: builder workspace size, shifted left by 30 bits (i.e. in GiB).
        verbose: raise the TensorRT logger to VERBOSE severity.
        prefix: log prefix.

    Returns:
        (path of the ``.engine`` file, None)
    """
    # YOLO TensorRT export https://developer.nvidia.com/tensorrt
    assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
    try:
        import tensorrt as trt
    except Exception:
        # On Linux an install is attempted first; the import is retried either way.
        if platform.system() == 'Linux':
            check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com')
        import tensorrt as trt

    if trt.__version__[0] == '7':  # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
        # Temporarily swap in a sliced anchor_grid for the ONNX export, then restore it.
        grid = model.model[-1].anchor_grid
        model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
        export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
        model.model[-1].anchor_grid = grid
    else:  # TensorRT >= 8
        check_version(trt.__version__, '8.0.0', hard=True)  # require tensorrt>=8.0.0
        export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
    onnx = file.with_suffix('.onnx')  # NOTE: `onnx` here is a path, not the onnx module

    LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
    assert onnx.exists(), f'failed to export ONNX file: {onnx}'
    f = file.with_suffix('.engine')  # TensorRT engine file
    logger = trt.Logger(trt.Logger.INFO)
    if verbose:
        logger.min_severity = trt.Logger.Severity.VERBOSE

    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    config.max_workspace_size = workspace * 1 << 30  # parsed as (workspace * 1) << 30
    # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # fix TRT 8.4 deprecation notice

    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnx)):
        raise RuntimeError(f'failed to load ONNX file: {onnx}')

    # Log the network's input and output tensors.
    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    for inp in inputs:
        LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}')
    for out in outputs:
        LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')

    if dynamic:
        if im.shape[0] <= 1:
            LOGGER.warning(f"{prefix} WARNING ⚠️ --dynamic model requires maximum --batch-size argument")
        profile = builder.create_optimization_profile()
        for inp in inputs:
            # Three shapes: batch 1, half the example batch (at least 1), and the example shape.
            profile.set_shape(inp.name, (1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape)
        config.add_optimization_profile(profile)

    LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine as {f}')
    if builder.platform_has_fast_fp16 and half:
        config.set_flag(trt.BuilderFlag.FP16)
    with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
        t.write(engine.serialize())
    return f, None
317
+
318
+
319
@try_export
def export_saved_model(model,
                       im,
                       file,
                       dynamic,
                       tf_nms=False,
                       agnostic_nms=False,
                       topk_per_class=100,
                       topk_all=100,
                       iou_thres=0.45,
                       conf_thres=0.25,
                       keras=False,
                       prefix=colorstr('TensorFlow SavedModel:')):
    """Rebuild ``model`` as a TensorFlow/Keras model and save it as a SavedModel.

    Args:
        model: source model; its ``yaml`` and ``nc`` attributes configure ``TFModel``.
        im: example input in BCHW order, used only for its shape.
        file: weights path; ``.pt`` is replaced with ``_saved_model`` for the output.
        dynamic: when truthy the Keras input has no fixed batch size.
        tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres:
            forwarded to ``TFModel.predict``.
        keras: save through ``keras_model.save`` instead of a frozen ``tf.Module``.
        prefix: log prefix.

    Returns:
        (output path as a string, the Keras model)
    """
    # YOLO TensorFlow SavedModel export
    try:
        import tensorflow as tf
    except Exception:
        # Pick the tensorflow package variant from CUDA availability / platform, then retry.
        check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}")
        import tensorflow as tf
    from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

    from models.tf import TFModel

    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
    f = str(file).replace('.pt', '_saved_model')
    batch_size, ch, *imgsz = list(im.shape)  # BCHW

    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
    im = tf.zeros((batch_size, *imgsz, ch))  # BHWC order for TensorFlow
    # One eager call on a zeros tensor before the symbolic Keras graph is built below.
    _ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
    inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size)
    outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
    keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
    keras_model.trainable = False
    keras_model.summary()
    if keras:
        keras_model.save(f, save_format='tf')
    else:
        # Freeze variables into constants and save through a bare tf.Module.
        spec = tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)
        m = tf.function(lambda x: keras_model(x))  # full model
        m = m.get_concrete_function(spec)
        frozen_func = convert_variables_to_constants_v2(m)
        tfm = tf.Module()
        # With tf_nms only the first four outputs of the frozen function are kept.
        tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if tf_nms else frozen_func(x), [spec])
        tfm.__call__(im)
        tf.saved_model.save(tfm,
                            f,
                            options=tf.saved_model.SaveOptions(experimental_custom_gradients=False) if check_version(
                                tf.__version__, '2.6') else tf.saved_model.SaveOptions())
    return f, keras_model
369
+
370
+
371
@try_export
def export_pb(keras_model, file, prefix=colorstr('TensorFlow GraphDef:')):
    """Freeze ``keras_model`` and write it as a binary GraphDef ``<file>.pb``.

    See https://github.com/leimao/Frozen_Graph_TensorFlow

    Returns:
        (path of the ``.pb`` file, None)
    """
    import tensorflow as tf
    from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
    pb_path = file.with_suffix('.pb')

    wrapped = tf.function(lambda x: keras_model(x))  # full model
    first_input = keras_model.inputs[0]
    input_spec = tf.TensorSpec(first_input.shape, first_input.dtype)
    concrete = wrapped.get_concrete_function(input_spec)
    frozen = convert_variables_to_constants_v2(concrete)
    frozen.graph.as_graph_def()
    tf.io.write_graph(graph_or_graph_def=frozen.graph,
                      logdir=str(pb_path.parent),
                      name=pb_path.name,
                      as_text=False)
    return pb_path, None
386
+
387
+
388
@try_export
def export_tflite(keras_model, im, file, int8, data, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')):
    """Convert ``keras_model`` to a TensorFlow Lite flatbuffer.

    Args:
        keras_model: Keras model to convert.
        im: example input in BCHW order; only its spatial size is used (for calibration images).
        file: weights path; ``.pt`` is replaced with ``-fp16.tflite`` or ``-int8.tflite``.
        int8: quantize to INT8 using a representative dataset taken from the
            ``train`` entry of the ``data`` YAML.
        data: dataset YAML path, read only when ``int8`` is set.
        nms, agnostic_nms: when either is truthy, SELECT_TF_OPS is added to the supported ops.
        prefix: log prefix.

    Returns:
        (output path as a string, None)
    """
    # YOLOv5 TensorFlow Lite export
    import tensorflow as tf

    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
    batch_size, ch, *imgsz = list(im.shape)  # BCHW
    f = str(file).replace('.pt', '-fp16.tflite')

    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
    converter.target_spec.supported_types = [tf.float16]
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    if int8:
        from models.tf import representative_dataset_gen
        dataset = LoadImages(check_dataset(check_yaml(data))['train'], img_size=imgsz, auto=False)
        converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.target_spec.supported_types = []
        converter.inference_input_type = tf.uint8  # or tf.int8
        converter.inference_output_type = tf.uint8  # or tf.int8
        converter.experimental_new_quantizer = True
        f = str(file).replace('.pt', '-int8.tflite')
    if nms or agnostic_nms:
        converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS)

    tflite_model = converter.convert()
    # Use a context manager so the file is flushed and closed deterministically
    # before the caller measures or reads it.
    with open(f, 'wb') as out_file:
        out_file.write(tflite_model)
    return f, None
417
+
418
+
419
@try_export
def export_edgetpu(file, prefix=colorstr('Edge TPU:')):
    """Compile the ``-int8.tflite`` model next to ``file`` for the Edge TPU.

    Linux only. If ``edgetpu_compiler`` is not available, an installation from
    the Coral apt repository is attempted first (dropping ``sudo`` from the
    commands when it is not installed).

    Args:
        file: Path of the source weights; ``.pt`` is replaced to form the input and output names.
        prefix: log prefix.

    Returns:
        (path of the ``-int8_edgetpu.tflite`` model as a string, None)

    Raises:
        AssertionError: when not running on Linux.
        subprocess.CalledProcessError: if an install step or the compiler fails.
    """
    # YOLO Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
    cmd = 'edgetpu_compiler --version'
    help_url = 'https://coral.ai/docs/edgetpu/compiler/'
    assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}'
    if subprocess.run(f'{cmd} >/dev/null', shell=True).returncode != 0:
        LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}')
        sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0  # sudo installed on system
        # These are fixed command strings (pipes need a shell); no caller data is interpolated.
        for c in (
                'curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -',
                'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list',
                'sudo apt-get update', 'sudo apt-get install edgetpu-compiler'):
            subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True)
    ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]

    LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...')
    f = str(file).replace('.pt', '-int8_edgetpu.tflite')  # Edge TPU model
    f_tfl = str(file).replace('.pt', '-int8.tflite')  # TFLite model

    # Pass an argv list rather than splitting a formatted string, so paths
    # containing whitespace reach the compiler as single arguments.
    compile_args = ['edgetpu_compiler', '-s', '-d', '-k', '10', '--out_dir', str(file.parent), f_tfl]
    subprocess.run(compile_args, check=True)
    return f, None
442
+
443
+
444
@try_export
def export_tfjs(file, prefix=colorstr('TensorFlow.js:')):
    """Convert the frozen ``<file>.pb`` graph to a TensorFlow.js web model directory.

    After conversion, the ``outputs`` section of ``model.json`` is rewritten so
    the four ``Identity*`` entries appear in ascending order.

    Args:
        file: Path of the source weights; ``.pt`` is replaced with ``_web_model`` for the output.
        prefix: log prefix.

    Returns:
        (output directory as a string, None)

    Raises:
        subprocess.CalledProcessError: if ``tensorflowjs_converter`` fails.
    """
    # YOLO TensorFlow.js export
    check_requirements('tensorflowjs')
    import tensorflowjs as tfjs

    LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
    f = str(file).replace('.pt', '_web_model')  # js dir
    f_pb = file.with_suffix('.pb')  # *.pb path
    f_json = Path(f'{f}/model.json')  # *.json path

    # argv list keeps paths with whitespace intact; check=True surfaces a
    # converter failure instead of continuing with a missing or stale model.json.
    args = [
        'tensorflowjs_converter',
        '--input_format=tf_frozen_model',
        '--output_node_names=Identity,Identity_1,Identity_2,Identity_3',
        str(f_pb),
        f,
    ]
    subprocess.run(args, check=True)

    # Local name chosen so the module-level `json` import is not shadowed.
    model_json = f_json.read_text()
    subst = re.sub(
        r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, '
        r'"Identity.?.?": {"name": "Identity.?.?"}, '
        r'"Identity.?.?": {"name": "Identity.?.?"}, '
        r'"Identity.?.?": {"name": "Identity.?.?"}}}', r'{"outputs": {"Identity": {"name": "Identity"}, '
        r'"Identity_1": {"name": "Identity_1"}, '
        r'"Identity_2": {"name": "Identity_2"}, '
        r'"Identity_3": {"name": "Identity_3"}}}', model_json)
    with open(f_json, 'w') as j:  # sort JSON Identity_* in ascending order
        j.write(subst)
    return f, None
471
+
472
+
473
def add_tflite_metadata(file, metadata, num_outputs):
    """Attach ``metadata`` to a ``*.tflite`` model as an associated ``meta.txt`` file.

    See https://www.tensorflow.org/lite/models/convert/metadata

    The function is a silent no-op when ``tflite_support`` cannot be imported.

    Args:
        file: path of the ``.tflite`` model to update in place.
        metadata: object whose ``str()`` form is written to the associated file.
        num_outputs: number of output tensor metadata entries to declare.

    Returns:
        None
    """
    import tempfile

    with contextlib.suppress(ImportError):
        # check_requirements('tflite_support')
        from tflite_support import flatbuffers
        from tflite_support import metadata as _metadata
        from tflite_support import metadata_schema_py_generated as _metadata_fb

        # A private temporary directory replaces the fixed '/tmp/meta.txt':
        # it works on every platform, cannot collide with a concurrent export,
        # and is removed even if populating the model raises. The file keeps
        # the name 'meta.txt' because that name is recorded in the model.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file = Path(tmp_dir) / 'meta.txt'
            with open(tmp_file, 'w') as meta_f:
                meta_f.write(str(metadata))

            model_meta = _metadata_fb.ModelMetadataT()
            label_file = _metadata_fb.AssociatedFileT()
            label_file.name = tmp_file.name
            model_meta.associatedFiles = [label_file]

            subgraph = _metadata_fb.SubGraphMetadataT()
            subgraph.inputTensorMetadata = [_metadata_fb.TensorMetadataT()]
            subgraph.outputTensorMetadata = [_metadata_fb.TensorMetadataT()] * num_outputs
            model_meta.subgraphMetadata = [subgraph]

            b = flatbuffers.Builder(0)
            b.Finish(model_meta.Pack(b), _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
            metadata_buf = b.Output()

            populator = _metadata.MetadataPopulator.with_model_file(file)
            populator.load_metadata_buffer(metadata_buf)
            populator.load_associated_files([str(tmp_file)])
            populator.populate()
504
+
505
+
506
@smart_inference_mode()
def run(
        data=ROOT / 'data/coco.yaml',  # 'dataset.yaml path'
        weights=ROOT / 'yolo.pt',  # weights path
        imgsz=(640, 640),  # image (height, width)
        batch_size=1,  # batch size
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        include=('torchscript', 'onnx'),  # include formats
        half=False,  # FP16 half-precision export
        inplace=False,  # set YOLO Detect() inplace=True
        keras=False,  # use Keras
        optimize=False,  # TorchScript: optimize for mobile
        int8=False,  # CoreML/TF INT8 quantization
        dynamic=False,  # ONNX/TF/TensorRT: dynamic axes
        simplify=False,  # ONNX: simplify model
        opset=12,  # ONNX: opset version
        verbose=False,  # TensorRT: verbose log
        workspace=4,  # TensorRT: workspace size (GB)
        nms=False,  # TF: add NMS to model
        agnostic_nms=False,  # TF: add agnostic NMS to model
        topk_per_class=100,  # TF.js NMS: topk per class to keep
        topk_all=100,  # TF.js NMS: topk for all classes to keep
        iou_thres=0.45,  # TF.js NMS: IoU threshold
        conf_thres=0.25,  # TF.js NMS: confidence threshold
):
    """Load PyTorch weights and export them to every format named in ``include``.

    Returns:
        List of exported file/directory paths as strings (empty entries removed).

    Raises:
        AssertionError: on an invalid ``include`` entry or an incompatible option combination.
        RuntimeError: if ``onnx_end2end`` is requested for a model that is not a ``DetectionModel``.
    """
    start = time.time()

    # Map requested formats onto one boolean per supported format
    include = [x.lower() for x in include]  # to lowercase
    fmts = tuple(export_formats()['Argument'][1:])  # --include arguments
    flags = [x in include for x in fmts]
    assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {fmts}'
    (jit, onnx, onnx_end2end, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs,
     paddle) = flags  # export booleans
    is_url = str(weights).startswith(('http:/', 'https:/'))
    file = Path(url2file(weights) if is_url else weights)  # PyTorch weights

    # Load PyTorch model
    device = select_device(device)
    if half:
        assert device.type != 'cpu' or coreml, '--half only compatible with GPU export, i.e. use --device 0'
        assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'
    model = attempt_load(weights, device=device, inplace=True, fuse=True)  # load FP32 model

    # Checks
    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
    if optimize:
        assert device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu'

    # Input
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz = [check_img_size(x, gs) for x in imgsz]  # verify img_size are gs-multiples
    im = torch.zeros(batch_size, 3, *imgsz).to(device)  # image size(1,3,320,192) BCHW iDetection

    # Put the model and its detection heads into export mode
    model.eval()
    for m in model.modules():
        if isinstance(m, (Detect, DDetect, DualDetect, DualDDetect)):
            m.inplace = inplace
            m.dynamic = dynamic
            m.export = True

    for _ in range(2):
        y = model(im)  # dry runs
    if half and not coreml:
        im, model = im.half(), model.half()  # to FP16
    first_out = y[0] if isinstance(y, (tuple, list)) else y
    shape = tuple(first_out.shape)  # model output shape
    metadata = {'stride': int(max(model.stride)), 'names': model.names}  # model metadata
    LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)")

    # Exports
    f = [''] * len(fmts)  # exported filenames
    warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning)  # suppress TracerWarning
    if jit:  # TorchScript
        f[0], _ = export_torchscript(model, im, file, optimize)
    if engine:  # TensorRT required before ONNX
        f[1], _ = export_engine(model, im, file, half, dynamic, simplify, workspace, verbose)
    if onnx or xml:  # OpenVINO requires ONNX
        f[2], _ = export_onnx(model, im, file, opset, dynamic, simplify)
    if onnx_end2end:
        if not isinstance(model, DetectionModel):
            raise RuntimeError("The model is not a DetectionModel.")
        f[2], _ = export_onnx_end2end(model, im, file, simplify, topk_all, iou_thres, conf_thres, device,
                                      len(model.names))
    if xml:  # OpenVINO
        f[3], _ = export_openvino(file, metadata, half)
    if coreml:  # CoreML
        f[4], _ = export_coreml(model, im, file, int8, half)
    if any((saved_model, pb, tflite, edgetpu, tfjs)):  # TensorFlow formats
        assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.'
        assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.'
        f[5], s_model = export_saved_model(model.cpu(),
                                           im,
                                           file,
                                           dynamic,
                                           tf_nms=nms or agnostic_nms or tfjs,
                                           agnostic_nms=agnostic_nms or tfjs,
                                           topk_per_class=topk_per_class,
                                           topk_all=topk_all,
                                           iou_thres=iou_thres,
                                           conf_thres=conf_thres,
                                           keras=keras)
        if pb or tfjs:  # pb prerequisite to tfjs
            f[6], _ = export_pb(s_model, file)
        if tflite or edgetpu:
            f[7], _ = export_tflite(s_model, im, file, int8 or edgetpu, data=data, nms=nms, agnostic_nms=agnostic_nms)
            if edgetpu:
                f[8], _ = export_edgetpu(file)
            add_tflite_metadata(f[8] or f[7], metadata, num_outputs=len(s_model.outputs))
        if tfjs:
            f[9], _ = export_tfjs(file)
    if paddle:  # PaddlePaddle
        f[10], _ = export_paddle(model, im, file, metadata)

    # Finish
    f = [str(x) for x in f if x]  # filter out '' and None
    if any(f):
        cls, det, seg = (isinstance(model, x) for x in (ClassificationModel, DetectionModel, SegmentationModel))  # type
        subdir = Path('segment' if seg else 'classify' if cls else '')
        half_arg = '--half' if half else ''  # --half FP16 inference arg
        if cls:
            hub_note = "# WARNING ⚠️ ClassificationModel not yet supported for PyTorch Hub AutoShape inference"
        elif seg:
            hub_note = "# WARNING ⚠️ SegmentationModel not yet supported for PyTorch Hub AutoShape inference"
        else:
            hub_note = ''

        # Common header, usage hints only for non-end2end exports, then the common footer
        summary = (f'\nExport complete ({time.time() - start:.1f}s)'
                   f"\nResults saved to {colorstr('bold', file.parent.resolve())}")
        if not onnx_end2end:
            summary += (f"\nDetect: python {subdir / ('detect.py' if det else 'predict.py')} --weights {f[-1]} {half_arg}"
                        f"\nValidate: python {subdir / 'val.py'} --weights {f[-1]} {half_arg}"
                        f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}') {hub_note}")
        summary += "\nVisualize: https://netron.app"
        LOGGER.info(summary)
    return f  # return list of exported files/dirs
637
+
638
+
639
def parse_opt():
    """Parse export command-line options, print them and return the namespace.

    When ``onnx_end2end`` is among ``--include``, ``simplify``, ``dynamic`` and
    ``inplace`` are forced on and ``half`` is forced off.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument  # shorthand

    # Inputs
    add('--data', type=str, default=ROOT / 'data/coco.yaml', help='dataset.yaml path')
    add('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model.pt path(s)')
    add('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
    add('--batch-size', type=int, default=1, help='batch size')
    add('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    # Format-specific switches
    add('--half', action='store_true', help='FP16 half-precision export')
    add('--inplace', action='store_true', help='set YOLO Detect() inplace=True')
    add('--keras', action='store_true', help='TF: use Keras')
    add('--optimize', action='store_true', help='TorchScript: optimize for mobile')
    add('--int8', action='store_true', help='CoreML/TF INT8 quantization')
    add('--dynamic', action='store_true', help='ONNX/TF/TensorRT: dynamic axes')
    add('--simplify', action='store_true', help='ONNX: simplify model')
    add('--opset', type=int, default=12, help='ONNX: opset version')
    add('--verbose', action='store_true', help='TensorRT: verbose log')
    add('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')

    # NMS options
    add('--nms', action='store_true', help='TF: add NMS to model')
    add('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model')
    add('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
    add('--topk-all', type=int, default=100, help='ONNX END2END/TF.js NMS: topk for all classes to keep')
    add('--iou-thres', type=float, default=0.45, help='ONNX END2END/TF.js NMS: IoU threshold')
    add('--conf-thres', type=float, default=0.25, help='ONNX END2END/TF.js NMS: confidence threshold')

    # Output formats
    add('--include',
        nargs='+',
        default=['torchscript'],
        help='torchscript, onnx, onnx_end2end, openvino, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle')
    opt = parser.parse_args()

    # End-to-end ONNX export overrides these options
    if 'onnx_end2end' in opt.include:
        opt.simplify, opt.dynamic, opt.inplace, opt.half = True, True, True, False

    print_args(vars(opt))
    return opt
677
+
678
+
679
def main(opt):
    """Call ``run`` once per weights entry, storing the current entry in ``opt.weights``."""
    weight_list = opt.weights if isinstance(opt.weights, list) else [opt.weights]
    for single in weight_list:
        opt.weights = single  # run() receives one weights path at a time
        run(**vars(opt))
682
+
683
+
684
if __name__ == "__main__":
    # Script entry point: parse the CLI options and export
    main(parse_opt())
yolov9/figure/horses_prediction.jpg ADDED
yolov9/figure/multitask.png ADDED

Git LFS Details

  • SHA256: b7c83ee5db84a3760a0f854e5d70ed0e2ca1cc0f5ef5ff8a88e87d525e87eee1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.29 MB
yolov9/figure/performance.png ADDED
yolov9/hubconf.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+
4
def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """Creates or loads a YOLO model

    Arguments:
        name (str): model name or path 'path/to/best.pt'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): apply YOLO .autoshape() wrapper to model
        verbose (bool): print all information to screen
        device (str, torch.device, None): device to use for model parameters

    Returns:
        YOLO model

    Raises:
        Exception: wraps any error raised while loading or building the model,
            with a cache hint and help URL appended to the original message.
    """
    from pathlib import Path

    from models.common import AutoShape, DetectMultiBackend
    from models.experimental import attempt_load
    from models.yolo import ClassificationModel, DetectionModel, SegmentationModel
    from utils.downloads import attempt_download
    from utils.general import LOGGER, check_requirements, intersect_dicts, logging
    from utils.torch_utils import select_device

    if not verbose:
        LOGGER.setLevel(logging.WARNING)
    try:
        check_requirements(exclude=('opencv-python', 'tensorboard', 'thop'))
        name = Path(name)
        path = name.with_suffix('.pt') if name.suffix == '' and not name.is_dir() else name  # checkpoint path
        try:
            device = select_device(device)
            if pretrained and channels == 3 and classes == 80:
                try:
                    model = DetectMultiBackend(path, device=device, fuse=autoshape)  # detection model
                    if autoshape:
                        if model.pt and isinstance(model.model, ClassificationModel):
                            LOGGER.warning('WARNING ⚠️ YOLO ClassificationModel is not yet AutoShape compatible. '
                                           'You must pass torch tensors in BCHW to this model, i.e. shape(1,3,224,224).')
                        elif model.pt and isinstance(model.model, SegmentationModel):
                            LOGGER.warning('WARNING ⚠️ YOLO SegmentationModel is not yet AutoShape compatible. '
                                           'You will not be able to run inference with this model.')
                        else:
                            model = AutoShape(model)  # for file/URI/PIL/cv2/np inputs and NMS
                except Exception:
                    model = attempt_load(path, device=device, fuse=False)  # arbitrary model
            else:
                cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0]  # model.yaml path
                model = DetectionModel(cfg, channels, classes)  # create model
                if pretrained:
                    # NOTE: torch.load unpickles the checkpoint; only load weights from trusted sources
                    ckpt = torch.load(attempt_download(path), map_location=device)  # load
                    csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
                    csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors'])  # intersect
                    model.load_state_dict(csd, strict=False)  # load
                    if len(ckpt['model'].names) == classes:
                        model.names = ckpt['model'].names  # set class names attribute
            return model.to(device)

        except Exception as e:
            help_url = 'https://github.com/ultralytics/yolov5/issues/36'
            s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.'
            raise Exception(s) from e
    finally:
        # Restore the log level on every exit path; previously a failed load left the
        # shared LOGGER stuck at WARNING for the rest of the process.
        if not verbose:
            LOGGER.setLevel(logging.INFO)  # reset to default
67
+
68
+
69
def custom(path='path/to/model.pt', autoshape=True, _verbose=True, device=None):
    """Load a custom or local YOLO model from ``path`` by delegating to ``_create``.

    ``_verbose`` is forwarded as ``verbose``; the remaining ``_create`` arguments keep their defaults.
    """
    return _create(name=path, autoshape=autoshape, verbose=_verbose, device=device)
72
+
73
+
74
if __name__ == '__main__':
    import argparse
    from pathlib import Path

    import numpy as np
    from PIL import Image

    from utils.general import cv2, print_args

    # Command-line options
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='yolo', help='model name')
    opt = parser.parse_args()
    print_args(vars(opt))

    # Build the model (alternative: model = custom(path='path/to/model.pt'))
    model = _create(name=opt.model, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True)

    # One sample per supported input type
    zidane = 'data/images/zidane.jpg'
    bus = 'data/images/bus.jpg'
    imgs = [
        zidane,  # filename
        Path(zidane),  # Path
        'https://ultralytics.com/images/zidane.jpg',  # URI
        cv2.imread(bus)[:, :, ::-1],  # OpenCV
        Image.open(bus),  # PIL
        np.zeros((320, 640, 3)),  # numpy
    ]

    # Batched inference, then report and save
    results = model(imgs, size=320)
    results.print()
    results.save()
yolov9/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # init
yolov9/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (191 Bytes). View file
 
yolov9/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (207 Bytes). View file
 
yolov9/models/__pycache__/common.cpython-310.pyc ADDED
Binary file (49.7 kB). View file
 
yolov9/models/__pycache__/common.cpython-311.pyc ADDED
Binary file (108 kB). View file
 
yolov9/models/__pycache__/experimental.cpython-310.pyc ADDED
Binary file (10.3 kB). View file
 
yolov9/models/__pycache__/experimental.cpython-311.pyc ADDED
Binary file (20.1 kB). View file
 
yolov9/models/__pycache__/yolo.cpython-310.pyc ADDED
Binary file (31.9 kB). View file
 
yolov9/models/__pycache__/yolo.cpython-311.pyc ADDED
Binary file (86.2 kB). View file
 
yolov9/models/common.py ADDED
@@ -0,0 +1,1212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import contextlib
3
+ import json
4
+ import math
5
+ import platform
6
+ import warnings
7
+ import zipfile
8
+ from collections import OrderedDict, namedtuple
9
+ from copy import copy
10
+ from pathlib import Path
11
+ from urllib.parse import urlparse
12
+
13
+ from typing import Optional
14
+
15
+ import cv2
16
+ import numpy as np
17
+ import pandas as pd
18
+ import requests
19
+ import torch
20
+ import torch.nn as nn
21
+ from IPython.display import display
22
+ from PIL import Image
23
+ from torch.cuda import amp
24
+
25
+ from utils import TryExcept
26
+ from utils.dataloaders import exif_transpose, letterbox
27
+ from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
28
+ increment_path, is_notebook, make_divisible, non_max_suppression, scale_boxes,
29
+ xywh2xyxy, xyxy2xywh, yaml_load)
30
+ from utils.plots import Annotator, colors, save_one_box
31
+ from utils.torch_utils import copy_attr, smart_inference_mode
32
+
33
+
34
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Return the padding that gives 'same'-shaped outputs.

    An explicit ``p`` is returned unchanged. Otherwise the padding is half of the
    (dilation-adjusted) kernel size: an int for an int ``k``, a list for a sequence ``k``.
    """
    if d > 1:
        # effective kernel size once dilation is applied
        if isinstance(k, int):
            k = d * (k - 1) + 1
        else:
            k = [d * (x - 1) + 1 for x in k]
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [x // 2 for x in k]
41
+
42
+
43
class Conv(nn.Module):
    """Standard convolution: Conv2d (no bias) -> BatchNorm2d -> activation.

    Args are (ch_in, ch_out, kernel, stride, padding, groups, dilation, activation).
    ``act=True`` selects ``default_act``, an ``nn.Module`` is used as given, and
    anything else disables the activation.
    """
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        super().__init__()
        padding = autopad(k, p, d)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply convolution, batch norm and activation."""
        out = self.conv(x)
        out = self.bn(out)
        return self.act(out)

    def forward_fuse(self, x):
        """Apply convolution and activation only, skipping ``self.bn``."""
        out = self.conv(x)
        return self.act(out)
58
+
59
+
60
class AConv(nn.Module):
    """Average-pool (kernel 2, stride 1) followed by a 3x3 stride-2 ``Conv``."""

    def __init__(self, c1, c2):  # ch_in, ch_out
        super().__init__()
        self.cv1 = Conv(c1, c2, 3, 2, 1)

    def forward(self, x):
        pooled = torch.nn.functional.avg_pool2d(
            x, kernel_size=2, stride=1, padding=0, ceil_mode=False, count_include_pad=True)
        return self.cv1(pooled)
68
+
69
+
70
class ADown(nn.Module):
    """Two-branch downsampling block.

    The input is average-pooled and split in half along the channel axis. One half
    goes through a 3x3 stride-2 ``Conv``; the other is max-pooled (3x3, stride 2)
    and passed through a 1x1 ``Conv``. The two results are concatenated on channels.
    """

    def __init__(self, c1, c2):  # ch_in, ch_out
        super().__init__()
        self.c = c2 // 2
        half_in = c1 // 2
        self.cv1 = Conv(half_in, self.c, 3, 2, 1)
        self.cv2 = Conv(half_in, self.c, 1, 1, 0)

    def forward(self, x):
        pool = torch.nn.functional
        x = pool.avg_pool2d(x, kernel_size=2, stride=1, padding=0, ceil_mode=False, count_include_pad=True)
        first, second = x.chunk(2, 1)
        first = self.cv1(first)
        second = pool.max_pool2d(second, kernel_size=3, stride=2, padding=1)
        second = self.cv2(second)
        return torch.cat((first, second), 1)
84
+
85
+
86
class RepConvN(nn.Module):
    """RepConv is a basic rep-style block, including training and deploy status
    This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py

    ``forward`` sums a 3x3 and a 1x1 ``Conv`` branch; ``fuse_convs`` collapses them
    into one biased ``nn.Conv2d`` stored as ``self.conv``, which ``forward_fuse`` uses.
    The ``d``, ``bn`` and ``deploy`` arguments are accepted but not read by this class.
    """
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
        self.c1 = c1
        self.c2 = c2
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

        self.bn = None  # no identity branch
        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """Forward pass through the single fused convolution."""
        fused = self.conv(x)
        return self.act(fused)

    def forward(self, x):
        """Forward pass summing the 3x3 branch, the 1x1 branch and the optional identity branch."""
        identity = self.bn(x) if self.bn is not None else 0
        return self.act(self.conv1(x) + self.conv2(x) + identity)

    def get_equivalent_kernel_bias(self):
        """Return the (kernel, bias) of a single 3x3 convolution equivalent to all branches."""
        k3, b3 = self._fuse_bn_tensor(self.conv1)
        k1, b1 = self._fuse_bn_tensor(self.conv2)
        k_id, b_id = self._fuse_bn_tensor(self.bn)
        kernel = k3 + self._pad_1x1_to_3x3_tensor(k1) + k_id
        bias = b3 + b1 + b_id
        return kernel, bias

    def _avg_to_3x3_tensor(self, avgp):
        """Build a kernel with value ``1 / kernel_size**2`` on each channel's own input slot."""
        size = avgp.kernel_size
        per_group = self.c1 // self.g
        kernel = torch.zeros((self.c1, per_group, size, size))
        kernel[np.arange(self.c1), np.tile(np.arange(per_group), self.g), :, :] = 1.0 / size ** 2
        return kernel

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Zero-pad a 1x1 kernel by one on each spatial side; ``None`` maps to 0."""
        if kernel1x1 is None:
            return 0
        return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """Fold a branch's batch-norm statistics into a (kernel, bias) pair.

        ``branch`` may be ``None`` (returns ``(0, 0)``), a ``Conv``, or a bare
        ``nn.BatchNorm2d``, for which an identity kernel is built and cached.
        """
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
            kernel = branch.conv.weight
            norm = branch.bn
        elif isinstance(branch, nn.BatchNorm2d):
            if not hasattr(self, 'id_tensor'):
                per_group = self.c1 // self.g
                identity = np.zeros((self.c1, per_group, 3, 3), dtype=np.float32)
                rows = np.arange(self.c1)
                identity[rows, rows % per_group, 1, 1] = 1  # centre tap selects the matching input channel
                self.id_tensor = torch.from_numpy(identity).to(branch.weight.device)
            kernel = self.id_tensor
            norm = branch
        std = (norm.running_var + norm.eps).sqrt()
        scale = (norm.weight / std).reshape(-1, 1, 1, 1)
        return kernel * scale, norm.bias - norm.running_mean * norm.weight / std

    def fuse_convs(self):
        """Replace the branches with one fused ``nn.Conv2d``; no-op if already fused."""
        if hasattr(self, 'conv'):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
        src = self.conv1.conv
        self.conv = nn.Conv2d(in_channels=src.in_channels,
                              out_channels=src.out_channels,
                              kernel_size=src.kernel_size,
                              stride=src.stride,
                              padding=src.padding,
                              dilation=src.dilation,
                              groups=src.groups,
                              bias=True).requires_grad_(False)
        self.conv.weight.data = kernel
        self.conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        # Drop the training-time branches
        delattr(self, 'conv1')
        delattr(self, 'conv2')
        for leftover in ('nm', 'bn', 'id_tensor'):
            if hasattr(self, leftover):
                delattr(self, leftover)
185
+
186
+
187
class SP(nn.Module):
    """Max pooling with kernel ``k``, stride ``s`` and padding ``k // 2``."""

    def __init__(self, k=3, s=1):
        super().__init__()
        self.m = nn.MaxPool2d(kernel_size=k, stride=s, padding=k // 2)

    def forward(self, x):
        pooled = self.m(x)
        return pooled
194
+
195
+
196
class MP(nn.Module):
    """Max pooling whose stride equals its kernel size ``k``."""

    def __init__(self, k=2):
        super().__init__()
        self.m = nn.MaxPool2d(kernel_size=k, stride=k)

    def forward(self, x):
        pooled = self.m(x)
        return pooled
204
+
205
+
206
class ConvTranspose(nn.Module):
    """Transposed 2d convolution -> optional BatchNorm2d -> activation.

    The convolution has a bias only when batch norm is disabled. ``act=True`` selects
    ``default_act``, an ``nn.Module`` is used as given, anything else disables activation.
    """
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        super().__init__()
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
        if bn:
            self.bn = nn.BatchNorm2d(c2)
        else:
            self.bn = nn.Identity()
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        out = self.conv_transpose(x)
        out = self.bn(out)
        return self.act(out)
218
+
219
+
220
class DWConv(Conv):
    """Depth-wise convolution: a ``Conv`` whose group count is ``gcd(c1, c2)``."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, g=groups, d=d, act=act)
224
+
225
+
226
class DWConvTranspose2d(nn.ConvTranspose2d):
    """Depth-wise transpose convolution: ``nn.ConvTranspose2d`` with ``gcd(c1, c2)`` groups."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
        groups = math.gcd(c1, c2)
        super().__init__(in_channels=c1,
                         out_channels=c2,
                         kernel_size=k,
                         stride=s,
                         padding=p1,
                         output_padding=p2,
                         groups=groups)
230
+
231
+
232
class DFL(nn.Module):
    """DFL module: softmax over ``c1`` bins followed by a fixed weighted sum.

    The frozen 1x1 convolution holds the weights ``0 .. c1-1``, so after the softmax
    each output is the expected bin index.
    """

    def __init__(self, c1=17):
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        bin_index = torch.arange(c1, dtype=torch.float).view(1, c1, 1, 1)
        self.conv.weight.data[:] = bin_index  # fixed, non-trainable weights
        self.c1 = c1

    def forward(self, x):
        b, c, a = x.shape  # batch, channels, anchors
        # (b, 4*c1, a) -> (b, 4, c1, a) -> (b, c1, 4, a); softmax runs over the c1 bins
        probs = x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)
        return self.conv(probs).view(b, 4, a)
245
+
246
+
247
class BottleneckBase(nn.Module):
    """Bottleneck of two ``Conv`` layers (default kernels 1 then 3) with an optional residual add.

    The residual is applied only when ``shortcut`` is set and ``c1 == c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, k=(1, 3), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        first_k, second_k = k[0], k[1]
        self.cv1 = Conv(c1, hidden, first_k, 1)
        self.cv2 = Conv(hidden, c2, second_k, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
258
+
259
+
260
class RBottleneckBase(nn.Module):
    """Bottleneck of two ``Conv`` layers (default kernels 3 then 1) with an optional residual add.

    The residual is applied only when ``shortcut`` is set and ``c1 == c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 1), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        first_k, second_k = k[0], k[1]
        self.cv1 = Conv(c1, hidden, first_k, 1)
        self.cv2 = Conv(hidden, c2, second_k, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
271
+
272
+
273
class RepNRBottleneckBase(nn.Module):
    """Bottleneck with a ``RepConvN`` first layer and a ``Conv`` second layer (default kernels 3 then 1).

    The residual is applied only when ``shortcut`` is set and ``c1 == c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 1), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        first_k, second_k = k[0], k[1]
        self.cv1 = RepConvN(c1, hidden, first_k, 1)
        self.cv2 = Conv(hidden, c2, second_k, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
284
+
285
+
286
class Bottleneck(nn.Module):
    """Standard bottleneck: two ``Conv`` layers (default kernels 3 and 3) with an optional residual add.

    The residual is applied only when ``shortcut`` is set and ``c1 == c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        first_k, second_k = k[0], k[1]
        self.cv1 = Conv(c1, hidden, first_k, 1)
        self.cv2 = Conv(hidden, c2, second_k, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
297
+
298
+
299
class RepNBottleneck(nn.Module):
    """Bottleneck with a ``RepConvN`` first layer and a ``Conv`` second layer (default kernels 3 and 3).

    The residual is applied only when ``shortcut`` is set and ``c1 == c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        first_k, second_k = k[0], k[1]
        self.cv1 = RepConvN(c1, hidden, first_k, 1)
        self.cv2 = Conv(hidden, c2, second_k, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
310
+
311
+
312
class Res(nn.Module):
    """ResNet bottleneck: 1x1 -> 3x3 -> 1x1 ``Conv`` layers with an optional residual add.

    The residual is applied only when ``shortcut`` is set and ``c1 == c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, hidden, 3, 1, g=g)
        self.cv3 = Conv(hidden, c2, 1, 1)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv3(self.cv2(self.cv1(x)))
        if self.add:
            return x + out
        return out
324
+
325
+
326
class RepNRes(nn.Module):
    """ResNet bottleneck whose middle 3x3 layer is a ``RepConvN``, with an optional residual add.

    The residual is applied only when ``shortcut`` is set and ``c1 == c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = RepConvN(hidden, hidden, 3, 1, g=g)
        self.cv3 = Conv(hidden, c2, 1, 1)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv3(self.cv2(self.cv1(x)))
        if self.add:
            return x + out
        return out
338
+
339
+
340
class BottleneckCSP(nn.Module):
    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks

    One path runs ``n`` ``Bottleneck`` blocks between 1x1 convolutions, the other is a
    single 1x1 convolution; they are concatenated, normalised, activated and projected to ``c2``.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
        self.act = nn.SiLU()
        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        deep = self.cv3(self.m(self.cv1(x)))
        skip = self.cv2(x)
        merged = torch.cat((deep, skip), 1)
        return self.cv4(self.act(self.bn(merged)))
357
+
358
+
359
class CSP(nn.Module):
    """CSP Bottleneck with 3 convolutions; the deep path stacks ``n`` ``Bottleneck`` blocks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), 1))
371
+
372
+
373
class RepNCSP(nn.Module):
    """CSP bottleneck with 3 convolutions built from ``RepNBottleneck`` blocks.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of stacked ``RepNBottleneck`` blocks.
        shortcut: forwarded to every ``RepNBottleneck``.
        g: forwarded to every ``RepNBottleneck`` (groups, per the original comment).
        e: expansion ratio; hidden width is ``int(c2 * e)``.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        blocks = [RepNBottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Concatenate the stacked-block branch with the bypass branch on dim 1 and fuse."""
        deep = self.m(self.cv1(x))
        bypass = self.cv2(x)
        return self.cv3(torch.cat((deep, bypass), 1))
385
+
386
+
387
class CSPBase(nn.Module):
    """CSP bottleneck with 3 convolutions built from ``BottleneckBase`` blocks.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of stacked ``BottleneckBase`` blocks.
        shortcut: forwarded to every ``BottleneckBase``.
        g: forwarded to every ``BottleneckBase`` (groups, per the original comment).
        e: expansion ratio; hidden width is ``int(c2 * e)``.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        blocks = [BottleneckBase(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Concatenate the stacked-block branch with the bypass branch on dim 1 and fuse."""
        deep = self.m(self.cv1(x))
        bypass = self.cv2(x)
        return self.cv3(torch.cat((deep, bypass), 1))
399
+
400
+
401
class SPP(nn.Module):
    """Spatial Pyramid Pooling (SPP) layer, https://arxiv.org/abs/1406.4729.

    Args:
        c1: input channels.
        c2: output channels.
        k: max-pool kernel sizes; each pool uses stride 1 and padding ``size // 2``.
    """

    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        # cv2 consumes the reduced map plus one pooled copy per kernel size.
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        """Concatenate the reduced input with all of its pooled versions and fuse."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            pooled = [pool(x) for pool in self.m]
            return self.cv2(torch.cat([x, *pooled], 1))
415
+
416
+
417
class ASPP(torch.nn.Module):
    """Four parallel ``Conv2d`` branches whose ReLU outputs are concatenated on dim 1.

    Branches use (kernel, dilation, padding) = (1, 1, 0), (3, 3, 3), (3, 6, 6), (1, 1, 0).
    The last branch is fed the globally average-pooled input and its output is expanded
    back to the spatial size of the other branches, so the result has
    ``4 * out_channels`` channels.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by each branch.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        branch_specs = ((1, 1, 0), (3, 3, 3), (3, 6, 6), (1, 1, 0))  # kernel, dilation, padding
        self.aspp = torch.nn.ModuleList()
        for kernel, dilation, padding in branch_specs:
            self.aspp.append(
                torch.nn.Conv2d(in_channels,
                                out_channels,
                                kernel_size=kernel,
                                stride=1,
                                dilation=dilation,
                                padding=padding,
                                bias=True))
        self.gap = torch.nn.AdaptiveAvgPool2d(1)
        self.aspp_num = len(branch_specs)
        # Separate init pass after all convs exist: weights ~ N(0, sqrt(2 / (kh * kw * out))), zero bias.
        for module in self.modules():
            if isinstance(module, torch.nn.Conv2d):
                fan = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
                module.bias.data.fill_(0)

    def forward(self, x):
        """Apply every branch and concatenate the activations along the channel dim."""
        pooled = self.gap(x)
        last = self.aspp_num - 1
        feats = [F.relu_(self.aspp[k](pooled if k == last else x)) for k in range(self.aspp_num)]
        # The pooled branch yields a 1x1 map; broadcast it to the size of its neighbour.
        feats[-1] = feats[-1].expand_as(feats[-2])
        return torch.cat(feats, dim=1)
452
+
453
+
454
class SPPCSPC(nn.Module):
    """CSP SPP block, see https://github.com/WongKinYiu/CrossStagePartialNetworks.

    Args:
        c1: input channels.
        c2: output channels.
        n, shortcut, g: accepted for signature compatibility; not read by this class.
        e: expansion ratio; hidden width is ``int(2 * c2 * e)``.
        k: max-pool kernel sizes; each pool uses stride 1 and padding ``size // 2``.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
        super().__init__()
        hidden = int(2 * c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(hidden, hidden, 3, 1)
        self.cv4 = Conv(hidden, hidden, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)
        # NOTE: the 4 * hidden input width matches the default of three pool sizes plus the unpooled map.
        self.cv5 = Conv(4 * hidden, hidden, 1, 1)
        self.cv6 = Conv(hidden, hidden, 3, 1)
        self.cv7 = Conv(2 * hidden, c2, 1, 1)

    def forward(self, x):
        """Fuse the SPP branch (cv1..cv6) with the bypass branch (cv2)."""
        stem = self.cv4(self.cv3(self.cv1(x)))
        pyramid = [stem]
        pyramid.extend(pool(stem) for pool in self.m)
        spp_branch = self.cv6(self.cv5(torch.cat(pyramid, 1)))
        bypass = self.cv2(x)
        return self.cv7(torch.cat((spp_branch, bypass), dim=1))
473
+
474
+
475
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer by Glenn Jocher.

    One max-pool is applied three times in sequence; per the original note, ``k=5`` is
    equivalent to ``SPP(k=(5, 9, 13))``.

    Args:
        c1: input channels.
        c2: output channels.
        k: max-pool kernel size (stride 1, padding ``k // 2``).
    """

    def __init__(self, c1, c2, k=5):
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
        # self.m = SoftPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Concatenate the reduced input with three successively pooled copies and fuse."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            pool1 = self.m(x)
            pool2 = self.m(pool1)
            pool3 = self.m(pool2)
            return self.cv2(torch.cat((x, pool1, pool2, pool3), 1))
492
+
493
+
494
+ import torch.nn.functional as F
495
+ from torch.nn.modules.utils import _pair
496
+
497
+
498
class ReOrg(nn.Module):
    """Space-to-depth: fold each 2x2 spatial neighbourhood into channels."""

    def __init__(self):
        super().__init__()

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        """Stack the four stride-2 sub-grids of the last two dims along dim 1."""
        even_rows = x[..., ::2, :]
        odd_rows = x[..., 1::2, :]
        parts = [even_rows[..., ::2], odd_rows[..., ::2], even_rows[..., 1::2], odd_rows[..., 1::2]]
        return torch.cat(parts, 1)
505
+
506
+
507
class Contract(nn.Module):
    """Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40).

    Args:
        gain: spatial reduction factor; height and width must be divisible by it.
    """

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Return ``x`` reshaped to ``(b, c * gain**2, h // gain, w // gain)``."""
        batch, channels, height, width = x.size()
        gain = self.gain
        out_h, out_w = height // gain, width // gain
        blocks = x.view(batch, channels, out_h, gain, out_w, gain)  # x(1,64,40,2,40,2)
        blocks = blocks.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return blocks.view(batch, channels * gain * gain, out_h, out_w)  # x(1,256,40,40)
519
+
520
+
521
class Expand(nn.Module):
    """Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160).

    Args:
        gain: spatial enlargement factor; the channel count must be divisible by ``gain ** 2``.
    """

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Return ``x`` reshaped to ``(b, c // gain**2, h * gain, w * gain)``."""
        batch, channels, height, width = x.size()
        gain = self.gain
        out_c = channels // gain ** 2
        blocks = x.view(batch, gain, gain, out_c, height, width)  # x(1,2,2,16,80,80)
        blocks = blocks.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return blocks.view(batch, out_c, height * gain, width * gain)  # x(1,16,160,160)
533
+
534
+
535
class Concat(nn.Module):
    """Concatenate a list of tensors along a fixed dimension.

    Args:
        dimension: dimension handed to ``torch.cat``.
    """

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Return ``torch.cat(x, self.d)`` for a sequence of tensors ``x``."""
        joined = torch.cat(x, self.d)
        return joined
543
+
544
+
545
class Shortcut(nn.Module):
    """Element-wise sum of the first two entries of the input sequence.

    Args:
        dimension: stored as ``self.d``; ``forward`` does not read it.
    """

    def __init__(self, dimension=0):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Return ``x[0] + x[1]``."""
        first, second = x[0], x[1]
        return first + second
552
+
553
+
554
class Silence(nn.Module):
    """Identity module: ``forward`` returns its input unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` as is."""
        return x
559
+
560
+
561
+ ##### GELAN #####
562
+
563
class SPPELAN(nn.Module):
    """SPP-ELAN: a reducing conv followed by three chained ``SP(5)`` stages.

    The reduced map and the output of every ``SP`` stage are concatenated on dim 1
    (``4 * c3`` channels) and fused by ``cv5``.

    Args:
        c1: input channels.
        c2: output channels.
        c3: hidden channels produced by ``cv1``.
    """

    def __init__(self, c1, c2, c3):  # ch_in, ch_out, ch_hidden
        super().__init__()
        self.c = c3
        self.cv1 = Conv(c1, c3, 1, 1)
        self.cv2 = SP(5)
        self.cv3 = SP(5)
        self.cv4 = SP(5)
        self.cv5 = Conv(4 * c3, c2, 1, 1)

    def forward(self, x):
        """Chain the SP stages, keeping every intermediate map for the final fusion."""
        stages = [self.cv1(x)]
        for pool in (self.cv2, self.cv3, self.cv4):
            stages.append(pool(stages[-1]))  # each stage consumes the previous stage's output
        return self.cv5(torch.cat(stages, 1))
578
+
579
+
580
class RepNCSPELAN4(nn.Module):
    """CSP-ELAN block built from two ``RepNCSP`` + ``Conv`` stages.

    ``cv1`` output is split in two halves along dim 1; the second half feeds ``cv2``,
    whose output feeds ``cv3``. Both halves and both stage outputs are concatenated
    (``c3 + 2 * c4`` channels) and fused by ``cv4``.

    Args:
        c1: input channels.
        c2: output channels.
        c3: channels produced by ``cv1`` (split into two halves of ``c3 // 2``).
        c4: channels produced by each RepNCSP stage.
        c5: passed to ``RepNCSP`` as its ``n`` (number of inner blocks).
    """

    def __init__(self, c1, c2, c3, c4, c5=1):
        super().__init__()
        half = c3 // 2
        self.c = half
        self.cv1 = Conv(c1, c3, 1, 1)
        self.cv2 = nn.Sequential(RepNCSP(half, c4, c5), Conv(c4, c4, 3, 1))
        self.cv3 = nn.Sequential(RepNCSP(c4, c4, c5), Conv(c4, c4, 3, 1))
        self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)

    def forward(self, x):
        """Forward pass that halves ``cv1``'s output with ``Tensor.chunk``."""
        feats = list(self.cv1(x).chunk(2, 1))
        for stage in (self.cv2, self.cv3):
            feats.append(stage(feats[-1]))  # each stage consumes the newest feature map
        return self.cv4(torch.cat(feats, 1))

    def forward_split(self, x):
        """Same computation as ``forward`` but splitting with explicit sizes via ``Tensor.split``."""
        feats = list(self.cv1(x).split((self.c, self.c), 1))
        for stage in (self.cv2, self.cv3):
            feats.append(stage(feats[-1]))
        return self.cv4(torch.cat(feats, 1))
599
+
600
+ #################
601
+
602
+
603
+ ##### YOLOR #####
604
+
605
class ImplicitA(nn.Module):
    """Learned additive per-channel offset (YOLOR implicit knowledge).

    Args:
        channel: number of channels; the parameter has shape ``(1, channel, 1, 1)``.
    """

    def __init__(self, channel):
        super().__init__()
        self.channel = channel
        offset = torch.zeros(1, channel, 1, 1)
        self.implicit = nn.Parameter(offset)
        nn.init.normal_(self.implicit, std=.02)  # re-drawn from N(0, 0.02)

    def forward(self, x):
        """Return ``implicit + x`` (broadcast over batch and spatial dims)."""
        return self.implicit + x
614
+
615
+
616
class ImplicitM(nn.Module):
    """Learned multiplicative per-channel scale (YOLOR implicit knowledge).

    Args:
        channel: number of channels; the parameter has shape ``(1, channel, 1, 1)``.
    """

    def __init__(self, channel):
        super().__init__()
        self.channel = channel
        scale = torch.ones(1, channel, 1, 1)
        self.implicit = nn.Parameter(scale)
        nn.init.normal_(self.implicit, mean=1., std=.02)  # re-drawn from N(1, 0.02)

    def forward(self, x):
        """Return ``implicit * x`` (broadcast over batch and spatial dims)."""
        return self.implicit * x
625
+
626
+ #################
627
+
628
+
629
+ ##### CBNet #####
630
+
631
class CBLinear(nn.Module):
    """Single biased ``Conv2d`` whose output is split into several channel groups.

    Args:
        c1: input channels.
        c2s: output channel count of each group; the conv emits ``sum(c2s)`` channels.
        k: kernel size.
        s: stride.
        p: padding; resolved through the file's ``autopad(k, p)`` helper.
        g: groups.
    """

    def __init__(self, c1, c2s, k=1, s=1, p=None, g=1):
        super().__init__()
        self.c2s = c2s
        total_out = sum(c2s)
        self.conv = nn.Conv2d(c1, total_out, k, s, autopad(k, p), groups=g, bias=True)

    def forward(self, x):
        """Return the conv output split along dim 1 into chunks sized by ``c2s``."""
        return self.conv(x).split(self.c2s, dim=1)
640
+
641
class CBFuse(nn.Module):
    """Sum one selected feature map per source onto the last input.

    Args:
        idx: for source ``i`` in ``xs[:-1]``, ``idx[i]`` selects which of its tensors is used.
    """

    def __init__(self, idx):
        super().__init__()
        self.idx = idx

    def forward(self, xs):
        """Resize the selected maps to ``xs[-1]``'s spatial size and add everything up.

        Args:
            xs: sequence (list or tuple) whose last element is the target tensor and whose
                other elements are indexable collections of tensors.

        Returns:
            Tensor with the shape of ``xs[-1]``.
        """
        target = xs[-1]
        target_size = target.shape[2:]
        res = [F.interpolate(x[self.idx[i]], size=target_size, mode='nearest') for i, x in enumerate(xs[:-1])]
        # Wrap the target explicitly: `res + xs[-1:]` raised TypeError when xs was a tuple.
        res.append(target)
        return torch.sum(torch.stack(res), dim=0)
651
+
652
+ #################
653
+
654
+
655
class DetectMultiBackend(nn.Module):
    """YOLO MultiBackend class for python inference on various backends.

    The backend is chosen from the weights path (see ``_model_type``). ``__init__`` loads the
    matching runtime and ``forward`` dispatches to it, returning torch tensors on
    ``self.device``.
    """

    def __init__(self, weights='yolo.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
        """Load the model behind ``weights`` with the backend matching its suffix.

        Args:
            weights: path/URL of the model, or a list of them (the first entry selects the backend).
            device: torch device used for inference.
            dnn: use OpenCV DNN instead of ONNX Runtime for ``*.onnx`` weights.
            data: optional dataset yaml providing ``names`` when the model carries none.
            fp16: request half precision (only kept for pt / jit / onnx / engine).
            fuse: forwarded to ``attempt_load`` for PyTorch weights.

        NOTE: every local variable of this method is copied onto ``self`` by the final
        ``self.__dict__.update(locals())`` and read back in ``forward``/``warmup``
        (e.g. ``self.session``, ``self.bindings``, ``self.tf``). Renaming or removing a
        local therefore changes the public attributes of the instance.
        """
        # Usage:
        #   PyTorch:              weights = *.pt
        #   TorchScript:                    *.torchscript
        #   ONNX Runtime:                   *.onnx
        #   ONNX OpenCV DNN:                *.onnx --dnn
        #   OpenVINO:                       *_openvino_model
        #   CoreML:                         *.mlmodel
        #   TensorRT:                       *.engine
        #   TensorFlow SavedModel:          *_saved_model
        #   TensorFlow GraphDef:            *.pb
        #   TensorFlow Lite:                *.tflite
        #   TensorFlow Edge TPU:            *_edgetpu.tflite
        #   PaddlePaddle:                   *_paddle_model
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        pt, jit, onnx, onnx_end2end, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
        fp16 &= pt or jit or onnx or engine  # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
        stride = 32  # default stride
        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
        if not (pt or triton):
            w = attempt_download(w)  # download if not local

        if pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
            model.half() if fp16 else model.float()
            if extra_files['config.txt']:  # load metadata dict
                d = json.loads(extra_files['config.txt'],
                               object_hook=lambda d: {int(k) if k.isdigit() else k: v
                                                      for k, v in d.items()})
                stride, names = int(d['stride']), d['names']
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
            check_requirements('opencv-python>=4.5.4')
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            session = onnxruntime.InferenceSession(w, providers=providers)
            output_names = [x.name for x in session.get_outputs()]
            meta = session.get_modelmeta().custom_metadata_map  # metadata
            if 'stride' in meta:
                # NOTE(review): eval() executes whatever string is embedded in the model file's
                # metadata; for untrusted .onnx files consider ast.literal_eval instead.
                stride, names = int(meta['stride']), eval(meta['names'])
        elif xml:  # OpenVINO
            LOGGER.info(f'Loading {w} for OpenVINO inference...')
            check_requirements('openvino')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
            from openvino.runtime import Core, Layout, get_batch
            ie = Core()
            if not Path(w).is_file():  # if not *.xml
                w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
            if network.get_parameters()[0].get_layout().empty:
                network.get_parameters()[0].set_layout(Layout("NCHW"))
            batch_dim = get_batch(network)
            if batch_dim.is_static:
                batch_size = batch_dim.get_length()
            executable_network = ie.compile_model(network, device_name="CPU")  # device_name="MYRIAD" for Intel NCS2
            stride, names = self._load_metadata(Path(w).with_suffix('.yaml'))  # load metadata
        elif engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
            if device.type == 'cpu':
                device = torch.device('cuda:0')
            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            context = model.create_execution_context()
            bindings = OrderedDict()
            output_names = []
            fp16 = False  # default updated below
            dynamic = False
            for i in range(model.num_bindings):
                name = model.get_binding_name(i)
                dtype = trt.nptype(model.get_binding_dtype(i))
                if model.binding_is_input(i):
                    if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                        dynamic = True
                        context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
                    if dtype == np.float16:
                        fp16 = True
                else:  # output
                    output_names.append(name)
                shape = tuple(context.get_binding_shape(i))
                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
            model = ct.models.MLModel(w)
        elif saved_model:  # TF SavedModel
            LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
            import tensorflow as tf
            keras = False  # assume TF1 saved_model
            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
            import tensorflow as tf

            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                ge = x.graph.as_graph_element
                return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

            def gd_outputs(gd):
                name_list, input_list = [], []
                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
                    name_list.append(node.name)
                    input_list.extend(node.input)
                return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))

            gd = tf.Graph().as_graph_def()  # TF GraphDef
            with open(w, 'rb') as f:
                gd.ParseFromString(f.read())
            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
        elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf
                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                delegate = {
                    'Linux': 'libedgetpu.so.1',
                    'Darwin': 'libedgetpu.1.dylib',
                    'Windows': 'edgetpu.dll'}[platform.system()]
                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
            else:  # TFLite
                LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            # load metadata (only present when the .tflite file is also a zip archive)
            with contextlib.suppress(zipfile.BadZipFile):
                with zipfile.ZipFile(w, "r") as model:
                    meta_file = model.namelist()[0]
                    meta = ast.literal_eval(model.read(meta_file).decode("utf-8"))
                    stride, names = int(meta['stride']), meta['names']
        elif tfjs:  # TF.js
            raise NotImplementedError('ERROR: YOLO TF.js inference is not supported')
        elif paddle:  # PaddlePaddle
            LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
            check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
            import paddle.inference as pdi
            if not Path(w).is_file():  # if not *.pdmodel
                w = next(Path(w).rglob('*.pdmodel'))  # get *.pdmodel file from *_paddle_model dir
            weights = Path(w).with_suffix('.pdiparams')
            config = pdi.Config(str(w), str(weights))
            if cuda:
                config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
            predictor = pdi.create_predictor(config)
            input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
            output_names = predictor.get_output_names()
        elif triton:  # NVIDIA Triton Inference Server
            LOGGER.info(f'Using {w} as Triton Inference Server...')
            check_requirements('tritonclient[all]')
            from utils.triton import TritonRemoteModel
            model = TritonRemoteModel(url=w)
            nhwc = model.runtime.startswith("tensorflow")
        else:
            raise NotImplementedError(f'ERROR: {w} is not a supported format')

        # class names
        if 'names' not in locals():
            names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
        if names[0] == 'n01440764' and len(names) == 1000:  # ImageNet
            names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']  # human-readable names

        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False):
        """Run inference on a BCHW batch with the backend loaded in ``__init__``.

        Args:
            im: torch tensor of shape (batch, channel, height, width).
            augment: forwarded to the PyTorch model only.
            visualize: forwarded to the PyTorch model only.

        Returns:
            A tensor, or a list of tensors when the backend produces several outputs;
            numpy outputs are converted with ``from_numpy``.
        """
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

        if self.pt:  # PyTorch
            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
        elif self.jit:  # TorchScript
            y = self.model(im)
        elif self.dnn:  # ONNX OpenCV DNN
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.executable_network([im]).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings['images'].shape:
                i = self.model.get_binding_index('images')
                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
                for name in self.output_names:
                    i = self.model.get_binding_index(name)
                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings['images'].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            if 'confidence' in y:
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                # builtin float: the np.float alias was removed in NumPy 1.24
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input = self.input_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input['index'], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output['index'])
                    if int8:
                        scale, zero_point = output['quantization']
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        else:
            return self.from_numpy(y)

    def from_numpy(self, x):
        """Convert a numpy array to a tensor on ``self.device``; pass anything else through."""
        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        """Warm up the model by running inference once (twice for TorchScript).

        Only runs for pt / jit / onnx / engine / saved_model / pb / triton backends, and
        only when the device is not the CPU or the backend is Triton.
        """
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):  #
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p='path/to/model.pt'):
        """Return one boolean per backend for model path ``p``, plus a trailing Triton flag.

        i.e. path='path/to/model.onnx' -> onnx flag set. The list order follows
        ``export_formats().Suffix`` and is unpacked in ``__init__`` as
        [pt, jit, onnx, onnx_end2end, xml, engine, coreml, saved_model, pb, tflite,
        edgetpu, tfjs, paddle] + [triton].
        """
        from export import export_formats
        from utils.downloads import is_url
        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        # Indices follow the unpacking order in __init__ (tflite=9, edgetpu=10). The previous
        # 8/9 pair predates the onnx_end2end entry and masked pb by tflite instead.
        types[9] &= not types[10]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return types + [triton]

    @staticmethod
    def _load_metadata(f=Path('path/to/meta.yaml')):
        """Return ``(stride, names)`` from yaml file ``f``, or ``(None, None)`` if it does not exist."""
        if f.exists():
            d = yaml_load(f)
            return d['stride'], d['names']  # assign stride, names
        return None, None
960
+
961
+
962
class AutoShape(nn.Module):
    """YOLO input-robust model wrapper for passing cv2/np/PIL/torch inputs.

    Includes preprocessing, inference and NMS. The class attributes below are the NMS /
    inference settings read by ``forward``.
    """
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model, verbose=True):
        """Wrap ``model`` (a plain model or a ``DetectMultiBackend``) and put it in eval mode."""
        super().__init__()
        if verbose:
            LOGGER.info('Adding AutoShape... ')
        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            layers = self.model.model.model if self.dmb else self.model.model
            head = layers[-1]  # Detect()
            head.inplace = False  # Detect.inplace=False for safe multithread inference
            head.export = True  # do not output loss values

    def _apply(self, fn):
        """Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers."""
        self = super()._apply(fn)
        from models.yolo import Detect, Segment
        if self.pt:
            layers = self.model.model.model if self.dmb else self.model.model
            head = layers[-1]  # Detect()
            if isinstance(head, (Detect, Segment)):
                for attr in ('stride', 'anchor_grid', 'stride_grid', 'grid'):
                    value = getattr(head, attr)
                    if isinstance(value, (list, tuple)):
                        setattr(head, attr, list(map(fn, value)))
                    else:
                        setattr(head, attr, fn(value))
        return self

    @smart_inference_mode()
    def forward(self, ims, size=640, augment=False, profile=False):
        """Run preprocessing, inference and NMS on one or more images.

        For size(height=640, width=1280), RGB images example inputs are:
          file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
          URI:             = 'https://ultralytics.com/images/zidane.jpg'
          OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
          PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
          numpy:           = np.zeros((640,1280,3))  # HWC
          torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
          multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        Args:
            ims: a single input or a list/tuple of inputs as listed above.
            size: inference size; an int is expanded to ``(size, size)``.
            augment: forwarded to the wrapped model.
            profile: not read by this method.

        Returns:
            The raw model output for torch.Tensor input, otherwise a ``Detections`` object.
        """
        dt = (Profile(), Profile(), Profile())
        with dt[0]:
            if isinstance(size, int):  # expand
                size = (size, size)
            if self.pt:
                p = next(self.model.parameters())  # param
            else:
                p = torch.empty(1, device=self.model.device)
            autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process
            if isinstance(ims, (list, tuple)):
                n, ims = len(ims), list(ims)  # number, list of images
            else:
                n, ims = 1, [ims]
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f'image{i}'  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    source = requests.get(im, stream=True).raw if str(im).startswith('http') else im
                    im, f = Image.open(source), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
                files.append(Path(f).with_suffix('.jpg').name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                if im.ndim == 3:  # enforce 3ch input
                    im = im[..., :3]
                else:
                    im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
                hw = im.shape[:2]  # HWC
                shape0.append(hw)  # image shape
                gain = max(size) / max(hw)  # gain
                shape1.append([int(dim * gain) for dim in hw])
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(side, self.stride) for side in np.array(shape1).max(0)]  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
                y = self.model(x, augment=augment)  # forward

            # Post-process
            with dt[2]:
                y = non_max_suppression(y if self.dmb else y[0],
                                        self.conf,
                                        self.iou,
                                        self.classes,
                                        self.agnostic,
                                        self.multi_label,
                                        max_det=self.max_det)  # NMS
                for i in range(n):
                    scale_boxes(shape1, y[i][:, :4], shape0[i])

            return Detections(ims, y, files, dt, self.names, x.shape)
1060
+
1061
+
1062
class Detections:
    """Container for YOLO inference results (one entry per image in the batch).

    Holds the images, the per-image prediction tensors, and several derived
    views of the predictions (pixel / normalized, xyxy / xywh), plus helpers to
    print, show, save, crop, render and export the results.
    """

    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        """Store predictions and pre-compute the derived box representations.

        Args:
            ims: list of images (numpy arrays per the original comment).
            pred: list of prediction tensors, one per image; each row is
                (xyxy, conf, cls).
            files: list of image filenames, parallel to ``ims``.
            times: three timing entries (pre-process, inference, NMS). Each may
                be an object exposing elapsed seconds as ``.t`` or a plain
                number.
            names: class names, indexed by integer class id.
            shape: inference tensor shape (BCHW per the original comment);
                ``None`` is stored as an empty tuple.
        """
        super().__init__()
        d = pred[0].device  # device
        # Per-image divisor for normalization: im.shape[1], im.shape[0] repeated
        # for the four box coordinates, then 1, 1 so conf and cls stay untouched.
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]
        self.ims = ims  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        # Fix: the declared default times=(0, 0, 0) holds plain ints with no
        # `.t` attribute, so fall back to the value itself when `.t` is absent.
        self.t = tuple(getattr(x, 't', x) / self.n * 1E3 for x in times)  # per-image times (ms)
        # Fix: the declared default shape=None previously raised in tuple(None).
        self.s = tuple(shape) if shape is not None else ()  # inference BCHW shape

    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
        """Shared worker behind print/show/save/crop/render.

        Builds a per-image summary string and, depending on the flags,
        annotates, displays, saves or crops the images.

        Returns:
            The summary string when ``pprint`` is set; otherwise the list of
            crop dicts when ``crop`` is set; otherwise ``None``.
        """
        s, crops = '', []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(', ')
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            # Crops are written to disk only when `save` is set.
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({
                                'box': box,
                                'conf': conf,
                                'cls': cls,
                                'label': label,
                                'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label if labels else '', color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                display(im) if is_notebook() else im.show(self.files[i])
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:  # log once, after the last image
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)  # overwrite the stored image with the annotated one
        if pprint:
            s = s.lstrip('\n')
            return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops

    @TryExcept('Showing images is not supported in this environment')
    def show(self, labels=True):
        """Display the annotated images."""
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
        """Save the annotated images under ``save_dir`` (passed through increment_path)."""
        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
        """Return one dict per detection (box, conf, cls, label, im); optionally save to disk."""
        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        """Annotate the stored images in place and return them."""
        self._run(render=True, labels=labels)  # render results
        return self.ims

    def pandas(self):
        """Return a copy whose xyxy/xyxyn/xywh/xywhn are lists of pandas DataFrames.

        Example: ``print(results.pandas().xyxy[0])``.
        """
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            # Each row keeps its first five values, then the class id as int and its name.
            a = [[row[:5] + [int(row[5]), self.names[int(row[5])]] for row in det.tolist()]
                 for det in getattr(self, k)]
            setattr(new, k, [pd.DataFrame(rows, columns=c) for rows in a])
        return new

    def tolist(self):
        """Return a list of single-image Detections, i.e. ``for result in results.tolist():``."""
        r = range(self.n)  # iterable
        # Every field stays wrapped in a one-element list (nothing is popped out).
        x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        return x

    def print(self):
        """Log the summary string."""
        LOGGER.info(self.__str__())

    def __len__(self):  # override len(results)
        return self.n

    def __str__(self):  # override print(results)
        return self._run(pprint=True)  # print results

    def __repr__(self):
        return f'YOLO {self.__class__} instance\n' + self.__str__()
1172
+
1173
+
1174
class Proto(nn.Module):
    """YOLO mask Proto module for segmentation models.

    Pipeline: 3x3 Conv -> 2x nearest-neighbour upsample -> 3x3 Conv -> Conv.
    """

    def __init__(self, c1, c_=256, c2=32):
        """Build the layers.

        Args:
            c1: input channels.
            c_: intermediate channels (number of protos).
            c2: output channels (number of masks).
        """
        super().__init__()
        self.cv1 = Conv(c1, c_, k=3)
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.cv2 = Conv(c_, c_, k=3)
        self.cv3 = Conv(c_, c2)

    def forward(self, x):
        """Apply cv1, upsample, cv2 and cv3 in sequence."""
        feats = self.cv1(x)
        feats = self.upsample(feats)
        feats = self.cv2(feats)
        return self.cv3(feats)
1185
+
1186
+
1187
class UConv(nn.Module):
    """3x3 Conv, then a 1x1 nn.Conv2d, then 2x bilinear upsampling."""

    def __init__(self, c1, c_=256, c2=256):
        """Build the layers.

        Args:
            c1: input channels.
            c_: intermediate channels produced by the 3x3 Conv.
            c2: output channels produced by the 1x1 convolution.
        """
        super().__init__()

        self.cv1 = Conv(c1, c_, k=3)
        self.cv2 = nn.Conv2d(c_, c2, 1, 1)
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        """Apply cv1, cv2 and the upsample in sequence."""
        feats = self.cv1(x)
        feats = self.cv2(feats)
        return self.up(feats)
1197
+
1198
+
1199
class Classify(nn.Module):
    """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
        """Build the head.

        Args:
            c1: input channels.
            c2: output channels (size of the final linear layer).
            k: kernel size of the Conv.
            s: stride of the Conv.
            p: padding; resolved through autopad(k, p).
            g: groups of the Conv.
        """
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
        self.drop = nn.Dropout(p=0.0, inplace=True)
        self.linear = nn.Linear(c_, c2)  # to x(b,c2)

    def forward(self, x):
        """Concatenate list inputs on dim 1, then conv -> pool -> flatten -> dropout -> linear."""
        if isinstance(x, list):
            x = torch.cat(x, 1)
        pooled = self.pool(self.conv(x))
        flat = pooled.flatten(1)
        return self.linear(self.drop(flat))
yolov9/models/detect/gelan-c.yaml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ #activation: nn.LeakyReLU(0.1)
8
+ #activation: nn.ReLU()
9
+
10
+ # anchors
11
+ anchors: 3
12
+
13
+ # gelan backbone
14
+ backbone:
15
+ [
16
+ # conv down
17
+ [-1, 1, Conv, [64, 3, 2]], # 0-P1/2
18
+
19
+ # conv down
20
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
21
+
22
+ # elan-1 block
23
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2
24
+
25
+ # avg-conv down
26
+ [-1, 1, ADown, [256]], # 3-P3/8
27
+
28
+ # elan-2 block
29
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4
30
+
31
+ # avg-conv down
32
+ [-1, 1, ADown, [512]], # 5-P4/16
33
+
34
+ # elan-2 block
35
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6
36
+
37
+ # avg-conv down
38
+ [-1, 1, ADown, [512]], # 7-P5/32
39
+
40
+ # elan-2 block
41
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8
42
+ ]
43
+
44
+ # gelan head
45
+ head:
46
+ [
47
+ # elan-spp block
48
+ [-1, 1, SPPELAN, [512, 256]], # 9
49
+
50
+ # up-concat merge
51
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
52
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
53
+
54
+ # elan-2 block
55
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12
56
+
57
+ # up-concat merge
58
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
59
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
60
+
61
+ # elan-2 block
62
+ [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small)
63
+
64
+ # avg-conv-down merge
65
+ [-1, 1, ADown, [256]],
66
+ [[-1, 12], 1, Concat, [1]], # cat head P4
67
+
68
+ # elan-2 block
69
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium)
70
+
71
+ # avg-conv-down merge
72
+ [-1, 1, ADown, [512]],
73
+ [[-1, 9], 1, Concat, [1]], # cat head P5
74
+
75
+ # elan-2 block
76
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large)
77
+
78
+ # detect
79
+ [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5)
80
+ ]
yolov9/models/detect/gelan-e.yaml ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ #activation: nn.LeakyReLU(0.1)
8
+ #activation: nn.ReLU()
9
+
10
+ # anchors
11
+ anchors: 3
12
+
13
+ # gelan backbone
14
+ backbone:
15
+ [
16
+ [-1, 1, Silence, []],
17
+
18
+ # conv down
19
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
20
+
21
+ # conv down
22
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
23
+
24
+ # elan-1 block
25
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 3
26
+
27
+ # avg-conv down
28
+ [-1, 1, ADown, [256]], # 4-P3/8
29
+
30
+ # elan-2 block
31
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5
32
+
33
+ # avg-conv down
34
+ [-1, 1, ADown, [512]], # 6-P4/16
35
+
36
+ # elan-2 block
37
+ [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7
38
+
39
+ # avg-conv down
40
+ [-1, 1, ADown, [1024]], # 8-P5/32
41
+
42
+ # elan-2 block
43
+ [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9
44
+
45
+ # routing
46
+ [1, 1, CBLinear, [[64]]], # 10
47
+ [3, 1, CBLinear, [[64, 128]]], # 11
48
+ [5, 1, CBLinear, [[64, 128, 256]]], # 12
49
+ [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13
50
+ [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14
51
+
52
+ # conv down fuse
53
+ [0, 1, Conv, [64, 3, 2]], # 15-P1/2
54
+ [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16
55
+
56
+ # conv down fuse
57
+ [-1, 1, Conv, [128, 3, 2]], # 17-P2/4
58
+ [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18
59
+
60
+ # elan-1 block
61
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19
62
+
63
+ # avg-conv down fuse
64
+ [-1, 1, ADown, [256]], # 20-P3/8
65
+ [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21
66
+
67
+ # elan-2 block
68
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22
69
+
70
+ # avg-conv down fuse
71
+ [-1, 1, ADown, [512]], # 23-P4/16
72
+ [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24
73
+
74
+ # elan-2 block
75
+ [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25
76
+
77
+ # avg-conv down fuse
78
+ [-1, 1, ADown, [1024]], # 26-P5/32
79
+ [[14, -1], 1, CBFuse, [[4]]], # 27
80
+
81
+ # elan-2 block
82
+ [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28
83
+ ]
84
+
85
+ # gelan head
86
+ head:
87
+ [
88
+ # elan-spp block
89
+ [28, 1, SPPELAN, [512, 256]], # 29
90
+
91
+ # up-concat merge
92
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
93
+ [[-1, 25], 1, Concat, [1]], # cat backbone P4
94
+
95
+ # elan-2 block
96
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32
97
+
98
+ # up-concat merge
99
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
100
+ [[-1, 22], 1, Concat, [1]], # cat backbone P3
101
+
102
+ # elan-2 block
103
+ [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 (P3/8-small)
104
+
105
+ # avg-conv-down merge
106
+ [-1, 1, ADown, [256]],
107
+ [[-1, 32], 1, Concat, [1]], # cat head P4
108
+
109
+ # elan-2 block
110
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 38 (P4/16-medium)
111
+
112
+ # avg-conv-down merge
113
+ [-1, 1, ADown, [512]],
114
+ [[-1, 29], 1, Concat, [1]], # cat head P5
115
+
116
+ # elan-2 block
117
+ [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 41 (P5/32-large)
118
+
119
+ # detect
120
+ [[35, 38, 41], 1, DDetect, [nc]], # DDetect(P3, P4, P5)
121
+ ]
yolov9/models/detect/gelan.yaml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ #activation: nn.LeakyReLU(0.1)
8
+ activation: nn.ReLU()
9
+
10
+ # anchors
11
+ anchors: 3
12
+
13
+ # gelan backbone
14
+ backbone:
15
+ [
16
+ # conv down
17
+ [-1, 1, Conv, [64, 3, 2]], # 0-P1/2
18
+
19
+ # conv down
20
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
21
+
22
+ # elan-1 block
23
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2
24
+
25
+ # conv down
26
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
27
+
28
+ # elan-2 block
29
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4
30
+
31
+ # conv down
32
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
33
+
34
+ # elan-2 block
35
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6
36
+
37
+ # conv down
38
+ [-1, 1, Conv, [512, 3, 2]], # 7-P5/32
39
+
40
+ # elan-2 block
41
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8
42
+ ]
43
+
44
+ # gelan head
45
+ head:
46
+ [
47
+ # elan-spp block
48
+ [-1, 1, SPPELAN, [512, 256]], # 9
49
+
50
+ # up-concat merge
51
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
52
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
53
+
54
+ # elan-2 block
55
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12
56
+
57
+ # up-concat merge
58
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
59
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
60
+
61
+ # elan-2 block
62
+ [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small)
63
+
64
+ # conv-down merge
65
+ [-1, 1, Conv, [256, 3, 2]],
66
+ [[-1, 12], 1, Concat, [1]], # cat head P4
67
+
68
+ # elan-2 block
69
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium)
70
+
71
+ # conv-down merge
72
+ [-1, 1, Conv, [512, 3, 2]],
73
+ [[-1, 9], 1, Concat, [1]], # cat head P5
74
+
75
+ # elan-2 block
76
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large)
77
+
78
+ # detect
79
+ [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5)
80
+ ]
yolov9/models/detect/yolov7-af.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv7
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1. # model depth multiple
6
+ width_multiple: 1. # layer channel multiple
7
+ anchors: 3
8
+
9
+ # YOLOv7 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [32, 3, 1]], # 0
13
+
14
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
15
+ [-1, 1, Conv, [64, 3, 1]],
16
+
17
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
18
+ [-1, 1, Conv, [64, 1, 1]],
19
+ [-2, 1, Conv, [64, 1, 1]],
20
+ [-1, 1, Conv, [64, 3, 1]],
21
+ [-1, 1, Conv, [64, 3, 1]],
22
+ [-1, 1, Conv, [64, 3, 1]],
23
+ [-1, 1, Conv, [64, 3, 1]],
24
+ [[-1, -3, -5, -6], 1, Concat, [1]],
25
+ [-1, 1, Conv, [256, 1, 1]], # 11
26
+
27
+ [-1, 1, MP, []],
28
+ [-1, 1, Conv, [128, 1, 1]],
29
+ [-3, 1, Conv, [128, 1, 1]],
30
+ [-1, 1, Conv, [128, 3, 2]],
31
+ [[-1, -3], 1, Concat, [1]], # 16-P3/8
32
+ [-1, 1, Conv, [128, 1, 1]],
33
+ [-2, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, Conv, [128, 3, 1]],
35
+ [-1, 1, Conv, [128, 3, 1]],
36
+ [-1, 1, Conv, [128, 3, 1]],
37
+ [-1, 1, Conv, [128, 3, 1]],
38
+ [[-1, -3, -5, -6], 1, Concat, [1]],
39
+ [-1, 1, Conv, [512, 1, 1]], # 24
40
+
41
+ [-1, 1, MP, []],
42
+ [-1, 1, Conv, [256, 1, 1]],
43
+ [-3, 1, Conv, [256, 1, 1]],
44
+ [-1, 1, Conv, [256, 3, 2]],
45
+ [[-1, -3], 1, Concat, [1]], # 29-P4/16
46
+ [-1, 1, Conv, [256, 1, 1]],
47
+ [-2, 1, Conv, [256, 1, 1]],
48
+ [-1, 1, Conv, [256, 3, 1]],
49
+ [-1, 1, Conv, [256, 3, 1]],
50
+ [-1, 1, Conv, [256, 3, 1]],
51
+ [-1, 1, Conv, [256, 3, 1]],
52
+ [[-1, -3, -5, -6], 1, Concat, [1]],
53
+ [-1, 1, Conv, [1024, 1, 1]], # 37
54
+
55
+ [-1, 1, MP, []],
56
+ [-1, 1, Conv, [512, 1, 1]],
57
+ [-3, 1, Conv, [512, 1, 1]],
58
+ [-1, 1, Conv, [512, 3, 2]],
59
+ [[-1, -3], 1, Concat, [1]], # 42-P5/32
60
+ [-1, 1, Conv, [256, 1, 1]],
61
+ [-2, 1, Conv, [256, 1, 1]],
62
+ [-1, 1, Conv, [256, 3, 1]],
63
+ [-1, 1, Conv, [256, 3, 1]],
64
+ [-1, 1, Conv, [256, 3, 1]],
65
+ [-1, 1, Conv, [256, 3, 1]],
66
+ [[-1, -3, -5, -6], 1, Concat, [1]],
67
+ [-1, 1, Conv, [1024, 1, 1]], # 50
68
+ ]
69
+
70
+ # yolov7 head
71
+ head:
72
+ [[-1, 1, SPPCSPC, [512]], # 51
73
+
74
+ [-1, 1, Conv, [256, 1, 1]],
75
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
76
+ [37, 1, Conv, [256, 1, 1]], # route backbone P4
77
+ [[-1, -2], 1, Concat, [1]],
78
+
79
+ [-1, 1, Conv, [256, 1, 1]],
80
+ [-2, 1, Conv, [256, 1, 1]],
81
+ [-1, 1, Conv, [128, 3, 1]],
82
+ [-1, 1, Conv, [128, 3, 1]],
83
+ [-1, 1, Conv, [128, 3, 1]],
84
+ [-1, 1, Conv, [128, 3, 1]],
85
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
86
+ [-1, 1, Conv, [256, 1, 1]], # 63
87
+
88
+ [-1, 1, Conv, [128, 1, 1]],
89
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
90
+ [24, 1, Conv, [128, 1, 1]], # route backbone P3
91
+ [[-1, -2], 1, Concat, [1]],
92
+
93
+ [-1, 1, Conv, [128, 1, 1]],
94
+ [-2, 1, Conv, [128, 1, 1]],
95
+ [-1, 1, Conv, [64, 3, 1]],
96
+ [-1, 1, Conv, [64, 3, 1]],
97
+ [-1, 1, Conv, [64, 3, 1]],
98
+ [-1, 1, Conv, [64, 3, 1]],
99
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
100
+ [-1, 1, Conv, [128, 1, 1]], # 75
101
+
102
+ [-1, 1, MP, []],
103
+ [-1, 1, Conv, [128, 1, 1]],
104
+ [-3, 1, Conv, [128, 1, 1]],
105
+ [-1, 1, Conv, [128, 3, 2]],
106
+ [[-1, -3, 63], 1, Concat, [1]],
107
+
108
+ [-1, 1, Conv, [256, 1, 1]],
109
+ [-2, 1, Conv, [256, 1, 1]],
110
+ [-1, 1, Conv, [128, 3, 1]],
111
+ [-1, 1, Conv, [128, 3, 1]],
112
+ [-1, 1, Conv, [128, 3, 1]],
113
+ [-1, 1, Conv, [128, 3, 1]],
114
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
115
+ [-1, 1, Conv, [256, 1, 1]], # 88
116
+
117
+ [-1, 1, MP, []],
118
+ [-1, 1, Conv, [256, 1, 1]],
119
+ [-3, 1, Conv, [256, 1, 1]],
120
+ [-1, 1, Conv, [256, 3, 2]],
121
+ [[-1, -3, 51], 1, Concat, [1]],
122
+
123
+ [-1, 1, Conv, [512, 1, 1]],
124
+ [-2, 1, Conv, [512, 1, 1]],
125
+ [-1, 1, Conv, [256, 3, 1]],
126
+ [-1, 1, Conv, [256, 3, 1]],
127
+ [-1, 1, Conv, [256, 3, 1]],
128
+ [-1, 1, Conv, [256, 3, 1]],
129
+ [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
130
+ [-1, 1, Conv, [512, 1, 1]], # 101
131
+
132
+ [75, 1, Conv, [256, 3, 1]],
133
+ [88, 1, Conv, [512, 3, 1]],
134
+ [101, 1, Conv, [1024, 3, 1]],
135
+
136
+ [[102, 103, 104], 1, Detect, [nc]], # Detect(P3, P4, P5)
137
+ ]
yolov9/models/detect/yolov9-c.yaml ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ #activation: nn.LeakyReLU(0.1)
8
+ #activation: nn.ReLU()
9
+
10
+ # anchors
11
+ anchors: 3
12
+
13
+ # YOLOv9 backbone
14
+ backbone:
15
+ [
16
+ [-1, 1, Silence, []],
17
+
18
+ # conv down
19
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
20
+
21
+ # conv down
22
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
23
+
24
+ # elan-1 block
25
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3
26
+
27
+ # avg-conv down
28
+ [-1, 1, ADown, [256]], # 4-P3/8
29
+
30
+ # elan-2 block
31
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5
32
+
33
+ # avg-conv down
34
+ [-1, 1, ADown, [512]], # 6-P4/16
35
+
36
+ # elan-2 block
37
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7
38
+
39
+ # avg-conv down
40
+ [-1, 1, ADown, [512]], # 8-P5/32
41
+
42
+ # elan-2 block
43
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9
44
+ ]
45
+
46
+ # YOLOv9 head
47
+ head:
48
+ [
49
+ # elan-spp block
50
+ [-1, 1, SPPELAN, [512, 256]], # 10
51
+
52
+ # up-concat merge
53
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
54
+ [[-1, 7], 1, Concat, [1]], # cat backbone P4
55
+
56
+ # elan-2 block
57
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13
58
+
59
+ # up-concat merge
60
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
61
+ [[-1, 5], 1, Concat, [1]], # cat backbone P3
62
+
63
+ # elan-2 block
64
+ [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small)
65
+
66
+ # avg-conv-down merge
67
+ [-1, 1, ADown, [256]],
68
+ [[-1, 13], 1, Concat, [1]], # cat head P4
69
+
70
+ # elan-2 block
71
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium)
72
+
73
+ # avg-conv-down merge
74
+ [-1, 1, ADown, [512]],
75
+ [[-1, 10], 1, Concat, [1]], # cat head P5
76
+
77
+ # elan-2 block
78
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large)
79
+
80
+
81
+ # multi-level reversible auxiliary branch
82
+
83
+ # routing
84
+ [5, 1, CBLinear, [[256]]], # 23
85
+ [7, 1, CBLinear, [[256, 512]]], # 24
86
+ [9, 1, CBLinear, [[256, 512, 512]]], # 25
87
+
88
+ # conv down
89
+ [0, 1, Conv, [64, 3, 2]], # 26-P1/2
90
+
91
+ # conv down
92
+ [-1, 1, Conv, [128, 3, 2]], # 27-P2/4
93
+
94
+ # elan-1 block
95
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28
96
+
97
+ # avg-conv down fuse
98
+ [-1, 1, ADown, [256]], # 29-P3/8
99
+ [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30
100
+
101
+ # elan-2 block
102
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31
103
+
104
+ # avg-conv down fuse
105
+ [-1, 1, ADown, [512]], # 32-P4/16
106
+ [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33
107
+
108
+ # elan-2 block
109
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34
110
+
111
+ # avg-conv down fuse
112
+ [-1, 1, ADown, [512]], # 35-P5/32
113
+ [[25, -1], 1, CBFuse, [[2]]], # 36
114
+
115
+ # elan-2 block
116
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37
117
+
118
+
119
+
120
+ # detection head
121
+
122
+ # detect
123
+ [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5)
124
+ ]
yolov9/models/detect/yolov9-e.yaml ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ #activation: nn.LeakyReLU(0.1)
8
+ #activation: nn.ReLU()
9
+
10
+ # anchors
11
+ anchors: 3
12
+
13
+ # YOLOv9 backbone
14
+ backbone:
15
+ [
16
+ [-1, 1, Silence, []],
17
+
18
+ # conv down
19
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
20
+
21
+ # conv down
22
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
23
+
24
+ # csp-elan block
25
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 3
26
+
27
+ # avg-conv down
28
+ [-1, 1, ADown, [256]], # 4-P3/8
29
+
30
+ # csp-elan block
31
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5
32
+
33
+ # avg-conv down
34
+ [-1, 1, ADown, [512]], # 6-P4/16
35
+
36
+ # csp-elan block
37
+ [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7
38
+
39
+ # avg-conv down
40
+ [-1, 1, ADown, [1024]], # 8-P5/32
41
+
42
+ # csp-elan block
43
+ [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9
44
+
45
+ # routing
46
+ [1, 1, CBLinear, [[64]]], # 10
47
+ [3, 1, CBLinear, [[64, 128]]], # 11
48
+ [5, 1, CBLinear, [[64, 128, 256]]], # 12
49
+ [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13
50
+ [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14
51
+
52
+ # conv down
53
+ [0, 1, Conv, [64, 3, 2]], # 15-P1/2
54
+ [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16
55
+
56
+ # conv down
57
+ [-1, 1, Conv, [128, 3, 2]], # 17-P2/4
58
+ [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18
59
+
60
+ # csp-elan block
61
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19
62
+
63
+ # avg-conv down fuse
64
+ [-1, 1, ADown, [256]], # 20-P3/8
65
+ [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21
66
+
67
+ # csp-elan block
68
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22
69
+
70
+ # avg-conv down fuse
71
+ [-1, 1, ADown, [512]], # 23-P4/16
72
+ [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24
73
+
74
+ # csp-elan block
75
+ [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25
76
+
77
+ # avg-conv down fuse
78
+ [-1, 1, ADown, [1024]], # 26-P5/32
79
+ [[14, -1], 1, CBFuse, [[4]]], # 27
80
+
81
+ # csp-elan block
82
+ [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28
83
+ ]
84
+
85
+ # YOLOv9 head
86
+ head:
87
+ [
88
+ # multi-level auxiliary branch
89
+
90
+ # elan-spp block
91
+ [9, 1, SPPELAN, [512, 256]], # 29
92
+
93
+ # up-concat merge
94
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
95
+ [[-1, 7], 1, Concat, [1]], # cat backbone P4
96
+
97
+ # csp-elan block
98
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32
99
+
100
+ # up-concat merge
101
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
102
+ [[-1, 5], 1, Concat, [1]], # cat backbone P3
103
+
104
+ # csp-elan block
105
+ [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35
106
+
107
+
108
+
109
+ # main branch
110
+
111
+ # elan-spp block
112
+ [28, 1, SPPELAN, [512, 256]], # 36
113
+
114
+ # up-concat merge
115
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
116
+ [[-1, 25], 1, Concat, [1]], # cat backbone P4
117
+
118
+ # csp-elan block
119
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 39
120
+
121
+ # up-concat merge
122
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
123
+ [[-1, 22], 1, Concat, [1]], # cat backbone P3
124
+
125
+ # csp-elan block
126
+ [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 42 (P3/8-small)
127
+
128
+ # avg-conv-down merge
129
+ [-1, 1, ADown, [256]],
130
+ [[-1, 39], 1, Concat, [1]], # cat head P4
131
+
132
+ # csp-elan block
133
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 45 (P4/16-medium)
134
+
135
+ # avg-conv-down merge
136
+ [-1, 1, ADown, [512]],
137
+ [[-1, 36], 1, Concat, [1]], # cat head P5
138
+
139
+ # csp-elan block
140
+ [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 48 (P5/32-large)
141
+
142
+ # detect
143
+ [[35, 32, 29, 42, 45, 48], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5)
144
+ ]
yolov9/models/detect/yolov9.yaml ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ #activation: nn.LeakyReLU(0.1)
8
+ activation: nn.ReLU()
9
+
10
+ # anchors
11
+ anchors: 3
12
+
13
+ # YOLOv9 backbone
14
+ backbone:
15
+ [
16
+ [-1, 1, Silence, []],
17
+
18
+ # conv down
19
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
20
+
21
+ # conv down
22
+ [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
23
+
24
+ # elan-1 block
25
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3
26
+
27
+ # conv down
28
+ [-1, 1, Conv, [256, 3, 2]], # 4-P3/8
29
+
30
+ # elan-2 block
31
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5
32
+
33
+ # conv down
34
+ [-1, 1, Conv, [512, 3, 2]], # 6-P4/16
35
+
36
+ # elan-2 block
37
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7
38
+
39
+ # conv down
40
+ [-1, 1, Conv, [512, 3, 2]], # 8-P5/32
41
+
42
+ # elan-2 block
43
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9
44
+ ]
45
+
46
+ # YOLOv9 head
47
+ head:
48
+ [
49
+ # elan-spp block
50
+ [-1, 1, SPPELAN, [512, 256]], # 10
51
+
52
+ # up-concat merge
53
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
54
+ [[-1, 7], 1, Concat, [1]], # cat backbone P4
55
+
56
+ # elan-2 block
57
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13
58
+
59
+ # up-concat merge
60
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
61
+ [[-1, 5], 1, Concat, [1]], # cat backbone P3
62
+
63
+ # elan-2 block
64
+ [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small)
65
+
66
+ # conv-down merge
67
+ [-1, 1, Conv, [256, 3, 2]],
68
+ [[-1, 13], 1, Concat, [1]], # cat head P4
69
+
70
+ # elan-2 block
71
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium)
72
+
73
+ # conv-down merge
74
+ [-1, 1, Conv, [512, 3, 2]],
75
+ [[-1, 10], 1, Concat, [1]], # cat head P5
76
+
77
+ # elan-2 block
78
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large)
79
+
80
+ # routing
81
+ [5, 1, CBLinear, [[256]]], # 23
82
+ [7, 1, CBLinear, [[256, 512]]], # 24
83
+ [9, 1, CBLinear, [[256, 512, 512]]], # 25
84
+
85
+ # conv down
86
+ [0, 1, Conv, [64, 3, 2]], # 26-P1/2
87
+
88
+ # conv down
89
+ [-1, 1, Conv, [128, 3, 2]], # 27-P2/4
90
+
91
+ # elan-1 block
92
+ [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28
93
+
94
+ # conv down fuse
95
+ [-1, 1, Conv, [256, 3, 2]], # 29-P3/8
96
+ [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30
97
+
98
+ # elan-2 block
99
+ [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31
100
+
101
+ # conv down fuse
102
+ [-1, 1, Conv, [512, 3, 2]], # 32-P4/16
103
+ [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33
104
+
105
+ # elan-2 block
106
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34
107
+
108
+ # conv down fuse
109
+ [-1, 1, Conv, [512, 3, 2]], # 35-P5/32
110
+ [[25, -1], 1, CBFuse, [[2]]], # 36
111
+
112
+ # elan-2 block
113
+ [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37
114
+
115
+ # detect
116
+ [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5)
117
+ ]
yolov9/models/experimental.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+
7
+ from utils.downloads import attempt_download
8
+
9
+
10
class Sum(nn.Module):
    """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070"""

    def __init__(self, n, weight=False):
        """Set up the summation.

        Args:
            n: number of inputs.
            weight: when true, create a learnable weight for each input after the first.
        """
        super().__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        """Add x[1:] onto x[0]; with weights, each addend is scaled by 2 * sigmoid(w[i])."""
        total = x[0]  # the first input is never weighted
        if not self.weight:
            for idx in self.iter:
                total = total + x[idx + 1]
            return total
        scale = torch.sigmoid(self.w) * 2
        for idx in self.iter:
            total = total + x[idx + 1] * scale[idx]
        return total
29
+
30
+
31
class MixConv2d(nn.Module):
    """Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595"""

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        """Create one convolution branch per kernel size in ``k``.

        Args:
            c1: input channels.
            c2: total output channels across all branches.
            k: kernel sizes, one per branch.
            s: stride.
            equal_ch: split ``c2`` evenly across branches when true; otherwise
                choose branch widths by a least-squares solve (equal
                weight.numel() per group, per the original comment).
        """
        super().__init__()
        n = len(k)  # number of convolutions
        if equal_ch:  # equal c_ per group
            idx = torch.linspace(0, n - 1E-6, c2).floor()  # c2 indices
            widths = [(idx == g).sum() for g in range(n)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * n
            a = np.eye(n + 1, n, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            widths = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        branches = []
        for kernel, width in zip(k, widths):
            out_ch = int(width)
            branches.append(
                nn.Conv2d(c1, out_ch, kernel, s, kernel // 2, groups=math.gcd(c1, out_ch), bias=False))
        self.m = nn.ModuleList(branches)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU()

    def forward(self, x):
        """Run every branch on ``x``, concatenate on dim 1, then batch-norm and SiLU."""
        merged = torch.cat([conv(x) for conv in self.m], 1)
        return self.act(self.bn(merged))
54
+
55
+
56
class Ensemble(nn.ModuleList):
    """Ensemble of models, combined by concatenating their outputs for NMS."""

    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run every member on ``x`` and concatenate the first element of each output on dim 1.

        Returns:
            Tuple ``(concatenated_output, None)`` (inference, train output).
        """
        outputs = []
        for module in self:
            outputs.append(module(x, augment, profile, visualize)[0])
        # nms ensemble: concatenate rather than taking the max or mean
        return torch.cat(outputs, 1), None  # inference, train output
67
+
68
+
69
class ORT_NMS(torch.autograd.Function):
    '''ONNX-Runtime NMS operation.

    ``forward`` does not perform NMS: it returns randomly generated placeholder
    indices with the same layout. ``symbolic`` emits the ONNX
    ``NonMaxSuppression`` node.
    '''

    @staticmethod
    def forward(ctx,
                boxes,
                scores,
                max_output_boxes_per_class=torch.tensor([100]),
                iou_threshold=torch.tensor([0.45]),
                score_threshold=torch.tensor([0.25])):
        '''Return random placeholder selections as an int64 tensor of shape (num_det, 3).

        Each row is (batch index, 0, box index): batch indices are sorted
        random values in [0, scores.shape[0]), and box indices count up from
        100. The three threshold arguments are not used here.
        '''
        # Fix: `random` is not among this module's top-level imports, so the
        # original call raised NameError; import it locally.
        import random

        device = boxes.device
        batch = scores.shape[0]
        num_det = random.randint(0, 100)
        batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device)
        idxs = torch.arange(100, 100 + num_det).to(device)
        zeros = torch.zeros((num_det,), dtype=torch.int64).to(device)
        selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous()
        selected_indices = selected_indices.to(torch.int64)
        return selected_indices

    @staticmethod
    def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold):
        '''Emit the ONNX NonMaxSuppression op with the given inputs.'''
        return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)
91
+
92
+
93
class TRT_NMS(torch.autograd.Function):
    """Stand-in op that is exported as the ``TRT::EfficientNMS_TRT`` node.

    ``forward`` only produces random tensors with the expected shapes and
    dtypes; ``symbolic`` emits the plugin node carrying the NMS settings as
    node attributes.
    """

    @staticmethod
    def forward(
        ctx,
        boxes,
        scores,
        background_class=-1,
        box_coding=1,
        iou_threshold=0.45,
        max_output_boxes=100,
        plugin_version="1",
        score_activation=0,
        score_threshold=0.25,
    ):
        """Return random placeholders shaped like the plugin outputs.

        ``scores`` must be 3-D ``(batch, boxes, classes)``; only its shape is
        used. The outputs are created without an explicit device.

        Returns:
            ``(num_det, det_boxes, det_scores, det_classes)`` with shapes
            ``(batch, 1)`` int32, ``(batch, max_output_boxes, 4)`` float,
            ``(batch, max_output_boxes)`` float and
            ``(batch, max_output_boxes)`` int32.
        """
        n_batch, _n_boxes, n_classes = scores.shape
        per_image = (n_batch, max_output_boxes)
        num_det = torch.randint(0, max_output_boxes, (n_batch, 1), dtype=torch.int32)
        det_boxes = torch.randn(*per_image, 4)
        det_scores = torch.randn(*per_image)
        det_classes = torch.randint(0, n_classes, per_image, dtype=torch.int32)
        return num_det, det_boxes, det_scores, det_classes

    @staticmethod
    def symbolic(g,
                 boxes,
                 scores,
                 background_class=-1,
                 box_coding=1,
                 iou_threshold=0.45,
                 max_output_boxes=100,
                 plugin_version="1",
                 score_activation=0,
                 score_threshold=0.25):
        """Emit ``TRT::EfficientNMS_TRT`` with four outputs.

        The ``_i`` / ``_f`` / ``_s`` suffixes on the attribute names tell the
        exporter to store the value as an int / float / string attribute.
        """
        plugin_attrs = {
            "background_class_i": background_class,
            "box_coding_i": box_coding,
            "iou_threshold_f": iou_threshold,
            "max_output_boxes_i": max_output_boxes,
            "plugin_version_s": plugin_version,
            "score_activation_i": score_activation,
            "score_threshold_f": score_threshold,
        }
        num_det, det_boxes, det_scores, det_classes = g.op(
            "TRT::EfficientNMS_TRT", boxes, scores, **plugin_attrs, outputs=4)
        return num_det, det_boxes, det_scores, det_classes
140
+
141
+
142
class ONNX_ORT(nn.Module):
    """Post-processing module that runs ONNX-Runtime style NMS on raw predictions.

    The input is expected as ``[n_batch, 4 + n_scores, n_bboxes]`` (or a list
    whose element ``[1]`` has that layout). The output has one row per kept
    detection: ``[batch_index, x1, y1, x2, y2, class_id, score]``.
    """

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None, n_classes=80):
        """Store the NMS settings as tensors on the resolved device.

        Args:
            max_obj: value passed to NMS as ``max_output_boxes_per_class``.
            iou_thres: IoU threshold passed to NMS.
            score_thres: score threshold passed to NMS.
            max_wh: per-class coordinate offset; 0 makes NMS class-agnostic.
            device: target device; a falsy value selects the CPU.
            n_classes: stored on the module; not read by ``forward``.
        """
        super().__init__()
        self.device = device if device else torch.device("cpu")
        # Use the resolved device (not the raw argument, which may be None) so
        # all constants end up on the same device as ``convert_matrix``.
        self.max_obj = torch.tensor([max_obj]).to(self.device)
        self.iou_threshold = torch.tensor([iou_thres]).to(self.device)
        self.score_threshold = torch.tensor([score_thres]).to(self.device)
        self.max_wh = max_wh  # if max_wh != 0 : non-agnostic else : agnostic
        # Right-multiplying [cx, cy, w, h] by this matrix gives
        # [cx - w/2, cy - h/2, cx + w/2, cy + h/2], i.e. corner coordinates.
        self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
                                           dtype=torch.float32,
                                           device=self.device)
        self.n_classes = n_classes

    def _decode(self, x):
        """Split raw predictions into corner boxes, best score and best class id."""
        if isinstance(x, list):  # yolov9-c.pt and yolov9-e.pt return list
            x = x[1]
        x = x.permute(0, 2, 1)  # -> [n_batch, n_bboxes, 4 + n_scores]
        # Boxes stay 3-D ([n_batch, n_bboxes, 4]). An extra singleton axis here
        # would broadcast incorrectly against the per-box class offset and
        # would not match the 3-D box input NonMaxSuppression expects.
        bboxes = x[..., :4] @ self.convert_matrix
        max_score, category_id = x[..., 4:].max(2, keepdim=True)  # both [n_batch, n_bboxes, 1]
        return bboxes, max_score, category_id

    @staticmethod
    def _gather_detections(selected_indices, bboxes, category_id, max_score):
        """Look up the kept boxes and pack them as rows of 7 values."""
        # selected_indices rows are (batch index, class index, box index).
        X, Y = selected_indices[:, 0], selected_indices[:, 2]
        selected_boxes = bboxes[X, Y, :]
        selected_categories = category_id[X, Y, :].float()
        selected_scores = max_score[X, Y, :]
        X = X.unsqueeze(1).float()
        return torch.cat([X, selected_boxes, selected_categories, selected_scores], 1)

    def forward(self, x):
        """Decode predictions, run NMS and return ``[num_det, 7]`` detections."""
        ## https://github.com/thaitc-hust/yolov9-tensorrt/blob/main/torch2onnx.py
        ## thanks https://github.com/thaitc-hust
        bboxes, max_score, category_id = self._decode(x)
        # Shift each box by class_id * max_wh so boxes of different classes
        # cannot overlap; a single-class NMS call then acts per class.
        dis = category_id.float() * self.max_wh
        nmsbox = bboxes + dis
        max_score_tp = max_score.transpose(1, 2).contiguous()  # [n_batch, 1, n_bboxes]
        selected_indices = ORT_NMS.apply(nmsbox, max_score_tp, self.max_obj, self.iou_threshold, self.score_threshold)
        return self._gather_detections(selected_indices, bboxes, category_id, max_score)
182
+
183
+
184
class ONNX_TRT(nn.Module):
    """Post-processing module that hands raw predictions to the TensorRT NMS plugin.

    The input is expected as ``[n_batch, 4 + n_scores, n_bboxes]`` (or a list
    whose element ``[1]`` has that layout). The output is the 4-tuple
    ``(num_det, det_boxes, det_scores, det_classes)`` returned by ``TRT_NMS``.
    """

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, n_classes=80):
        """Store the plugin settings as plain Python scalars.

        Args:
            max_obj: passed to the plugin as ``max_output_boxes``.
            iou_thres: IoU threshold passed to the plugin.
            score_thres: score threshold passed to the plugin.
            max_wh: must be ``None``; this module does not use it.
            device: stored on the module; a falsy value selects the CPU.
            n_classes: stored on the module; not read by ``forward``.
        """
        super().__init__()
        assert max_wh is None
        self.device = device if device else torch.device('cpu')
        # Plain ints, matching the TRT_NMS defaults. A trailing comma here
        # would silently store 1-tuples and export list-valued attributes.
        self.background_class = -1
        self.box_coding = 1  # NOTE(review): 1 = centre/size boxes per the EfficientNMS plugin docs - confirm
        self.iou_threshold = iou_thres
        self.max_obj = max_obj
        self.plugin_version = '1'
        self.score_activation = 0
        self.score_threshold = score_thres
        self.n_classes = n_classes

    def forward(self, x):
        """Split predictions into boxes and scores and call the NMS plugin op."""
        ## https://github.com/thaitc-hust/yolov9-tensorrt/blob/main/torch2onnx.py
        ## thanks https://github.com/thaitc-hust
        if isinstance(x, list):  ## yolov9-c.pt and yolov9-e.pt return list
            x = x[1]
        x = x.permute(0, 2, 1)
        bboxes_x = x[..., 0:1]
        bboxes_y = x[..., 1:2]
        bboxes_w = x[..., 2:3]
        bboxes_h = x[..., 3:4]
        bboxes = torch.cat([bboxes_x, bboxes_y, bboxes_w, bboxes_h], dim=-1)
        bboxes = bboxes.unsqueeze(2)  # [n_batch, n_bboxes, 4] -> [n_batch, n_bboxes, 1, 4]
        scores = x[..., 4:]
        num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(bboxes, scores, self.background_class,
                                                                    self.box_coding, self.iou_threshold,
                                                                    self.max_obj, self.plugin_version,
                                                                    self.score_activation, self.score_threshold)
        return num_det, det_boxes, det_scores, det_classes
218
+
219
class End2End(nn.Module):
    """Wrap a detection model together with an exportable NMS stage.

    The NMS stage is ``ONNX_TRT`` when ``max_wh`` is ``None`` and ``ONNX_ORT``
    otherwise.
    """

    def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, n_classes=80):
        """Move ``model`` to ``device``, flag its last layer and build the NMS stage.

        Args:
            model: object exposing ``.to(device)`` and a ``.model`` sequence
                whose last element receives ``end2end = True``.
            max_obj, iou_thres, score_thres, max_wh, device, n_classes:
                forwarded positionally to the selected NMS module.
        """
        super().__init__()
        if not device:
            device = torch.device('cpu')
        assert max_wh is None or isinstance(max_wh, int)
        self.model = model.to(device)
        self.model.model[-1].end2end = True
        if max_wh is None:
            self.patch_model = ONNX_TRT
        else:
            self.patch_model = ONNX_ORT
        nms_args = (max_obj, iou_thres, score_thres, max_wh, device, n_classes)
        self.end2end = self.patch_model(*nms_args)
        self.end2end.eval()

    def forward(self, x):
        """Run the wrapped model, then the NMS stage, on ``x``."""
        raw_predictions = self.model(x)
        return self.end2end(raw_predictions)
235
+
236
+
237
def attempt_load(weights, device=None, inplace=True, fuse=True):
    """Load one model, or an ensemble of models, from checkpoint file(s).

    Accepts ``weights=[a, b, c]``, ``weights=[a]`` or ``weights=a``.

    Args:
        weights: a single checkpoint reference or a list of them; each one is
            resolved through ``attempt_download`` before loading.
        device: passed to ``.to(device)`` on every loaded model.
        inplace: value assigned to ``.inplace`` on activation, ``Detect`` and
            ``Model`` modules.
        fuse: when true, models that have a ``fuse`` method are fused.

    Returns:
        The model itself when exactly one was loaded; otherwise an ``Ensemble``
        carrying ``names``, ``nc`` and ``yaml`` from the first model and the
        ``stride`` of the member with the largest stride.

    NOTE(review): ``torch.load`` unpickles the checkpoint, so weights should
    only come from trusted sources.
    """
    from models.yolo import Detect, Model

    weight_list = weights if isinstance(weights, list) else [weights]
    inplace_types = (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model)

    model = Ensemble()
    for w in weight_list:
        checkpoint = torch.load(attempt_download(w), map_location='cpu')  # load
        # Prefer the EMA weights when present; cast to FP32.
        net = (checkpoint.get('ema') or checkpoint['model']).to(device).float()

        # Model compatibility updates
        if not hasattr(net, 'stride'):
            net.stride = torch.tensor([32.])
        if hasattr(net, 'names') and isinstance(net.names, (list, tuple)):
            net.names = dict(enumerate(net.names))  # convert to dict

        if fuse and hasattr(net, 'fuse'):
            net = net.fuse()
        model.append(net.eval())  # model in eval mode

    # Module compatibility updates (exact type match, subclasses excluded)
    for m in model.modules():
        kind = type(m)
        if kind in inplace_types:
            m.inplace = inplace  # torch 1.7.0 compatibility
        elif kind is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
            m.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # Return model
    if len(model) == 1:
        return model[-1]

    # Return detection ensemble
    print(f'Ensemble created with {weights}\n')
    first = model[0]
    for attr in ('names', 'nc', 'yaml'):
        setattr(model, attr, getattr(first, attr))
    max_strides = torch.tensor([m.stride.max() for m in model])
    widest = torch.argmax(max_strides).int()
    model.stride = model[widest].stride  # max stride
    assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
    return model