first commit
This view is limited to 50 files because it contains too many changes.
- LICENSE +427 -0
- README.md +38 -0
- annotator/openpose/__init__.py +73 -0
- annotator/openpose/body.py +219 -0
- annotator/openpose/hand.py +86 -0
- annotator/openpose/model.py +219 -0
- annotator/openpose/util.py +163 -0
- annotator/segm/__init__.py +162 -0
- annotator/segm/modules/__init__.py +5 -0
- annotator/segm/modules/bn.py +132 -0
- annotator/segm/modules/deeplab.py +84 -0
- annotator/segm/modules/dense.py +42 -0
- annotator/segm/modules/functions.py +244 -0
- annotator/segm/modules/misc.py +21 -0
- annotator/segm/modules/residual.py +182 -0
- annotator/segm/modules/src/checks.h +15 -0
- annotator/segm/modules/src/inplace_abn.cpp +95 -0
- annotator/segm/modules/src/inplace_abn.h +88 -0
- annotator/segm/modules/src/inplace_abn_cpu.cpp +119 -0
- annotator/segm/modules/src/inplace_abn_cuda.cu +333 -0
- annotator/segm/modules/src/inplace_abn_cuda_half.cu +275 -0
- annotator/segm/modules/src/utils/checks.h +15 -0
- annotator/segm/modules/src/utils/common.h +49 -0
- annotator/segm/modules/src/utils/cuda.cuh +71 -0
- annotator/segm/networks/AugmentCE2P.py +337 -0
- annotator/segm/networks/__init__.py +13 -0
- annotator/segm/networks/backbone/mobilenetv2.py +156 -0
- annotator/segm/networks/backbone/resnet.py +205 -0
- annotator/segm/networks/backbone/resnext.py +149 -0
- annotator/segm/networks/context_encoding/aspp.py +64 -0
- annotator/segm/networks/context_encoding/ocnet.py +226 -0
- annotator/segm/networks/context_encoding/psp.py +48 -0
- annotator/segm/transforms.py +167 -0
- annotator/util.py +49 -0
- app.py +475 -0
- app_files/default_images/mask.png +0 -0
- app_files/default_images/pose.png +0 -0
- app_files/default_images/ref.png +0 -0
- app_files/samples/pose/MEN/full_1.png +0 -0
- app_files/samples/pose/MEN/full_2.png +0 -0
- app_files/samples/pose/MEN/half_back.png +0 -0
- app_files/samples/pose/MEN/half_front.png +0 -0
- app_files/samples/pose/MEN/half_left.png +0 -0
- app_files/samples/pose/WOMEN/pose_0.png +0 -0
- app_files/samples/pose/WOMEN/pose_1.png +0 -0
- app_files/samples/pose/WOMEN/pose_2.png +0 -0
- app_files/samples/pose/WOMEN/pose_3.png +0 -0
- app_files/samples/pose/WOMEN/pose_4.png +0 -0
- app_files/samples/pose/WOMEN/pose_5.png +0 -0
- app_files/samples/pose/WOMEN/pose_6.png +0 -0
LICENSE
ADDED
@@ -0,0 +1,427 @@
CC BY-SA 4.0 DEED
Attribution-ShareAlike 4.0 International
=======================================================================

Creative Commons Corporation ("Creative Commons") is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an "as-is" basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.

Using Creative Commons Public Licenses

Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.

     Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. More considerations for licensors: wiki.creativecommons.org/Considerations_for_licensors

     Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor's permission is not necessary for any reason--for example, because of any applicable exception or limitation to copyright--then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. More considerations for the public: wiki.creativecommons.org/Considerations_for_licensees

=======================================================================

Creative Commons Attribution-ShareAlike 4.0 International Public License

By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.


Section 1 -- Definitions.

  a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.

  b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.

  c. BY-SA Compatible License means a license listed at creativecommons.org/compatiblelicenses, approved by Creative Commons as essentially the equivalent of this Public License.

  d. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.

  e. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.

  f. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.

  g. License Elements means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution and ShareAlike.

  h. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License.

  i. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.

  j. Licensor means the individual(s) or entity(ies) granting rights under this Public License.

  k. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.

  l. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.

  m. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning.


Section 2 -- Scope.

  a. License grant.

       1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:

            a. reproduce and Share the Licensed Material, in whole or in part; and

            b. produce, reproduce, and Share Adapted Material.

       2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.

       3. Term. The term of this Public License is specified in Section 6(a).

       4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.

       5. Downstream recipients.

            a. Offer from the Licensor -- Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.

            b. Additional offer from the Licensor -- Adapted Material. Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter's License You apply.

            c. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.

       6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).

  b. Other rights.

       1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.

       2. Patent and trademark rights are not licensed under this Public License.

       3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties.


Section 3 -- License Conditions.

Your exercise of the Licensed Rights is expressly made subject to the following conditions.

  a. Attribution.

       1. If You Share the Licensed Material (including in modified form), You must:

            a. retain the following if it is supplied by the Licensor with the Licensed Material:

                 i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);

                ii. a copyright notice;

               iii. a notice that refers to this Public License;

                iv. a notice that refers to the disclaimer of warranties;

                 v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;

            b. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and

            c. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.

       2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.

       3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.

  b. ShareAlike.

     In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply.

       1. The Adapter's License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-SA Compatible License.

       2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material.

       3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply.


Section 4 -- Sui Generis Database Rights.

Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:

  a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database;

  b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and

  c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.

For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.


Section 5 -- Disclaimer of Warranties and Limitation of Liability.

  a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.

  b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.

  c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.


Section 6 -- Term and Termination.

  a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.

  b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:

       1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or

       2. upon express reinstatement by the Licensor.

     For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.

  c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.

  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.


Section 7 -- Other Terms and Conditions.

  a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.

  b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.


Section 8 -- Interpretation.

  a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.

  b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.

  c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.

  d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.


=======================================================================

Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the "Licensor." The text of the Creative Commons public licenses is dedicated to the public domain under the CC0 Public Domain Dedication. Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark "Creative Commons" or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.

Creative Commons may be contacted at creativecommons.org.
README.md
ADDED
@@ -0,0 +1,38 @@
## *ViscoNet*: Bridging and Harmonizing Visual and Textual Conditioning for ControlNet
[Soon Yau Cheong](https://scholar.google.com/citations?user=dRot7GUAAAAJ&hl=en)
[Armin Mustafa](https://scholar.google.com/citations?user=0xOHqkMAAAAJ&hl=en)
[Andrew Gilbert](https://scholar.google.com/citations?user=NNhnVwoAAAAJ&hl=en)


<a href='https://soon-yau.github.io/visconet/'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
<a href='https://arxiv.org/abs/2312.03154'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
[![YouTube](https://badges.aleen42.com/src/youtube.svg)](https://youtu.be/3_6Zq3hk86Q)

https://github.com/soon-yau/visconet/assets/19167278/ae58b7ab-fa76-4253-8a10-46656f234b20

### Requirements
A suitable [conda](https://conda.io/) environment named `control` can be created
and activated with:
```
conda env create -f environment.yaml
conda activate control
```
### Files
All model and data files are available [here](https://huggingface.co/soonyau/visconet/tree/main), including `eval.zip`, which contains all images used in the human evaluation.

### Gradio App
[![App](./assets/app.png)](https://youtu.be/3_6Zq3hk86Q)
1. Download *visconet_v1.pth* and *exp-schp-201908301523-atr.pth* into the `./models` directory (one scripted way to do this is sketched after this README).
2. (Optional) Download fashion.zip and unzip it into your home directory.
3. Run ```python gradio_visconet.py```

### Citation
```
@article{cheong2023visconet,
  author = {Cheong, Soon Yau and Mustafa, Armin and Gilbert, Andrew},
  title = {ViscoNet: Bridging and Harmonizing Visual and Textual Conditioning for ControlNet},
  journal = {Arxiv Preprint 2312.03154},
  month = {December},
  year = {2023}}
```
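A minimal sketch of scripting step 1 of the Gradio App section with `huggingface_hub`, assuming the two checkpoints sit at the root of the linked soonyau/visconet repo; the filenames come from the README, but the exact repo layout is an assumption and may need adjusting.

```
# Sketch: fetch the Gradio app checkpoints listed in step 1 of the README.
# Assumes both files live at the root of the soonyau/visconet repo; adjust if not.
from huggingface_hub import hf_hub_download

for fname in ["visconet_v1.pth", "exp-schp-201908301523-atr.pth"]:
    path = hf_hub_download(repo_id="soonyau/visconet", filename=fname, local_dir="./models")
    print("saved", path)
```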
annotator/openpose/__init__.py
ADDED
@@ -0,0 +1,73 @@
# Openpose
# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
# 2nd Edited by https://github.com/Hzzone/pytorch-openpose
# 3rd Edited by ControlNet

import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import torch
import numpy as np
from . import util
from .body import Body
from .hand import Hand
from annotator.util import annotator_ckpts_path


body_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/body_pose_model.pth"
hand_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/hand_pose_model.pth"


class OpenposeDetector:
    def __init__(self):
        body_modelpath = os.path.join(annotator_ckpts_path, "body_pose_model.pth")
        hand_modelpath = os.path.join(annotator_ckpts_path, "hand_pose_model.pth")

        if not os.path.exists(hand_modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(body_model_path, model_dir=annotator_ckpts_path)
            load_file_from_url(hand_model_path, model_dir=annotator_ckpts_path)

        self.body_estimation = Body(body_modelpath)
        self.hand_estimation = Hand(hand_modelpath)

    def __call__(self, oriImg, hand=False):
        oriImg = oriImg[:, :, ::-1].copy()
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            canvas = np.zeros_like(oriImg)
            canvas = util.draw_bodypose(canvas, candidate, subset)
            if hand:
                hands_list = util.handDetect(candidate, subset, oriImg)
                all_hand_peaks = []
                for x, y, w, is_left in hands_list:
                    peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :])
                    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
                    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
                    all_hand_peaks.append(peaks)
                canvas = util.draw_handpose(canvas, all_hand_peaks)
            return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())


class VisconetDetector(OpenposeDetector):
    def __init__(self):
        super().__init__()

    def __call__(self, oriImg):
        oriImg = oriImg[:, :, ::-1].copy()
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            canvas = util.draw_bodypose(np.zeros_like(oriImg), candidate, subset, stickwidth=1, circlewidth=2)
            # detect hand
            hands_list = util.handDetect(candidate, subset, oriImg)

            all_hand_peaks = []
            for x, y, w, is_left in hands_list:
                peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :])
                peaks[:, 0] = np.where(peaks[:, 0]==0, peaks[:, 0], peaks[:, 0]+x)
                peaks[:, 1] = np.where(peaks[:, 1]==0, peaks[:, 1], peaks[:, 1]+y)
                all_hand_peaks.append(peaks)

            canvas = util.draw_handpose(canvas, all_hand_peaks, stickwidth=1)
            return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())
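A minimal usage sketch for the detector above (not part of the committed file): it assumes the body and hand checkpoints are either already in `annotator_ckpts_path` or downloadable, and `demo.jpg` is a placeholder image path.

```
# Sketch: run OpenposeDetector on an RGB image and save the drawn skeleton map.
# 'demo.jpg' is a placeholder; missing checkpoints are downloaded on first use.
import cv2
from annotator.openpose import OpenposeDetector

detector = OpenposeDetector()
img = cv2.imread('demo.jpg')[:, :, ::-1]   # BGR -> RGB; __call__ flips back to BGR internally
canvas, pose = detector(img, hand=True)    # canvas: skeleton image, pose: keypoint lists
cv2.imwrite('pose.png', canvas)
```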
annotator/openpose/body.py
ADDED
@@ -0,0 +1,219 @@
import cv2
import numpy as np
import math
import time
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt
import matplotlib
import torch
from torchvision import transforms

from . import util
from .model import bodypose_model

class Body(object):
    def __init__(self, model_path):
        self.model = bodypose_model()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            print('cuda')
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def __call__(self, oriImg):
        # scale_search = [0.5, 1.0, 1.5, 2.0]
        scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre1 = 0.1
        thre2 = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            if torch.cuda.is_available():
                data = data.cuda()
            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
            with torch.no_grad():
                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

            # extract outputs, resize, and remove padding
            # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0))  # output 1 is heatmaps
            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0))  # output 0 is PAFs
            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            heatmap_avg += heatmap_avg + heatmap / len(multiplier)
            paf_avg += + paf / len(multiplier)

        all_peaks = []
        peak_counter = 0

        for part in range(18):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            peak_id = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        # find connection in the specified sequence, center 29 is in the position 15
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
                   [1, 16], [16, 18], [3, 17], [6, 18]]
        # the middle joints heatmap correpondence
        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
                  [55, 56], [37, 38], [45, 46]]

        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
            candA = all_peaks[limbSeq[k][0] - 1]
            candB = all_peaks[limbSeq[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            indexA, indexB = limbSeq[k]
            if (nA != 0 and nB != 0):
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        norm = max(0.001, norm)
                        vec = np.divide(vec, norm)

                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
                                          for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
                                          for I in range(len(startend))])

                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if (i not in connection[:, 3] and j not in connection[:, 4]):
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if (len(connection) >= min(nA, nB)):
                            break

                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, 20))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if find no partA in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])
        # delete some rows of subset which has few parts occur
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset

if __name__ == "__main__":
    body_estimation = Body('../model/body_pose_model.pth')

    test_image = '../images/ski.jpg'
    oriImg = cv2.imread(test_image)  # B,G,R order
    candidate, subset = body_estimation(oriImg)
    canvas = util.draw_bodypose(oriImg, candidate, subset)
    plt.imshow(canvas[:, :, [2, 1, 0]])
    plt.show()
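Continuing the `__main__` example above, a short sketch (not in the file) of decoding the returned arrays, using the format documented in the closing comments: each `candidate` row is `[x, y, score, id]`, and each `subset` row holds 18 candidate indices (-1 for missing joints), the total score, and the part count.

```
# Sketch: list the joints of every detected person from (candidate, subset).
for person in subset:
    joints = {}
    for part in range(18):
        idx = int(person[part])
        if idx == -1:          # this joint was not detected for this person
            continue
        x, y, score = candidate[idx][:3]
        joints[part] = (float(x), float(y), float(score))
    print(int(person[-1]), "parts, score", round(float(person[-2]), 2), joints)
```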
annotator/openpose/hand.py
ADDED
@@ -0,0 +1,86 @@
import cv2
import json
import numpy as np
import math
import time
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt
import matplotlib
import torch
from skimage.measure import label

from .model import handpose_model
from . import util

class Hand(object):
    def __init__(self, model_path):
        self.model = handpose_model()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            print('cuda')
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def __call__(self, oriImg):
        scale_search = [0.5, 1.0, 1.5, 2.0]
        # scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
        # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            if torch.cuda.is_available():
                data = data.cuda()
            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
            with torch.no_grad():
                output = self.model(data).cpu().numpy()
                # output = self.model(data).numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            heatmap_avg += heatmap / len(multiplier)

        all_peaks = []
        for part in range(21):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)
            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
            # all values below the threshold
            if np.sum(binary) == 0:
                all_peaks.append([0, 0])
                continue
            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
            max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
            label_img[label_img != max_index] = 0
            map_ori[label_img == 0] = 0

            y, x = util.npmax(map_ori)
            all_peaks.append([x, y])
        return np.array(all_peaks)

if __name__ == "__main__":
    hand_estimation = Hand('../model/hand_pose_model.pth')

    test_image = '../images/hand.jpg'
    oriImg = cv2.imread(test_image)  # B,G,R order
    peaks = hand_estimation(oriImg)
    canvas = util.draw_handpose(oriImg, peaks, True)
    cv2.imshow('', canvas)
    cv2.waitKey(0)
annotator/openpose/model.py
ADDED
@@ -0,0 +1,219 @@
import torch
from collections import OrderedDict

import torch
import torch.nn as nn

def make_layers(block, no_relu_layers):
    layers = []
    for layer_name, v in block.items():
        if 'pool' in layer_name:
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
                                 padding=v[2])
            layers.append((layer_name, layer))
        else:
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                layers.append(('relu_'+layer_name, nn.ReLU(inplace=True)))

    return nn.Sequential(OrderedDict(layers))

class bodypose_model(nn.Module):
    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',\
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',\
                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',\
                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L1']
        blocks = {}
        block0 = OrderedDict([
                      ('conv1_1', [3, 64, 3, 1, 1]),
                      ('conv1_2', [64, 64, 3, 1, 1]),
                      ('pool1_stage1', [2, 2, 0]),
                      ('conv2_1', [64, 128, 3, 1, 1]),
                      ('conv2_2', [128, 128, 3, 1, 1]),
                      ('pool2_stage1', [2, 2, 0]),
                      ('conv3_1', [128, 256, 3, 1, 1]),
                      ('conv3_2', [256, 256, 3, 1, 1]),
                      ('conv3_3', [256, 256, 3, 1, 1]),
                      ('conv3_4', [256, 256, 3, 1, 1]),
                      ('pool3_stage1', [2, 2, 0]),
                      ('conv4_1', [256, 512, 3, 1, 1]),
                      ('conv4_2', [512, 512, 3, 1, 1]),
                      ('conv4_3_CPM', [512, 256, 3, 1, 1]),
                      ('conv4_4_CPM', [256, 128, 3, 1, 1])
                  ])


        # Stage 1
        block1_1 = OrderedDict([
                        ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
                        ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
                        ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
                        ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
                        ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
                    ])

        block1_2 = OrderedDict([
                        ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
                        ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
                        ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
                        ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
                        ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
                    ])
        blocks['block1_1'] = block1_1
        blocks['block1_2'] = block1_2

        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            blocks['block%d_1' % i] = OrderedDict([
                    ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
                    ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                    ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                    ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                    ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                    ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
                    ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
                ])

            blocks['block%d_2' % i] = OrderedDict([
                    ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
                    ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                    ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                    ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                    ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                    ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
                    ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
                ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_1 = blocks['block1_1']
        self.model2_1 = blocks['block2_1']
        self.model3_1 = blocks['block3_1']
        self.model4_1 = blocks['block4_1']
        self.model5_1 = blocks['block5_1']
        self.model6_1 = blocks['block6_1']

        self.model1_2 = blocks['block1_2']
        self.model2_2 = blocks['block2_2']
        self.model3_2 = blocks['block3_2']
        self.model4_2 = blocks['block4_2']
        self.model5_2 = blocks['block5_2']
        self.model6_2 = blocks['block6_2']


    def forward(self, x):

        out1 = self.model0(x)

        out1_1 = self.model1_1(out1)
        out1_2 = self.model1_2(out1)
        out2 = torch.cat([out1_1, out1_2, out1], 1)

        out2_1 = self.model2_1(out2)
        out2_2 = self.model2_2(out2)
        out3 = torch.cat([out2_1, out2_2, out1], 1)

        out3_1 = self.model3_1(out3)
        out3_2 = self.model3_2(out3)
        out4 = torch.cat([out3_1, out3_2, out1], 1)

        out4_1 = self.model4_1(out4)
        out4_2 = self.model4_2(out4)
        out5 = torch.cat([out4_1, out4_2, out1], 1)

        out5_1 = self.model5_1(out5)
        out5_2 = self.model5_2(out5)
        out6 = torch.cat([out5_1, out5_2, out1], 1)

        out6_1 = self.model6_1(out6)
        out6_2 = self.model6_2(out6)

        return out6_1, out6_2

class handpose_model(nn.Module):
    def __init__(self):
        super(handpose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',\
                          'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
        # stage 1
        block1_0 = OrderedDict([
                ('conv1_1', [3, 64, 3, 1, 1]),
                ('conv1_2', [64, 64, 3, 1, 1]),
                ('pool1_stage1', [2, 2, 0]),
                ('conv2_1', [64, 128, 3, 1, 1]),
                ('conv2_2', [128, 128, 3, 1, 1]),
                ('pool2_stage1', [2, 2, 0]),
                ('conv3_1', [128, 256, 3, 1, 1]),
                ('conv3_2', [256, 256, 3, 1, 1]),
                ('conv3_3', [256, 256, 3, 1, 1]),
                ('conv3_4', [256, 256, 3, 1, 1]),
                ('pool3_stage1', [2, 2, 0]),
                ('conv4_1', [256, 512, 3, 1, 1]),
                ('conv4_2', [512, 512, 3, 1, 1]),
                ('conv4_3', [512, 512, 3, 1, 1]),
                ('conv4_4', [512, 512, 3, 1, 1]),
                ('conv5_1', [512, 512, 3, 1, 1]),
                ('conv5_2', [512, 512, 3, 1, 1]),
                ('conv5_3_CPM', [512, 128, 3, 1, 1])
            ])

        block1_1 = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ])

        blocks = {}
        blocks['block1_0'] = block1_0
        blocks['block1_1'] = block1_1

        # stage 2-6
        for i in range(2, 7):
            blocks['block%d' % i] = OrderedDict([
                    ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
                    ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
                    ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
                    ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
                    ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
                    ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
                    ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
                ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_0 = blocks['block1_0']
        self.model1_1 = blocks['block1_1']
        self.model2 = blocks['block2']
        self.model3 = blocks['block3']
        self.model4 = blocks['block4']
        self.model5 = blocks['block5']
        self.model6 = blocks['block6']

    def forward(self, x):
        out1_0 = self.model1_0(x)
        out1_1 = self.model1_1(out1_0)
        concat_stage2 = torch.cat([out1_1, out1_0], 1)
        out_stage2 = self.model2(concat_stage2)
        concat_stage3 = torch.cat([out_stage2, out1_0], 1)
        out_stage3 = self.model3(concat_stage3)
        concat_stage4 = torch.cat([out_stage3, out1_0], 1)
        out_stage4 = self.model4(concat_stage4)
        concat_stage5 = torch.cat([out_stage4, out1_0], 1)
        out_stage5 = self.model5(concat_stage5)
        concat_stage6 = torch.cat([out_stage5, out1_0], 1)
        out_stage6 = self.model6(concat_stage6)
        return out_stage6
ADDED
@@ -0,0 +1,163 @@
|
1 |
+
import math
|
2 |
+
import numpy as np
|
3 |
+
import matplotlib
|
4 |
+
import cv2
|
5 |
+
|
6 |
+
|
7 |
+
def padRightDownCorner(img, stride, padValue):
|
8 |
+
h = img.shape[0]
|
9 |
+
w = img.shape[1]
|
10 |
+
|
11 |
+
pad = 4 * [None]
|
12 |
+
pad[0] = 0 # up
|
13 |
+
pad[1] = 0 # left
|
14 |
+
pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down
|
15 |
+
pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right
|
16 |
+
|
17 |
+
img_padded = img
|
18 |
+
pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1))
|
19 |
+
img_padded = np.concatenate((pad_up, img_padded), axis=0)
|
20 |
+
pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1))
|
21 |
+
img_padded = np.concatenate((pad_left, img_padded), axis=1)
|
22 |
+
pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1))
|
23 |
+
img_padded = np.concatenate((img_padded, pad_down), axis=0)
|
24 |
+
pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1))
|
25 |
+
img_padded = np.concatenate((img_padded, pad_right), axis=1)
|
26 |
+
|
27 |
+
return img_padded, pad
|
28 |
+
|
29 |
+
# transfer caffe model to pytorch which will match the layer name
|
30 |
+
def transfer(model, model_weights):
|
31 |
+
transfered_model_weights = {}
|
32 |
+
for weights_name in model.state_dict().keys():
|
33 |
+
transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
|
34 |
+
return transfered_model_weights
|
35 |
+
|
36 |
+
# draw the body keypoint and lims
|
37 |
+
def draw_bodypose(canvas, candidate, subset, stickwidth=4, circlewidth=4):
|
38 |
+
|
39 |
+
limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
|
40 |
+
[10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
|
41 |
+
[1, 16], [16, 18], [3, 17], [6, 18]]
|
42 |
+
|
43 |
+
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
|
44 |
+
[0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
|
45 |
+
[170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
|
46 |
+
for i in range(18):
|
47 |
+
for n in range(len(subset)):
|
48 |
+
index = int(subset[n][i])
|
49 |
+
if index == -1:
|
50 |
+
continue
|
51 |
+
x, y = candidate[index][0:2]
|
52 |
+
cv2.circle(canvas, (int(x), int(y)), circlewidth, colors[i], thickness=-1)
|
53 |
+
for i in range(17):
|
54 |
+
for n in range(len(subset)):
|
55 |
+
index = subset[n][np.array(limbSeq[i]) - 1]
|
56 |
+
if -1 in index:
|
57 |
+
continue
|
58 |
+
cur_canvas = canvas.copy()
|
59 |
+
Y = candidate[index.astype(int), 0]
|
60 |
+
X = candidate[index.astype(int), 1]
|
61 |
+
mX = np.mean(X)
|
62 |
+
mY = np.mean(Y)
|
63 |
+
length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
|
64 |
+
angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
|
65 |
+
polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
|
66 |
+
cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
|
67 |
+
canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
|
68 |
+
# plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]])
|
69 |
+
# plt.imshow(canvas[:, :, [2, 1, 0]])
|
70 |
+
return canvas
|
71 |
+
|
72 |
+
# image drawed by opencv is not good.
|
73 |
+
def draw_handpose(canvas, all_hand_peaks, show_number=False, stickwidth=2):
|
74 |
+
edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
|
75 |
+
[10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
|
76 |
+
|
77 |
+
for peaks in all_hand_peaks:
|
78 |
+
for ie, e in enumerate(edges):
|
79 |
+
if np.sum(np.all(peaks[e], axis=1)==0)==0:
|
80 |
+
x1, y1 = peaks[e[0]]
|
81 |
+
x2, y2 = peaks[e[1]]
|
82 |
+
cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0])*255, thickness=stickwidth)
|
83 |
+
|
84 |
+
for i, keyponit in enumerate(peaks):
|
85 |
+
x, y = keyponit
|
86 |
+
cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
|
87 |
+
if show_number:
|
88 |
+
cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
|
89 |
+
return canvas
|
90 |
+
|
91 |
+
# detect hand according to body pose keypoints
|
92 |
+
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
|
93 |
+
def handDetect(candidate, subset, oriImg):
|
94 |
+
# right hand: wrist 4, elbow 3, shoulder 2
|
95 |
+
# left hand: wrist 7, elbow 6, shoulder 5
|
96 |
+
ratioWristElbow = 0.33
|
97 |
+
detect_result = []
|
98 |
+
image_height, image_width = oriImg.shape[0:2]
|
99 |
+
for person in subset.astype(int):
|
100 |
+
# if any of three not detected
|
101 |
+
has_left = np.sum(person[[5, 6, 7]] == -1) == 0
|
102 |
+
has_right = np.sum(person[[2, 3, 4]] == -1) == 0
|
103 |
+
if not (has_left or has_right):
|
104 |
+
continue
|
105 |
+
hands = []
|
106 |
+
#left hand
|
107 |
+
if has_left:
|
108 |
+
left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
|
109 |
+
x1, y1 = candidate[left_shoulder_index][:2]
|
110 |
+
x2, y2 = candidate[left_elbow_index][:2]
|
111 |
+
x3, y3 = candidate[left_wrist_index][:2]
|
112 |
+
hands.append([x1, y1, x2, y2, x3, y3, True])
|
113 |
+
# right hand
|
114 |
+
if has_right:
|
115 |
+
right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
|
116 |
+
x1, y1 = candidate[right_shoulder_index][:2]
|
117 |
+
x2, y2 = candidate[right_elbow_index][:2]
|
118 |
+
x3, y3 = candidate[right_wrist_index][:2]
|
119 |
+
hands.append([x1, y1, x2, y2, x3, y3, False])
|
120 |
+
|
121 |
+
for x1, y1, x2, y2, x3, y3, is_left in hands:
|
122 |
+
# pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbox) = (1 + ratio) * pos_wrist - ratio * pos_elbox
|
123 |
+
# handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
|
124 |
+
# handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
|
125 |
+
# const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
|
126 |
+
# const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
|
127 |
+
# handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
|
128 |
+
x = x3 + ratioWristElbow * (x3 - x2)
|
129 |
+
y = y3 + ratioWristElbow * (y3 - y2)
|
130 |
+
distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
|
131 |
+
distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
|
132 |
+
width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
|
133 |
+
# x-y refers to the center --> offset to topLeft point
|
134 |
+
# handRectangle.x -= handRectangle.width / 2.f;
|
135 |
+
# handRectangle.y -= handRectangle.height / 2.f;
|
136 |
+
x -= width / 2
|
137 |
+
y -= width / 2 # width = height
|
138 |
+
# overflow the image
|
139 |
+
if x < 0: x = 0
|
140 |
+
if y < 0: y = 0
|
141 |
+
width1 = width
|
142 |
+
width2 = width
|
143 |
+
if x + width > image_width: width1 = image_width - x
|
144 |
+
if y + width > image_height: width2 = image_height - y
|
145 |
+
width = min(width1, width2)
|
146 |
+
# the max hand box value is 20 pixels
|
147 |
+
if width >= 20:
|
148 |
+
detect_result.append([int(x), int(y), int(width), is_left])
|
149 |
+
|
150 |
+
'''
|
151 |
+
return value: [[x, y, w, True if left hand else False]].
|
152 |
+
width=height since the network require squared input.
|
153 |
+
x, y is the coordinate of top left
|
154 |
+
'''
|
155 |
+
return detect_result
|
156 |
+
|
157 |
+
# get max index of 2d array
|
158 |
+
def npmax(array):
|
159 |
+
arrayindex = array.argmax(1)
|
160 |
+
arrayvalue = array.max(1)
|
161 |
+
i = arrayvalue.argmax()
|
162 |
+
j = arrayindex[i]
|
163 |
+
return i, j
|
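handDetect returns square boxes as [x, y, w, is_left], so extracting the hand region for the hand network is a single slice. A small usage sketch; candidate, subset and oriImg are assumed to come from the body-pose stage and are not defined in this file:

    # hypothetical follow-up to the body-pose stage
    for x, y, w, is_left in handDetect(candidate, subset, oriImg):
        hand_crop = oriImg[y:y + w, x:x + w, :]   # square crop, width == height
        # run the hand estimator on hand_crop, then shift its peaks back by (x, y)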
annotator/segm/__init__.py
ADDED
@@ -0,0 +1,162 @@
# Self-Correction-Human-Parsing
# Original https://github.com/GoGoDuck912/Self-Correction-Human-Parsing

import os
import torch
import numpy as np
from PIL import Image
import cv2

import torchvision.transforms as T

from .transforms import transform_logits, get_affine_transform
from . import networks
from annotator.util import annotator_ckpts_path
from huggingface_hub import snapshot_download

dataset_settings = {
    'lip': {
        'input_size': [473, 473],
        'num_classes': 20,
        'label': ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat',
                  'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm',
                  'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe']
    },
    'atr': {
        'input_size': [512, 512],
        'num_classes': 18,
        'label': ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt',
                  'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf']
    },
    'pascal': {
        'input_size': [512, 512],
        'num_classes': 7,
        'label': ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'],
    }
}


def get_palette(num_cls):
    """ Returns the color map for visualizing the segmentation mask.
    Args:
        num_cls: Number of classes
    Returns:
        The color map
    """
    n = num_cls
    palette = [0] * (n * 3)
    for j in range(0, n):
        lab = j
        palette[j * 3 + 0] = 0
        palette[j * 3 + 1] = 0
        palette[j * 3 + 2] = 0
        i = 0
        while lab:
            palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i))
            palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i))
            palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i))
            i += 1
            lab >>= 3
    return palette

class Segmentator(torch.nn.Module):
    def __init__(self, dataset='lip'):
        super().__init__()

        num_classes = dataset_settings[dataset]['num_classes']
        input_size = dataset_settings[dataset]['input_size']
        label = dataset_settings[dataset]['label']

        if dataset == 'atr':
            model_path = 'exp-schp-201908301523-atr.pth'
        elif dataset == 'lip':
            model_path = 'exp-schp-201908261155-lip.pth'

        model_path = os.path.join(annotator_ckpts_path, model_path)

        snapshot_download(repo_id="soonyau/visconet", allow_patterns="exp-schp-201908301523-atr.pth", local_dir=annotator_ckpts_path)

        self.model = networks.init_model('resnet101', num_classes=num_classes, pretrained=None)
        state_dict = torch.load(model_path)['state_dict']
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        self.model.load_state_dict(new_state_dict)
        self.model.eval()

        self.palette = get_palette(num_classes)

        self.transform = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])
        ])
        self.aspect_ratio = input_size[1] * 1.0 / input_size[0]
        self.input_size = np.asarray(input_size)

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w, h], dtype=np.float32)
        return center, scale

    def preprocess(self, image: np.array):
        # convert numpy to cv2
        image = image[:, :, ::-1]
        h, w, _ = image.shape

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.input_size)
        input = cv2.warpAffine(
            image,
            trans,
            (int(self.input_size[1]), int(self.input_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        input = self.transform(input)
        meta = {
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return input, meta

    @torch.no_grad()
    def __call__(self, input_image):
        image, meta = self.preprocess(input_image)
        c = meta['center']
        s = meta['scale']
        w = meta['width']
        h = meta['height']
        input_size = list(self.input_size)
        device = next(self.parameters()).device
        output = self.model(image.unsqueeze(0).to(device))
        upsample = torch.nn.Upsample(size=input_size, mode='bilinear', align_corners=True)
        upsample_output = upsample(output[0][-1][0].unsqueeze(0))
        upsample_output = upsample_output.squeeze()
        upsample_output = upsample_output.permute(1, 2, 0)  # CHW -> HWC
        logits_result = transform_logits(upsample_output.data.cpu().numpy(), c, s, w, h, input_size=input_size)
        parsing_result = np.argmax(logits_result, axis=2)
        output_img = Image.fromarray(np.asarray(parsing_result, dtype=np.uint8))
        #return output_img
        output_img.putpalette(self.palette)
        return output_img
        #return np.array(output_img)
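Segmentator bundles checkpoint download, affine preprocessing and palette mapping behind a single call. A hedged usage sketch, assuming the checkpoint download succeeds; note that only the 'atr' checkpoint is fetched by the snapshot_download call above, and the image path is a hypothetical example:

    # illustrative usage only
    import numpy as np
    from PIL import Image

    segm = Segmentator(dataset='atr')
    rgb = np.array(Image.open('person.jpg').convert('RGB'))  # hypothetical input image
    parsing = segm(rgb)          # palettised PIL image (mode "P"), one label per pixel
    parsing.save('parsing.png')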
annotator/segm/modules/__init__.py
ADDED
@@ -0,0 +1,5 @@
from .bn import ABN, InPlaceABN, InPlaceABNSync
from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
from .misc import GlobalAvgPool2d, SingleGPU
from .residual import IdentityResidualBlock
from .dense import DenseModule
annotator/segm/modules/bn.py
ADDED
@@ -0,0 +1,132 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional

try:
    from queue import Queue
except ImportError:
    from Queue import Queue

from .functions import *


class ABN(nn.Module):
    """Activated Batch Normalization

    This gathers a `BatchNorm2d` and an activation function in a single module
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied to compute running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(ABN, self).__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = eps
        self.momentum = momentum
        self.activation = activation
        self.slope = slope
        if self.affine:
            self.weight = nn.Parameter(torch.ones(num_features))
            self.bias = nn.Parameter(torch.zeros(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.constant_(self.running_mean, 0)
        nn.init.constant_(self.running_var, 1)
        if self.affine:
            nn.init.constant_(self.weight, 1)
            nn.init.constant_(self.bias, 0)

    def forward(self, x):
        x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias,
                                  self.training, self.momentum, self.eps)

        if self.activation == ACT_RELU:
            return functional.relu(x, inplace=True)
        elif self.activation == ACT_LEAKY_RELU:
            return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
        elif self.activation == ACT_ELU:
            return functional.elu(x, inplace=True)
        else:
            return x

    def __repr__(self):
        rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
              ' affine={affine}, activation={activation}'
        if self.activation == "leaky_relu":
            rep += ', slope={slope})'
        else:
            rep += ')'
        return rep.format(name=self.__class__.__name__, **self.__dict__)


class InPlaceABN(ABN):
    """InPlace Activated Batch Normalization"""

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an InPlace Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied to compute running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope)

    def forward(self, x):
        x, _, _ = inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var,
                              self.training, self.momentum, self.eps, self.activation, self.slope)
        return x


class InPlaceABNSync(ABN):
    """InPlace Activated Batch Normalization with cross-GPU synchronization

    This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`.
    """

    def forward(self, x):
        x, _, _ = inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var,
                                   self.training, self.momentum, self.eps, self.activation, self.slope)
        return x

    def __repr__(self):
        rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
              ' affine={affine}, activation={activation}'
        if self.activation == "leaky_relu":
            rep += ', slope={slope})'
        else:
            rep += ')'
        return rep.format(name=self.__class__.__name__, **self.__dict__)
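ABN and its in-place variants are drop-in replacements for a BatchNorm2d plus activation pair, which is how the networks below consume them through their norm_act argument. An illustrative sketch (not part of the original file; note that importing this module also triggers the JIT build of the inplace_abn extension via functions.py):

    # illustrative only: one module instead of BatchNorm2d followed by LeakyReLU
    import torch
    import torch.nn as nn

    block = nn.Sequential(
        nn.Conv2d(3, 64, 3, padding=1, bias=False),
        ABN(64, activation="leaky_relu", slope=0.01),  # normalization + activation fused
    )
    out = block(torch.randn(2, 3, 32, 32))             # shape preserved: [2, 64, 32, 32]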
annotator/segm/modules/deeplab.py
ADDED
@@ -0,0 +1,84 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional

from models._util import try_index
from .bn import ABN


class DeeplabV3(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 hidden_channels=256,
                 dilations=(12, 24, 36),
                 norm_act=ABN,
                 pooling_size=None):
        super(DeeplabV3, self).__init__()
        self.pooling_size = pooling_size

        self.map_convs = nn.ModuleList([
            nn.Conv2d(in_channels, hidden_channels, 1, bias=False),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2])
        ])
        self.map_bn = norm_act(hidden_channels * 4)

        self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
        self.global_pooling_bn = norm_act(hidden_channels)

        self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False)
        self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False)
        self.red_bn = norm_act(out_channels)

        self.reset_parameters(self.map_bn.activation, self.map_bn.slope)

    def reset_parameters(self, activation, slope):
        gain = nn.init.calculate_gain(activation, slope)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data, gain)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, ABN):
                if hasattr(m, "weight") and m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Map convolutions
        out = torch.cat([m(x) for m in self.map_convs], dim=1)
        out = self.map_bn(out)
        out = self.red_conv(out)

        # Global pooling
        pool = self._global_pooling(x)
        pool = self.global_pooling_conv(pool)
        pool = self.global_pooling_bn(pool)
        pool = self.pool_red_conv(pool)
        if self.training or self.pooling_size is None:
            pool = pool.repeat(1, 1, x.size(2), x.size(3))

        out += pool
        out = self.red_bn(out)
        return out

    def _global_pooling(self, x):
        if self.training or self.pooling_size is None:
            pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1)
            pool = pool.view(x.size(0), x.size(1), 1, 1)
        else:
            pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]),
                            min(try_index(self.pooling_size, 1), x.shape[3]))
            padding = (
                (pooling_size[1] - 1) // 2,
                (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1,
                (pooling_size[0] - 1) // 2,
                (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1
            )

            pool = functional.avg_pool2d(x, pooling_size, stride=1)
            pool = functional.pad(pool, pad=padding, mode="replicate")
        return pool
annotator/segm/modules/dense.py
ADDED
@@ -0,0 +1,42 @@
from collections import OrderedDict

import torch
import torch.nn as nn

from .bn import ABN


class DenseModule(nn.Module):
    def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
        super(DenseModule, self).__init__()
        self.in_channels = in_channels
        self.growth = growth
        self.layers = layers

        self.convs1 = nn.ModuleList()
        self.convs3 = nn.ModuleList()
        for i in range(self.layers):
            self.convs1.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(in_channels)),
                ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False))
            ])))
            self.convs3.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(self.growth * bottleneck_factor)),
                ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False,
                                   dilation=dilation))
            ])))
            in_channels += self.growth

    @property
    def out_channels(self):
        return self.in_channels + self.growth * self.layers

    def forward(self, x):
        inputs = [x]
        for i in range(self.layers):
            x = torch.cat(inputs, dim=1)
            x = self.convs1[i](x)
            x = self.convs3[i](x)
            inputs += [x]

        return torch.cat(inputs, dim=1)
annotator/segm/modules/functions.py
ADDED
@@ -0,0 +1,244 @@
from os import path
import torch
import torch.distributed as dist
import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load

_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(name="inplace_abn",
                extra_cflags=["-O3"],
                sources=[path.join(_src_path, f) for f in [
                    "inplace_abn.cpp",
                    "inplace_abn_cpu.cpp",
                    "inplace_abn_cuda.cu",
                    "inplace_abn_cuda_half.cu"
                ]],
                extra_cuda_cflags=["--expt-extended-lambda"])

# Activation names
ACT_RELU = "relu"
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"


def _check(fn, *args, **kwargs):
    success = fn(*args, **kwargs)
    if not success:
        raise RuntimeError("CUDA Error encountered in {}".format(fn))


def _broadcast_shape(x):
    out_size = []
    for i, s in enumerate(x.size()):
        if i != 1:
            out_size.append(1)
        else:
            out_size.append(s)
    return out_size


def _reduce(x):
    if len(x.size()) == 2:
        return x.sum(dim=0)
    else:
        n, c = x.size()[0:2]
        return x.contiguous().view((n, c, -1)).sum(2).sum(0)


def _count_samples(x):
    count = 1
    for i, s in enumerate(x.size()):
        if i != 1:
            count *= s
    return count


def _act_forward(ctx, x):
    if ctx.activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_forward(x, ctx.slope)
    elif ctx.activation == ACT_ELU:
        _backend.elu_forward(x)
    elif ctx.activation == ACT_NONE:
        pass


def _act_backward(ctx, x, dx):
    if ctx.activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_backward(x, dx, ctx.slope)
    elif ctx.activation == ACT_ELU:
        _backend.elu_backward(x, dx)
    elif ctx.activation == ACT_NONE:
        pass


class InPlaceABN(autograd.Function):
    @staticmethod
    def forward(ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        count = _count_samples(x)
        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        ctx.mark_non_differentiable(running_mean, running_var)
        return x, running_mean, running_var

    @staticmethod
    @once_differentiable
    def backward(ctx, dz, _drunning_mean, _drunning_var):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
        else:
            # TODO: implement simplified CUDA backward for inference mode
            edz = dz.new_zeros(dz.size(1))
            eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        # dweight = eydz * weight.sign() if ctx.affine else None
        dweight = eydz if ctx.affine else None
        if dweight is not None:
            dweight[weight < 0] *= -1
        dbias = edz if ctx.affine else None

        return dx, dweight, dbias, None, None, None, None, None, None, None


class InPlaceABNSync(autograd.Function):
    @classmethod
    def forward(cls, ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1

        # count = _count_samples(x)
        batch_size = x.new_tensor([x.shape[0]], dtype=torch.long)

        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)
            if ctx.world_size > 1:
                # get global batch size
                if equal_batches:
                    batch_size *= ctx.world_size
                else:
                    dist.all_reduce(batch_size, dist.ReduceOp.SUM)

                ctx.factor = x.shape[0] / float(batch_size.item())

                mean_all = mean.clone() * ctx.factor
                dist.all_reduce(mean_all, dist.ReduceOp.SUM)

                var_all = (var + (mean - mean_all) ** 2) * ctx.factor
                dist.all_reduce(var_all, dist.ReduceOp.SUM)

                mean = mean_all
                var = var_all

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1]
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        ctx.mark_non_differentiable(running_mean, running_var)
        return x, running_mean, running_var

    @staticmethod
    @once_differentiable
    def backward(ctx, dz, _drunning_mean, _drunning_var):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
            edz_local = edz.clone()
            eydz_local = eydz.clone()

            if ctx.world_size > 1:
                edz *= ctx.factor
                dist.all_reduce(edz, dist.ReduceOp.SUM)

                eydz *= ctx.factor
                dist.all_reduce(eydz, dist.ReduceOp.SUM)
        else:
            edz_local = edz = dz.new_zeros(dz.size(1))
            eydz_local = eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        # dweight = eydz_local * weight.sign() if ctx.affine else None
        dweight = eydz_local if ctx.affine else None
        if dweight is not None:
            dweight[weight < 0] *= -1
        dbias = edz_local if ctx.affine else None

        return dx, dweight, dbias, None, None, None, None, None, None, None


inplace_abn = InPlaceABN.apply
inplace_abn_sync = InPlaceABNSync.apply

__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]
annotator/segm/modules/misc.py
ADDED
@@ -0,0 +1,21 @@
import torch.nn as nn
import torch
import torch.distributed as dist

class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        """Global average pooling over the input's spatial dimensions"""
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, inputs):
        in_size = inputs.size()
        return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2)

class SingleGPU(nn.Module):
    def __init__(self, module):
        super(SingleGPU, self).__init__()
        self.module = module

    def forward(self, input):
        return self.module(input.cuda(non_blocking=True))
annotator/segm/modules/residual.py
ADDED
@@ -0,0 +1,182 @@
from collections import OrderedDict

import torch.nn as nn

from .bn import ABN, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
import torch.nn.functional as functional


class ResidualBlock(nn.Module):
    """Configurable residual block

    Parameters
    ----------
    in_channels : int
        Number of input channels.
    channels : list of int
        Number of channels in the internal feature maps. Can either have two or three elements: with two,
        construct a residual block with two `3 x 3` convolutions; with three, construct a bottleneck block
        with `1 x 1`, then `3 x 3`, then `1 x 1` convolutions.
    stride : int
        Stride of the first `3 x 3` convolution
    dilation : int
        Dilation to apply to the `3 x 3` convolutions.
    groups : int
        Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
        bottleneck blocks.
    norm_act : callable
        Function to create normalization / activation Module.
    dropout: callable
        Function to create Dropout Module.
    """

    def __init__(self,
                 in_channels,
                 channels,
                 stride=1,
                 dilation=1,
                 groups=1,
                 norm_act=ABN,
                 dropout=None):
        super(ResidualBlock, self).__init__()

        # Check parameters for inconsistencies
        if len(channels) != 2 and len(channels) != 3:
            raise ValueError("channels must contain either two or three values")
        if len(channels) == 2 and groups != 1:
            raise ValueError("groups > 1 are only valid if len(channels) == 3")

        is_bottleneck = len(channels) == 3
        need_proj_conv = stride != 1 or in_channels != channels[-1]

        if not is_bottleneck:
            bn2 = norm_act(channels[1])
            bn2.activation = ACT_NONE
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
                                    dilation=dilation)),
                ("bn1", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    dilation=dilation)),
                ("bn2", bn2)
            ]
            if dropout is not None:
                layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
        else:
            bn3 = norm_act(channels[2])
            bn3.activation = ACT_NONE
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=1, padding=0, bias=False)),
                ("bn1", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=stride, padding=dilation, bias=False,
                                    groups=groups, dilation=dilation)),
                ("bn2", norm_act(channels[1])),
                ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)),
                ("bn3", bn3)
            ]
            if dropout is not None:
                layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
        self.convs = nn.Sequential(OrderedDict(layers))

        if need_proj_conv:
            self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)
            self.proj_bn = norm_act(channels[-1])
            self.proj_bn.activation = ACT_NONE

    def forward(self, x):
        if hasattr(self, "proj_conv"):
            residual = self.proj_conv(x)
            residual = self.proj_bn(residual)
        else:
            residual = x
        x = self.convs(x) + residual

        if self.convs.bn1.activation == ACT_LEAKY_RELU:
            return functional.leaky_relu(x, negative_slope=self.convs.bn1.slope, inplace=True)
        elif self.convs.bn1.activation == ACT_ELU:
            return functional.elu(x, inplace=True)
        else:
            return x


class IdentityResidualBlock(nn.Module):
    def __init__(self,
                 in_channels,
                 channels,
                 stride=1,
                 dilation=1,
                 groups=1,
                 norm_act=ABN,
                 dropout=None):
        """Configurable identity-mapping residual block

        Parameters
        ----------
        in_channels : int
            Number of input channels.
        channels : list of int
            Number of channels in the internal feature maps. Can either have two or three elements: with two,
            construct a residual block with two `3 x 3` convolutions; with three, construct a bottleneck block
            with `1 x 1`, then `3 x 3`, then `1 x 1` convolutions.
        stride : int
            Stride of the first `3 x 3` convolution
        dilation : int
            Dilation to apply to the `3 x 3` convolutions.
        groups : int
            Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
            bottleneck blocks.
        norm_act : callable
            Function to create normalization / activation Module.
        dropout: callable
            Function to create Dropout Module.
        """
        super(IdentityResidualBlock, self).__init__()

        # Check parameters for inconsistencies
        if len(channels) != 2 and len(channels) != 3:
            raise ValueError("channels must contain either two or three values")
        if len(channels) == 2 and groups != 1:
            raise ValueError("groups > 1 are only valid if len(channels) == 3")

        is_bottleneck = len(channels) == 3
        need_proj_conv = stride != 1 or in_channels != channels[-1]

        self.bn1 = norm_act(in_channels)
        if not is_bottleneck:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
                                    dilation=dilation)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    dilation=dilation))
            ]
            if dropout is not None:
                layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
        else:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    groups=groups, dilation=dilation)),
                ("bn3", norm_act(channels[1])),
                ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
            ]
            if dropout is not None:
                layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
        self.convs = nn.Sequential(OrderedDict(layers))

        if need_proj_conv:
            self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        if hasattr(self, "proj_conv"):
            bn1 = self.bn1(x)
            shortcut = self.proj_conv(bn1)
        else:
            shortcut = x.clone()
            bn1 = self.bn1(x)

        out = self.convs(bn1)
        out.add_(shortcut)

        return out
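IdentityResidualBlock follows the pre-activation layout: bn1 runs first and feeds both the convolution stack and, when a projection is needed, the 1x1 shortcut. A quick sketch of a bottleneck instance, with arbitrary example channel sizes and input shape (in the bottleneck case the stride is applied in conv1 and in the projection):

    # example bottleneck: three channel values select the 1x1 / 3x3 / 1x1 path
    import torch

    block = IdentityResidualBlock(in_channels=64, channels=[32, 32, 128], stride=2)
    y = block(torch.randn(1, 64, 56, 56))   # stride 2 halves the spatial size
    print(y.shape)                          # torch.Size([1, 128, 28, 28])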
annotator/segm/modules/src/checks.h
ADDED
@@ -0,0 +1,15 @@
#pragma once

#include <ATen/ATen.h>

// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
  #define AT_CHECK AT_ASSERT
#endif

#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")

#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
annotator/segm/modules/src/inplace_abn.cpp
ADDED
@@ -0,0 +1,95 @@
#include <torch/extension.h>

#include <vector>

#include "inplace_abn.h"

std::vector<at::Tensor> mean_var(at::Tensor x) {
  if (x.is_cuda()) {
    if (x.type().scalarType() == at::ScalarType::Half) {
      return mean_var_cuda_h(x);
    } else {
      return mean_var_cuda(x);
    }
  } else {
    return mean_var_cpu(x);
  }
}

at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                   bool affine, float eps) {
  if (x.is_cuda()) {
    if (x.type().scalarType() == at::ScalarType::Half) {
      return forward_cuda_h(x, mean, var, weight, bias, affine, eps);
    } else {
      return forward_cuda(x, mean, var, weight, bias, affine, eps);
    }
  } else {
    return forward_cpu(x, mean, var, weight, bias, affine, eps);
  }
}

std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                 bool affine, float eps) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps);
    } else {
      return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
    }
  } else {
    return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
  }
}

at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                    at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps);
    } else {
      return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
    }
  } else {
    return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
  }
}

void leaky_relu_forward(at::Tensor z, float slope) {
  at::leaky_relu_(z, slope);
}

void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return leaky_relu_backward_cuda_h(z, dz, slope);
    } else {
      return leaky_relu_backward_cuda(z, dz, slope);
    }
  } else {
    return leaky_relu_backward_cpu(z, dz, slope);
  }
}

void elu_forward(at::Tensor z) {
  at::elu_(z);
}

void elu_backward(at::Tensor z, at::Tensor dz) {
  if (z.is_cuda()) {
    return elu_backward_cuda(z, dz);
  } else {
    return elu_backward_cpu(z, dz);
  }
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("mean_var", &mean_var, "Mean and variance computation");
  m.def("forward", &forward, "In-place forward computation");
  m.def("edz_eydz", &edz_eydz, "First part of backward computation");
  m.def("backward", &backward, "Second part of backward computation");
  m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation");
  m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion");
  m.def("elu_forward", &elu_forward, "Elu forward computation");
  m.def("elu_backward", &elu_backward, "Elu backward computation and inversion");
}
annotator/segm/modules/src/inplace_abn.h
ADDED
@@ -0,0 +1,88 @@
#pragma once

#include <ATen/ATen.h>

#include <vector>

std::vector<at::Tensor> mean_var_cpu(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x);

at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps);
at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps);
at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps);

std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps);

at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps);

void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope);

void elu_backward_cpu(at::Tensor z, at::Tensor dz);
void elu_backward_cuda(at::Tensor z, at::Tensor dz);

static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
  num = x.size(0);
  chn = x.size(1);
  sp = 1;
  for (int64_t i = 2; i < x.ndimension(); ++i)
    sp *= x.size(i);
}

/*
 * Specialized CUDA reduction functions for BN
 */
#ifdef __CUDACC__

#include "utils/cuda.cuh"

template <typename T, typename Op>
__device__ T reduce(Op op, int plane, int N, int S) {
  T sum = (T)0;
  for (int batch = 0; batch < N; ++batch) {
    for (int x = threadIdx.x; x < S; x += blockDim.x) {
      sum += op(batch, plane, x);
    }
  }

  // sum over NumThreads within a warp
  sum = warpSum(sum);

  // 'transpose', and reduce within warp again
  __shared__ T shared[32];
  __syncthreads();
  if (threadIdx.x % WARP_SIZE == 0) {
    shared[threadIdx.x / WARP_SIZE] = sum;
  }
  if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
    // zero out the other entries in shared
    shared[threadIdx.x] = (T)0;
  }
  __syncthreads();
  if (threadIdx.x / WARP_SIZE == 0) {
    sum = warpSum(shared[threadIdx.x]);
    if (threadIdx.x == 0) {
      shared[0] = sum;
    }
  }
  __syncthreads();

  // Everyone picks it up, should be broadcast into the whole gradInput
  return shared[0];
}
#endif
annotator/segm/modules/src/inplace_abn_cpu.cpp
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <ATen/ATen.h>

#include <vector>

#include "utils/checks.h"
#include "inplace_abn.h"

at::Tensor reduce_sum(at::Tensor x) {
  if (x.ndimension() == 2) {
    return x.sum(0);
  } else {
    auto x_view = x.view({x.size(0), x.size(1), -1});
    return x_view.sum(-1).sum(0);
  }
}

at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
  if (x.ndimension() == 2) {
    return v;
  } else {
    std::vector<int64_t> broadcast_size = {1, -1};
    for (int64_t i = 2; i < x.ndimension(); ++i)
      broadcast_size.push_back(1);

    return v.view(broadcast_size);
  }
}

int64_t count(at::Tensor x) {
  int64_t count = x.size(0);
  for (int64_t i = 2; i < x.ndimension(); ++i)
    count *= x.size(i);

  return count;
}

at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {
  if (affine) {
    return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z);
  } else {
    return z;
  }
}

std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
  auto num = count(x);
  auto mean = reduce_sum(x) / num;
  auto diff = x - broadcast_to(mean, x);
  auto var = reduce_sum(diff.pow(2)) / num;

  return {mean, var};
}

at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps) {
  auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
  auto mul = at::rsqrt(var + eps) * gamma;

  x.sub_(broadcast_to(mean, x));
  x.mul_(broadcast_to(mul, x));
  if (affine) x.add_(broadcast_to(bias, x));

  return x;
}

std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps) {
  auto edz = reduce_sum(dz);
  auto y = invert_affine(z, weight, bias, affine, eps);
  auto eydz = reduce_sum(y * dz);

  return {edz, eydz};
}

at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  auto y = invert_affine(z, weight, bias, affine, eps);
  auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);

  auto num = count(z);
  auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
  return dx;
}

void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();

    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        _z[i] *= 1 / slope;
        _dz[i] *= slope;
      }
    }
  }));
}

void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();

    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        _z[i] = log1p(_z[i]);
        _dz[i] *= (_z[i] + 1.f);
      }
    }
  }));
}
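The per-channel statistics and normalization in mean_var_cpu / forward_cpu can be reproduced with stock PyTorch ops, which is handy as a sanity check. A sketch, assuming a contiguous NCHW float tensor and the affine case (not part of the extension):

import torch

def abn_forward_reference(x, weight, bias, eps=1e-5):
    # per-channel mean and population variance over batch and spatial dims
    mean = x.mean(dim=(0, 2, 3))
    var = x.var(dim=(0, 2, 3), unbiased=False)
    # gamma = |weight| + eps, mul = rsqrt(var + eps) * gamma, as in forward_cpu
    mul = torch.rsqrt(var + eps) * (weight.abs() + eps)
    y = (x - mean[None, :, None, None]) * mul[None, :, None, None]
    return y + bias[None, :, None, None]

x = torch.randn(2, 3, 8, 8)
y = abn_forward_reference(x, torch.ones(3), torch.zeros(3))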
annotator/segm/modules/src/inplace_abn_cuda.cu
ADDED
@@ -0,0 +1,333 @@
#include <ATen/ATen.h>

#include <thrust/device_ptr.h>
#include <thrust/transform.h>

#include <vector>

#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"

#include <ATen/cuda/CUDAContext.h>

// Operations for reduce
template<typename T>
struct SumOp {
  __device__ SumOp(const T *t, int c, int s)
      : tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ T operator()(int batch, int plane, int n) {
    return tensor[(batch * chn + plane) * sp + n];
  }
  const T *tensor;
  const int chn;
  const int sp;
};

template<typename T>
struct VarOp {
  __device__ VarOp(T m, const T *t, int c, int s)
      : mean(m), tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ T operator()(int batch, int plane, int n) {
    T val = tensor[(batch * chn + plane) * sp + n];
    return (val - mean) * (val - mean);
  }
  const T mean;
  const T *tensor;
  const int chn;
  const int sp;
};

template<typename T>
struct GradOp {
  __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
      : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
  __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
    T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
    T _dz = dz[(batch * chn + plane) * sp + n];
    return Pair<T>(_dz, _y * _dz);
  }
  const T weight;
  const T bias;
  const T *z;
  const T *dz;
  const int chn;
  const int sp;
};

/***********
 * mean_var
 ***********/

template<typename T>
__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
  int plane = blockIdx.x;
  T norm = T(1) / T(num * sp);

  T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, sp) * norm;
  __syncthreads();
  T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, sp) * norm;

  if (threadIdx.x == 0) {
    mean[plane] = _mean;
    var[plane] = _var;
  }
}

std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Prepare output tensors
  auto mean = at::empty({chn}, x.options());
  auto var = at::empty({chn}, x.options());

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
    mean_var_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        num, chn, sp);
  }));

  return {mean, var};
}

/**********
 * forward
 **********/

template<typename T>
__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
                               bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _mean = mean[plane];
  T _var = var[plane];
  T _weight = affine ? abs(weight[plane]) + eps : T(1);
  T _bias = affine ? bias[plane] : T(0);

  T mul = rsqrt(_var + eps) * _weight;

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      T _x = x[(batch * chn + plane) * sp + n];
      T _y = (_x - _mean) * mul + _bias;

      x[(batch * chn + plane) * sp + n] = _y;
    }
  }
}

at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
    forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return x;
}

/***********
 * edz_eydz
 ***********/

template<typename T>
__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
                                T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _weight = affine ? abs(weight[plane]) + eps : 1.f;
  T _bias = affine ? bias[plane] : 0.f;

  Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, sp);
  __syncthreads();

  if (threadIdx.x == 0) {
    edz[plane] = res.v1;
    eydz[plane] = res.v2;
  }
}

std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto edz = at::empty({chn}, z.options());
  auto eydz = at::empty({chn}, z.options());

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
    edz_eydz_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return {edz, eydz};
}

/***********
 * backward
 ***********/

template<typename T>
__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
                                const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _weight = affine ? abs(weight[plane]) + eps : 1.f;
  T _bias = affine ? bias[plane] : 0.f;
  T _var = var[plane];
  T _edz = edz[plane];
  T _eydz = eydz[plane];

  T _mul = _weight * rsqrt(_var + eps);
  T count = T(num * sp);

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      T _dz = dz[(batch * chn + plane) * sp + n];
      T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;

      dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
    }
  }
}

at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto dx = at::zeros_like(z);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
    backward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        dx.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return dx;
}

/**************
 * activations
 **************/

template<typename T>
inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_dz,
                       [slope] __device__ (const T& dz) { return dz * slope; },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [slope] __device__ (const T& z) { return z / slope; },
                       [] __device__ (const T& z) { return z < 0; });
}

void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
    leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
  }));
}

template<typename T>
inline void elu_backward_impl(T *z, T *dz, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_z, th_dz,
                       [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [] __device__ (const T& z) { return log1p(z); },
                       [] __device__ (const T& z) { return z < 0; });
}

void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
    elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
  }));
}
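A note on the activation backward passes: because the normalization is in-place, only the activated output y is kept, so when y < 0 the leaky-ReLU backward both rescales the incoming gradient by the slope and recovers the pre-activation value as y / slope. An out-of-place Python sketch of the same transformation (for illustration only, not the in-place CUDA path):

import torch

def leaky_relu_backward_reference(y, dz, slope=0.01):
    # y is the stored post-activation output, dz the upstream gradient
    neg = y < 0
    dz = torch.where(neg, dz * slope, dz)      # gradient through leaky ReLU
    x = torch.where(neg, y / slope, y)         # recover the pre-activation input
    return x, dz

y = torch.nn.functional.leaky_relu(torch.randn(5), negative_slope=0.01)
x, dz = leaky_relu_backward_reference(y, torch.ones(5))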
annotator/segm/modules/src/inplace_abn_cuda_half.cu
ADDED
@@ -0,0 +1,275 @@
#include <ATen/ATen.h>

#include <cuda_fp16.h>

#include <vector>

#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"

#include <ATen/cuda/CUDAContext.h>

// Operations for reduce
struct SumOpH {
  __device__ SumOpH(const half *t, int c, int s)
      : tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ float operator()(int batch, int plane, int n) {
    return __half2float(tensor[(batch * chn + plane) * sp + n]);
  }
  const half *tensor;
  const int chn;
  const int sp;
};

struct VarOpH {
  __device__ VarOpH(float m, const half *t, int c, int s)
      : mean(m), tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ float operator()(int batch, int plane, int n) {
    const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]);
    return (t - mean) * (t - mean);
  }
  const float mean;
  const half *tensor;
  const int chn;
  const int sp;
};

struct GradOpH {
  __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s)
      : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
  __device__ __forceinline__ Pair<float> operator()(int batch, int plane, int n) {
    float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight;
    float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
    return Pair<float>(_dz, _y * _dz);
  }
  const float weight;
  const float bias;
  const half *z;
  const half *dz;
  const int chn;
  const int sp;
};

/***********
 * mean_var
 ***********/

__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) {
  int plane = blockIdx.x;
  float norm = 1.f / static_cast<float>(num * sp);

  float _mean = reduce<float, SumOpH>(SumOpH(x, chn, sp), plane, num, sp) * norm;
  __syncthreads();
  float _var = reduce<float, VarOpH>(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm;

  if (threadIdx.x == 0) {
    mean[plane] = _mean;
    var[plane] = _var;
  }
}

std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Prepare output tensors
  auto mean = at::empty({chn},x.options().dtype(at::kFloat));
  auto var = at::empty({chn},x.options().dtype(at::kFloat));

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  mean_var_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      num, chn, sp);

  return {mean, var};
}

/**********
 * forward
 **********/

__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias,
                                 bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  const float _mean = mean[plane];
  const float _var = var[plane];
  const float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  const float _bias = affine ? bias[plane] : 0.f;

  const float mul = rsqrt(_var + eps) * _weight;

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      half *x_ptr = x + (batch * chn + plane) * sp + n;
      float _x = __half2float(*x_ptr);
      float _y = (_x - _mean) * mul + _bias;

      *x_ptr = __float2half(_y);
    }
  }
}

at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  forward_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      affine, eps, num, chn, sp);

  return x;
}

__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias,
                                  float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  float _bias = affine ? bias[plane] : 0.f;

  Pair<float> res = reduce<Pair<float>, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp);
  __syncthreads();

  if (threadIdx.x == 0) {
    edz[plane] = res.v1;
    eydz[plane] = res.v2;
  }
}

std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto edz = at::empty({chn},z.options().dtype(at::kFloat));
  auto eydz = at::empty({chn},z.options().dtype(at::kFloat));

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  edz_eydz_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      affine, eps, num, chn, sp);

  return {edz, eydz};
}

__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz,
                                  const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  float _bias = affine ? bias[plane] : 0.f;
  float _var = var[plane];
  float _edz = edz[plane];
  float _eydz = eydz[plane];

  float _mul = _weight * rsqrt(_var + eps);
  float count = float(num * sp);

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
      float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight;

      dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul);
    }
  }
}

at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto dx = at::zeros_like(z);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  backward_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      reinterpret_cast<half*>(dx.data<at::Half>()),
      affine, eps, num, chn, sp);

  return dx;
}

__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){
    float _z = __half2float(z[i]);
    if (_z < 0) {
      dz[i] = __float2half(__half2float(dz[i]) * slope);
      z[i] = __float2half(_z / slope);
    }
  }
}

void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();
  dim3 threads(getNumThreads(count));
  dim3 blocks = (count + threads.x - 1) / threads.x;
  auto stream = at::cuda::getCurrentCUDAStream();
  leaky_relu_backward_impl_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      slope, count);
}
annotator/segm/modules/src/utils/checks.h
ADDED
@@ -0,0 +1,15 @@
#pragma once

#include <ATen/ATen.h>

// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif

#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")

#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
annotator/segm/modules/src/utils/common.h
ADDED
@@ -0,0 +1,49 @@
#pragma once

#include <ATen/ATen.h>

/*
 * Functions to share code between CPU and GPU
 */

#ifdef __CUDACC__
// CUDA versions

#define HOST_DEVICE __host__ __device__
#define INLINE_HOST_DEVICE __host__ __device__ inline
#define FLOOR(x) floor(x)

#if __CUDA_ARCH__ >= 600
// Recent compute capabilities have block-level atomicAdd for all data types, so we use that
#define ACCUM(x,y) atomicAdd_block(&(x),(y))
#else
// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float
// and use the known atomicCAS-based implementation for double
template<typename data_t>
__device__ inline data_t atomic_add(data_t *address, data_t val) {
  return atomicAdd(address, val);
}

template<>
__device__ inline double atomic_add(double *address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
}

#define ACCUM(x,y) atomic_add(&(x),(y))
#endif // #if __CUDA_ARCH__ >= 600

#else
// CPU versions

#define HOST_DEVICE
#define INLINE_HOST_DEVICE inline
#define FLOOR(x) std::floor(x)
#define ACCUM(x,y) (x) += (y)

#endif // #ifdef __CUDACC__
annotator/segm/modules/src/utils/cuda.cuh
ADDED
@@ -0,0 +1,71 @@
#pragma once

/*
 * General settings and functions
 */
const int WARP_SIZE = 32;
const int MAX_BLOCK_SIZE = 1024;

static int getNumThreads(int nElem) {
  int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE};
  for (int i = 0; i < 6; ++i) {
    if (nElem <= threadSizes[i]) {
      return threadSizes[i];
    }
  }
  return MAX_BLOCK_SIZE;
}

/*
 * Reduction utilities
 */
template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,
                                            unsigned int mask = 0xffffffff) {
#if CUDART_VERSION >= 9000
  return __shfl_xor_sync(mask, value, laneMask, width);
#else
  return __shfl_xor(value, laneMask, width);
#endif
}

__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); }

template<typename T>
struct Pair {
  T v1, v2;
  __device__ Pair() {}
  __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
  __device__ Pair(T v) : v1(v), v2(v) {}
  __device__ Pair(int v) : v1(v), v2(v) {}
  __device__ Pair &operator+=(const Pair<T> &a) {
    v1 += a.v1;
    v2 += a.v2;
    return *this;
  }
};

template<typename T>
static __device__ __forceinline__ T warpSum(T val) {
#if __CUDA_ARCH__ >= 300
  for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
    val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
  }
#else
  __shared__ T values[MAX_BLOCK_SIZE];
  values[threadIdx.x] = val;
  __threadfence_block();
  const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
  for (int i = 1; i < WARP_SIZE; i++) {
    val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
  }
#endif
  return val;
}

template<typename T>
static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
  value.v1 = warpSum(value.v1);
  value.v2 = warpSum(value.v2);
  return value;
}
annotator/segm/networks/AugmentCE2P.py
ADDED
@@ -0,0 +1,337 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : AugmentCE2P.py
@Time : 8/4/19 3:35 PM
@Desc :
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""

import functools

import torch
import torch.nn as nn
from torch.nn import functional as F
# Note here we adopt the InplaceABNSync implementation from https://github.com/mapillary/inplace_abn
# By default, the InplaceABNSync module contains a BatchNorm Layer and a LeakyReLu layer
from ..modules import InPlaceABNSync

BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')

affine_par = True

pretrained_settings = {
    'resnet101': {
        'imagenet': {
            'input_space': 'BGR',
            'input_size': [3, 224, 224],
            'input_range': [0, 1],
            'mean': [0.406, 0.456, 0.485],
            'std': [0.225, 0.224, 0.229],
            'num_classes': 1000
        }
    },
}


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=False)
        self.relu_inplace = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.dilation = dilation
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out = out + residual
        out = self.relu_inplace(out)

        return out


class PSPModule(nn.Module):
    """
    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
    """

    def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
        super(PSPModule, self).__init__()

        self.stages = []
        self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes])
        self.bottleneck = nn.Sequential(
            nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1,
                      bias=False),
            InPlaceABNSync(out_features),
        )

    def _make_stage(self, features, out_features, size):
        prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
        conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        bn = InPlaceABNSync(out_features)
        return nn.Sequential(prior, conv, bn)

    def forward(self, feats):
        h, w = feats.size(2), feats.size(3)
        priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in
                  self.stages] + [feats]
        bottle = self.bottleneck(torch.cat(priors, 1))
        return bottle


class ASPPModule(nn.Module):
    """
    Reference:
        Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*
    """

    def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)):
        super(ASPPModule, self).__init__()

        self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                   nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
                                             bias=False),
                                   InPlaceABNSync(inner_features))
        self.conv2 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(inner_features))
        self.conv3 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
            InPlaceABNSync(inner_features))
        self.conv4 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
            InPlaceABNSync(inner_features))
        self.conv5 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
            InPlaceABNSync(inner_features))

        self.bottleneck = nn.Sequential(
            nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        _, _, h, w = x.size()

        feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)

        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)
        out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)

        bottle = self.bottleneck(out)
        return bottle


class Edge_Module(nn.Module):
    """
    Edge Learning Branch
    """

    def __init__(self, in_fea=[256, 512, 1024], mid_fea=256, out_fea=2):
        super(Edge_Module, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_fea[0], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_fea[1], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_fea[2], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        self.conv4 = nn.Conv2d(mid_fea, out_fea, kernel_size=3, padding=1, dilation=1, bias=True)
        self.conv5 = nn.Conv2d(out_fea * 3, out_fea, kernel_size=1, padding=0, dilation=1, bias=True)

    def forward(self, x1, x2, x3):
        _, _, h, w = x1.size()

        edge1_fea = self.conv1(x1)
        edge1 = self.conv4(edge1_fea)
        edge2_fea = self.conv2(x2)
        edge2 = self.conv4(edge2_fea)
        edge3_fea = self.conv3(x3)
        edge3 = self.conv4(edge3_fea)

        edge2_fea = F.interpolate(edge2_fea, size=(h, w), mode='bilinear', align_corners=True)
        edge3_fea = F.interpolate(edge3_fea, size=(h, w), mode='bilinear', align_corners=True)
        edge2 = F.interpolate(edge2, size=(h, w), mode='bilinear', align_corners=True)
        edge3 = F.interpolate(edge3, size=(h, w), mode='bilinear', align_corners=True)

        edge = torch.cat([edge1, edge2, edge3], dim=1)
        edge_fea = torch.cat([edge1_fea, edge2_fea, edge3_fea], dim=1)
        edge = self.conv5(edge)

        return edge, edge_fea


class Decoder_Module(nn.Module):
    """
    Parsing Branch Decoder Module.
    """

    def __init__(self, num_classes):
        super(Decoder_Module, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(256, 48, kernel_size=1, stride=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(48)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(304, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256),
            nn.Conv2d(256, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256)
        )

        self.conv4 = nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)

    def forward(self, xt, xl):
        _, _, h, w = xl.size()
        xt = F.interpolate(self.conv1(xt), size=(h, w), mode='bilinear', align_corners=True)
        xl = self.conv2(xl)
        x = torch.cat([xt, xl], dim=1)
        x = self.conv3(x)
        seg = self.conv4(x)
        return seg, x


class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes):
        self.inplanes = 128
        super(ResNet, self).__init__()
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=False)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=False)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=False)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, multi_grid=(1, 1, 1))

        self.context_encoding = PSPModule(2048, 512)

        self.edge = Edge_Module()
        self.decoder = Decoder_Module(num_classes)

        self.fushion = nn.Sequential(
            nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256),
            nn.Dropout2d(0.1),
            nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)
        )

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion, affine=affine_par))

        layers = []
        generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
        layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
                            multi_grid=generate_multi_grid(0, multi_grid)))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(
                block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid)))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)
        x2 = self.layer1(x)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        x5 = self.layer4(x4)
        x = self.context_encoding(x5)
        parsing_result, parsing_fea = self.decoder(x, x2)
        # Edge Branch
        edge_result, edge_fea = self.edge(x2, x3, x4)
        # Fusion Branch
        x = torch.cat([parsing_fea, edge_fea], dim=1)
        fusion_result = self.fushion(x)
        return [[parsing_result, fusion_result], [edge_result]]


def initialize_pretrained_model(model, settings, pretrained='./models/resnet101-imagenet.pth'):
    model.input_space = settings['input_space']
    model.input_size = settings['input_size']
    model.input_range = settings['input_range']
    model.mean = settings['mean']
    model.std = settings['std']

    if pretrained is not None:
        saved_state_dict = torch.load(pretrained)
        new_params = model.state_dict().copy()
        for i in saved_state_dict:
            i_parts = i.split('.')
            if not i_parts[0] == 'fc':
                new_params['.'.join(i_parts[0:])] = saved_state_dict[i]
        model.load_state_dict(new_params)


def resnet101(num_classes=20, pretrained='./models/resnet101-imagenet.pth'):
    model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
    settings = pretrained_settings['resnet101']['imagenet']
    initialize_pretrained_model(model, settings, pretrained)
    return model
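A rough usage sketch for the parsing network defined above. It assumes the InPlaceABNSync extension builds in the current environment and skips the ImageNet checkpoint by passing pretrained=None; the 473x473 input size and the nested [[parsing, fusion], [edge]] return value follow the forward pass shown in the file.

import torch
from annotator.segm.networks.AugmentCE2P import resnet101

model = resnet101(num_classes=20, pretrained=None).eval()
with torch.no_grad():
    (parsing, fusion), (edge,) = model(torch.randn(1, 3, 473, 473))
print(parsing.shape, edge.shape)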
annotator/segm/networks/__init__.py
ADDED
@@ -0,0 +1,13 @@
from __future__ import absolute_import

from ..networks.AugmentCE2P import resnet101

__factory = {
    'resnet101': resnet101,
}


def init_model(name, *args, **kwargs):
    if name not in __factory.keys():
        raise KeyError("Unknown model arch: {}".format(name))
    return __factory[name](*args, **kwargs)
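The factory turns the backbone choice into a string key; under the same assumptions as the previous sketch, the equivalent call through init_model would be:

from annotator.segm.networks import init_model

net = init_model('resnet101', num_classes=20, pretrained=None)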
annotator/segm/networks/backbone/mobilenetv2.py
ADDED
@@ -0,0 +1,156 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : mobilenetv2.py
@Time : 8/4/19 3:35 PM
@Desc :
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""

import torch.nn as nn
import math
import functools

from modules import InPlaceABN, InPlaceABNSync

BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')

__all__ = ['mobilenetv2']


def conv_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )


def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        self.use_res_connect = self.stride == 1 and inp == oup

        if expand_ratio == 1:
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                BatchNorm2d(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, n_class=1000, input_size=224, width_mult=1.):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        interverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],  # layer 2
            [6, 32, 3, 2],  # layer 3
            [6, 64, 4, 2],
            [6, 96, 3, 1],  # layer 4
            [6, 160, 3, 2],
            [6, 320, 1, 1],  # layer 5
        ]

        # building first layer
        assert input_size % 32 == 0
        input_channel = int(input_channel * width_mult)
        self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
        self.features = [conv_bn(3, input_channel, 2)]
        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                if i == 0:
                    self.features.append(block(input_channel, output_channel, s, expand_ratio=t))
                else:
                    self.features.append(block(input_channel, output_channel, 1, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        self.features.append(conv_1x1_bn(input_channel, self.last_channel))
        # make it nn.Sequential
        self.features = nn.Sequential(*self.features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, n_class),
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.mean(3).mean(2)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def mobilenetv2(pretrained=False, **kwargs):
    """Constructs a MobileNet_V2 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = MobileNetV2(n_class=1000, **kwargs)
    if pretrained:
        model.load_state_dict(load_url(model_urls['mobilenetv2']), strict=False)
    return model
annotator/segm/networks/backbone/resnet.py
ADDED
@@ -0,0 +1,205 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : resnet.py
@Time : 8/4/19 3:35 PM
@Desc :
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""

import functools
import torch.nn as nn
import math
from torch.utils.model_zoo import load_url

from modules import InPlaceABNSync

BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')

__all__ = ['ResNet', 'resnet18', 'resnet50', 'resnet101']  # resnet101 is coming soon!

model_urls = {
    'resnet18': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet18-imagenet.pth',
    'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth',
    'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth'
}


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 128
        super(ResNet, self).__init__()
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
|
154 |
+
|
155 |
+
return nn.Sequential(*layers)
|
156 |
+
|
157 |
+
def forward(self, x):
|
158 |
+
x = self.relu1(self.bn1(self.conv1(x)))
|
159 |
+
x = self.relu2(self.bn2(self.conv2(x)))
|
160 |
+
x = self.relu3(self.bn3(self.conv3(x)))
|
161 |
+
x = self.maxpool(x)
|
162 |
+
|
163 |
+
x = self.layer1(x)
|
164 |
+
x = self.layer2(x)
|
165 |
+
x = self.layer3(x)
|
166 |
+
x = self.layer4(x)
|
167 |
+
|
168 |
+
x = self.avgpool(x)
|
169 |
+
x = x.view(x.size(0), -1)
|
170 |
+
x = self.fc(x)
|
171 |
+
|
172 |
+
return x
|
173 |
+
|
174 |
+
|
175 |
+
def resnet18(pretrained=False, **kwargs):
|
176 |
+
"""Constructs a ResNet-18 model.
|
177 |
+
Args:
|
178 |
+
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
179 |
+
"""
|
180 |
+
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
|
181 |
+
if pretrained:
|
182 |
+
model.load_state_dict(load_url(model_urls['resnet18']))
|
183 |
+
return model
|
184 |
+
|
185 |
+
|
186 |
+
def resnet50(pretrained=False, **kwargs):
|
187 |
+
"""Constructs a ResNet-50 model.
|
188 |
+
Args:
|
189 |
+
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
190 |
+
"""
|
191 |
+
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
192 |
+
if pretrained:
|
193 |
+
model.load_state_dict(load_url(model_urls['resnet50']), strict=False)
|
194 |
+
return model
|
195 |
+
|
196 |
+
|
197 |
+
def resnet101(pretrained=False, **kwargs):
|
198 |
+
"""Constructs a ResNet-101 model.
|
199 |
+
Args:
|
200 |
+
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
201 |
+
"""
|
202 |
+
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
203 |
+
if pretrained:
|
204 |
+
model.load_state_dict(load_url(model_urls['resnet101']), strict=False)
|
205 |
+
return model
|
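The distinguishing feature of this backbone is its "deep stem": three 3x3 convolutions plus a max-pool instead of torchvision's single 7x7 convolution. A minimal, standalone sketch of the spatial bookkeeping (plain nn.BatchNorm2d stands in for the InPlaceABNSync partial, so this runs without the CUDA extension):

import torch
import torch.nn as nn

# Deep 3x3 stem, mirroring conv1/conv2/conv3 + maxpool above.
stem = nn.Sequential(
    nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
    nn.Conv2d(64, 64, 3, padding=1, bias=False), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
    nn.Conv2d(64, 128, 3, padding=1, bias=False), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)
x = torch.randn(2, 3, 224, 224)
print(stem(x).shape)  # torch.Size([2, 128, 56, 56]): two stride-2 stages before layer1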
annotator/segm/networks/backbone/resnext.py
ADDED
@@ -0,0 +1,149 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author  : Peike Li
@Contact : peike.li@yahoo.com
@File    : resnext.py
@Time    : 8/11/19 8:58 PM
@Desc    :
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""
import functools
import torch.nn as nn
import math
from torch.utils.model_zoo import load_url

from modules import InPlaceABNSync

BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')

__all__ = ['ResNeXt', 'resnext101']  # support resnext 101

model_urls = {
    'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth',
    'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth'
}


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class GroupBottleneck(nn.Module):
    expansion = 2

    def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None):
        super(GroupBottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(planes * 2)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNeXt(nn.Module):

    def __init__(self, block, layers, groups=32, num_classes=1000):
        self.inplanes = 128
        super(ResNeXt, self).__init__()
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 128, layers[0], groups=groups)
        self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups)
        self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups)
        self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(1024 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, groups=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, groups, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=groups))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def resnext101(pretrained=False, **kwargs):
    """Constructs a ResNeXt-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on Places
    """
    model = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        model.load_state_dict(load_url(model_urls['resnext101']), strict=False)
    return model
annotator/segm/networks/context_encoding/aspp.py
ADDED
@@ -0,0 +1,64 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author  : Peike Li
@Contact : peike.li@yahoo.com
@File    : aspp.py
@Time    : 8/4/19 3:36 PM
@Desc    :
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""

import torch
import torch.nn as nn
from torch.nn import functional as F

from modules import InPlaceABNSync


class ASPPModule(nn.Module):
    """
    Reference:
        Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*
    """
    def __init__(self, features, out_features=512, inner_features=256, dilations=(12, 24, 36)):
        super(ASPPModule, self).__init__()

        self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                   nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
                                             bias=False),
                                   InPlaceABNSync(inner_features))
        self.conv2 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(inner_features))
        self.conv3 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
            InPlaceABNSync(inner_features))
        self.conv4 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
            InPlaceABNSync(inner_features))
        self.conv5 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
            InPlaceABNSync(inner_features))

        self.bottleneck = nn.Sequential(
            nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        _, _, h, w = x.size()

        feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)

        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)
        out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)

        bottle = self.bottleneck(out)
        return bottle
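A standalone sketch of the ASPP branch arithmetic above, with nn.BatchNorm2d standing in for InPlaceABNSync so it runs without the compiled extension; channel sizes are illustrative only:

import torch
import torch.nn as nn
import torch.nn.functional as F

features, inner, out_features = 2048, 256, 512
x = torch.randn(1, features, 32, 32)

image_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                           nn.Conv2d(features, inner, 1, bias=False), nn.BatchNorm2d(inner))
conv1x1 = nn.Sequential(nn.Conv2d(features, inner, 1, bias=False), nn.BatchNorm2d(inner))
atrous = [nn.Sequential(nn.Conv2d(features, inner, 3, padding=d, dilation=d, bias=False),
                        nn.BatchNorm2d(inner)) for d in (12, 24, 36)]

with torch.no_grad():
    for m in [image_pool, conv1x1, *atrous]:
        m.eval()  # avoid batch-norm batch statistics on a single sample
    pooled = F.interpolate(image_pool(x), size=x.shape[2:], mode='bilinear', align_corners=True)
    branches = [pooled, conv1x1(x)] + [m(x) for m in atrous]
    merged = torch.cat(branches, dim=1)                       # 5 * inner = 1280 channels
    bottled = nn.Conv2d(inner * 5, out_features, 1, bias=False)(merged)
print(merged.shape, bottled.shape)  # (1, 1280, 32, 32) -> (1, 512, 32, 32)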
annotator/segm/networks/context_encoding/ocnet.py
ADDED
@@ -0,0 +1,226 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author  : Peike Li
@Contact : peike.li@yahoo.com
@File    : ocnet.py
@Time    : 8/4/19 3:36 PM
@Desc    :
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""

import functools

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F

from modules import InPlaceABNSync
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')


class _SelfAttentionBlock(nn.Module):
    '''
    The basic implementation for self-attention block/non-local block
    Input:
        N X C X H X W
    Parameters:
        in_channels    : the dimension of the input feature map
        key_channels   : the dimension after the key/query transform
        value_channels : the dimension after the value transform
        scale          : choose the scale to downsample the input feature maps (save memory cost)
    Return:
        N X C X H X W
        position-aware context features.(w/o concate or add with the input)
    '''

    def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1):
        super(_SelfAttentionBlock, self).__init__()
        self.scale = scale
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.key_channels = key_channels
        self.value_channels = value_channels
        if out_channels is None:
            self.out_channels = in_channels
        self.pool = nn.MaxPool2d(kernel_size=(scale, scale))
        self.f_key = nn.Sequential(
            nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels,
                      kernel_size=1, stride=1, padding=0),
            InPlaceABNSync(self.key_channels),
        )
        self.f_query = self.f_key
        self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels,
                                 kernel_size=1, stride=1, padding=0)
        self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels,
                           kernel_size=1, stride=1, padding=0)
        nn.init.constant_(self.W.weight, 0)
        nn.init.constant_(self.W.bias, 0)

    def forward(self, x):
        batch_size, h, w = x.size(0), x.size(2), x.size(3)
        if self.scale > 1:
            x = self.pool(x)

        value = self.f_value(x).view(batch_size, self.value_channels, -1)
        value = value.permute(0, 2, 1)
        query = self.f_query(x).view(batch_size, self.key_channels, -1)
        query = query.permute(0, 2, 1)
        key = self.f_key(x).view(batch_size, self.key_channels, -1)

        sim_map = torch.matmul(query, key)
        sim_map = (self.key_channels ** -.5) * sim_map
        sim_map = F.softmax(sim_map, dim=-1)

        context = torch.matmul(sim_map, value)
        context = context.permute(0, 2, 1).contiguous()
        context = context.view(batch_size, self.value_channels, *x.size()[2:])
        context = self.W(context)
        if self.scale > 1:
            context = F.interpolate(input=context, size=(h, w), mode='bilinear', align_corners=True)
        return context


class SelfAttentionBlock2D(_SelfAttentionBlock):
    def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1):
        super(SelfAttentionBlock2D, self).__init__(in_channels,
                                                   key_channels,
                                                   value_channels,
                                                   out_channels,
                                                   scale)


class BaseOC_Module(nn.Module):
    """
    Implementation of the BaseOC module
    Parameters:
        in_features / out_features: the channels of the input / output feature maps.
        dropout: we choose 0.05 as the default value.
        size: you can apply multiple sizes. Here we only use one size.
    Return:
        features fused with Object context information.
    """

    def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])):
        super(BaseOC_Module, self).__init__()
        self.stages = []
        self.stages = nn.ModuleList(
            [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes])
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(2 * in_channels, out_channels, kernel_size=1, padding=0),
            InPlaceABNSync(out_channels),
            nn.Dropout2d(dropout)
        )

    def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size):
        return SelfAttentionBlock2D(in_channels,
                                    key_channels,
                                    value_channels,
                                    output_channels,
                                    size)

    def forward(self, feats):
        priors = [stage(feats) for stage in self.stages]
        context = priors[0]
        for i in range(1, len(priors)):
            context += priors[i]
        output = self.conv_bn_dropout(torch.cat([context, feats], 1))
        return output


class BaseOC_Context_Module(nn.Module):
    """
    Output only the context features.
    Parameters:
        in_features / out_features: the channels of the input / output feature maps.
        dropout: specify the dropout ratio
        fusion: We provide two different fusion method, "concat" or "add"
        size: we find that directly learn the attention weights on even 1/8 feature maps is hard.
    Return:
        features after "concat" or "add"
    """

    def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])):
        super(BaseOC_Context_Module, self).__init__()
        self.stages = []
        self.stages = nn.ModuleList(
            [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes])
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0),
            InPlaceABNSync(out_channels),
        )

    def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size):
        return SelfAttentionBlock2D(in_channels,
                                    key_channels,
                                    value_channels,
                                    output_channels,
                                    size)

    def forward(self, feats):
        priors = [stage(feats) for stage in self.stages]
        context = priors[0]
        for i in range(1, len(priors)):
            context += priors[i]
        output = self.conv_bn_dropout(context)
        return output


class ASP_OC_Module(nn.Module):
    def __init__(self, features, out_features=256, dilations=(12, 24, 36)):
        super(ASP_OC_Module, self).__init__()
        self.context = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True),
                                     InPlaceABNSync(out_features),
                                     BaseOC_Context_Module(in_channels=out_features, out_channels=out_features,
                                                           key_channels=out_features // 2, value_channels=out_features,
                                                           dropout=0, sizes=([2])))
        self.conv2 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
                                   InPlaceABNSync(out_features))
        self.conv3 = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
            InPlaceABNSync(out_features))
        self.conv4 = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
            InPlaceABNSync(out_features))
        self.conv5 = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
            InPlaceABNSync(out_features))

        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(out_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def _cat_each(self, feat1, feat2, feat3, feat4, feat5):
        assert (len(feat1) == len(feat2))
        z = []
        for i in range(len(feat1)):
            z.append(torch.cat((feat1[i], feat2[i], feat3[i], feat4[i], feat5[i]), 1))
        return z

    def forward(self, x):
        if isinstance(x, Variable):
            _, _, h, w = x.size()
        elif isinstance(x, tuple) or isinstance(x, list):
            _, _, h, w = x[0].size()
        else:
            raise RuntimeError('unknown input type')

        feat1 = self.context(x)
        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)

        if isinstance(x, Variable):
            out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)
        elif isinstance(x, tuple) or isinstance(x, list):
            out = self._cat_each(feat1, feat2, feat3, feat4, feat5)
        else:
            raise RuntimeError('unknown input type')
        output = self.conv_bn_dropout(out)
        return output
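The core of the self-attention block above is a dense (HW x HW) similarity map, which is why the `scale` pooling option exists to save memory. A shape walk-through with plain tensors (no InPlaceABNSync needed; channel sizes are illustrative only):

import torch
import torch.nn.functional as F

B, C_key, C_val, H, W = 1, 256, 512, 24, 16
query = torch.randn(B, H * W, C_key)   # f_query(x), flattened and transposed
key = torch.randn(B, C_key, H * W)     # f_key(x), flattened
value = torch.randn(B, H * W, C_val)   # f_value(x), flattened and transposed

sim_map = torch.matmul(query, key) * (C_key ** -0.5)   # (B, HW, HW): every position attends to every other
sim_map = F.softmax(sim_map, dim=-1)
context = torch.matmul(sim_map, value)                  # (B, HW, C_val)
context = context.permute(0, 2, 1).reshape(B, C_val, H, W)
print(sim_map.shape, context.shape)  # torch.Size([1, 384, 384]) torch.Size([1, 512, 24, 16])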
annotator/segm/networks/context_encoding/psp.py
ADDED
@@ -0,0 +1,48 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@Author  : Peike Li
@Contact : peike.li@yahoo.com
@File    : psp.py
@Time    : 8/4/19 3:36 PM
@Desc    :
@License : This source code is licensed under the license found in the
           LICENSE file in the root directory of this source tree.
"""

import torch
import torch.nn as nn
from torch.nn import functional as F

from modules import InPlaceABNSync


class PSPModule(nn.Module):
    """
    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
    """
    def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
        super(PSPModule, self).__init__()

        self.stages = []
        self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes])
        self.bottleneck = nn.Sequential(
            nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1,
                      bias=False),
            InPlaceABNSync(out_features),
        )

    def _make_stage(self, features, out_features, size):
        prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
        conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        bn = InPlaceABNSync(out_features)
        return nn.Sequential(prior, conv, bn)

    def forward(self, feats):
        h, w = feats.size(2), feats.size(3)
        priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in
                  self.stages] + [feats]
        bottle = self.bottleneck(torch.cat(priors, 1))
        return bottle
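A standalone sketch of the pyramid-pooling channel bookkeeping above: each level is average-pooled to size x size, projected by a 1x1 convolution (InPlaceABNSync omitted here), upsampled back, and concatenated with the input feature map before the bottleneck.

import torch
import torch.nn as nn
import torch.nn.functional as F

feats = torch.randn(1, 2048, 32, 32)
out_features, sizes = 512, (1, 2, 3, 6)

priors = []
for size in sizes:
    proj = nn.Conv2d(2048, out_features, kernel_size=1, bias=False)
    pooled = F.adaptive_avg_pool2d(feats, output_size=size)
    priors.append(F.interpolate(proj(pooled), size=feats.shape[2:], mode='bilinear', align_corners=True))

cat = torch.cat(priors + [feats], dim=1)
print(cat.shape)  # torch.Size([1, 4096, 32, 32]) == 2048 + 4 * 512 channels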
annotator/segm/transforms.py
ADDED
@@ -0,0 +1,167 @@
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import cv2
import torch

class BRG2Tensor_transform(object):
    def __call__(self, pic):
        img = torch.from_numpy(pic.transpose((2, 0, 1)))
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img

class BGR2RGB_transform(object):
    def __call__(self, tensor):
        return tensor[[2,1,0],:,:]

def flip_back(output_flipped, matched_parts):
    '''
    output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
    '''
    assert output_flipped.ndim == 4,\
        'output_flipped should be [batch_size, num_joints, height, width]'

    output_flipped = output_flipped[:, :, :, ::-1]

    for pair in matched_parts:
        tmp = output_flipped[:, pair[0], :, :].copy()
        output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
        output_flipped[:, pair[1], :, :] = tmp

    return output_flipped


def fliplr_joints(joints, joints_vis, width, matched_parts):
    """
    flip coords
    """
    # Flip horizontal
    joints[:, 0] = width - joints[:, 0] - 1

    # Change left-right parts
    for pair in matched_parts:
        joints[pair[0], :], joints[pair[1], :] = \
            joints[pair[1], :], joints[pair[0], :].copy()
        joints_vis[pair[0], :], joints_vis[pair[1], :] = \
            joints_vis[pair[1], :], joints_vis[pair[0], :].copy()

    return joints*joints_vis, joints_vis


def transform_preds(coords, center, scale, input_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords

def transform_parsing(pred, center, scale, width, height, input_size):

    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    target_pred = cv2.warpAffine(
        pred,
        trans,
        (int(width), int(height)),
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0))

    return target_pred

def transform_logits(logits, center, scale, width, height, input_size):

    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    channel = logits.shape[2]
    target_logits = []
    for i in range(channel):
        target_logit = cv2.warpAffine(
            logits[:,:,i],
            trans,
            (int(width), int(height)),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0))
        target_logits.append(target_logit)
    target_logits = np.stack(target_logits, axis=2)

    return target_logits


def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        print(scale)
        scale = np.array([scale, scale])

    scale_tmp = scale

    src_w = scale_tmp[0]
    dst_w = output_size[1]
    dst_h = output_size[0]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, (dst_w-1) * -0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [(dst_w-1) * 0.5, (dst_h-1) * 0.5]
    dst[1, :] = np.array([(dst_w-1) * 0.5, (dst_h-1) * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result


def crop(img, center, scale, output_size, rot=0):
    trans = get_affine_transform(center, scale, rot, output_size)

    dst_img = cv2.warpAffine(img,
                             trans,
                             (int(output_size[1]), int(output_size[0])),
                             flags=cv2.INTER_LINEAR)

    return dst_img
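A minimal round-trip sketch for the affine helpers above, assuming the module is importable as annotator.segm.transforms (the path follows this repo layout). The forward transform maps the crop centre to the centre of the network input, and composing it with inv=1 recovers the original point, which is how transform_preds maps predictions back to image space.

import numpy as np
from annotator.segm.transforms import get_affine_transform, affine_transform  # path assumed

center = np.array([200.0, 300.0])   # person centre in the source image (x, y)
scale = np.array([256.0, 384.0])    # source crop size (w, h) in pixels
input_size = np.array([384, 256])   # network input (h, w)

fwd = get_affine_transform(center, scale, 0, input_size)
inv = get_affine_transform(center, scale, 0, input_size, inv=1)

p_src = np.array([200.0, 300.0])
p_crop = affine_transform(p_src, fwd)   # centre lands at the crop centre, ~(127.5, 191.5)
p_back = affine_transform(p_crop, inv)  # and maps back to the source point
print(p_crop, np.allclose(p_back, p_src))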
annotator/util.py
ADDED
@@ -0,0 +1,49 @@
import numpy as np
import cv2
import os


annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts')


def HWC3(x):
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    H, W, C = x.shape
    assert C == 1 or C == 3 or C == 4
    if C == 3:
        return x
    if C == 1:
        return np.concatenate([x, x, x], axis=2)
    if C == 4:
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        y = color * alpha + 255.0 * (1.0 - alpha)
        y = y.clip(0, 255).astype(np.uint8)
        return y


def resize_image(input_image, resolution):
    H, W, C = input_image.shape
    H = float(H)
    W = float(W)
    k = float(resolution) / min(H, W)
    H *= k
    W *= k
    H = int(np.round(H / 64.0)) * 64
    W = int(np.round(W / 64.0)) * 64
    img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
    return img

def pad_image(img, min_aspect_ratio=0.625):
    H, W, C = img.shape
    if W/H < min_aspect_ratio:
        NEW_W = int(min_aspect_ratio * H)
        width_padding = (NEW_W-W)//2
        black_bg = np.zeros((H, NEW_W, 3), dtype=img.dtype)
        black_bg[:, width_padding:width_padding+W,:] = img
        return black_bg
    else:
        return img
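A behaviour sketch for the two helpers above (the import path is an assumption based on this repo layout): resize_image scales the short side to the requested resolution and snaps both sides to multiples of 64, while pad_image adds black bars left and right when an image is narrower than the minimum aspect ratio.

import numpy as np
from annotator.util import resize_image, pad_image  # path assumed

img = np.zeros((720, 405, 3), dtype=np.uint8)        # tall portrait crop
print(resize_image(img, 512).shape)                  # (896, 512, 3): both sides rounded to multiples of 64

narrow = np.zeros((512, 256, 3), dtype=np.uint8)     # aspect ratio 0.5 < 0.625
print(pad_image(narrow, min_aspect_ratio=0.625).shape)  # (512, 320, 3), since 0.625 * 512 = 320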
app.py
ADDED
@@ -0,0 +1,475 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from share import *
|
2 |
+
import config
|
3 |
+
import os
|
4 |
+
import cv2
|
5 |
+
import einops
|
6 |
+
import gradio as gr
|
7 |
+
import numpy as np
|
8 |
+
import torch
|
9 |
+
import random
|
10 |
+
import re
|
11 |
+
from datetime import datetime
|
12 |
+
from glob import glob
|
13 |
+
import argparse
|
14 |
+
|
15 |
+
from pytorch_lightning import seed_everything
|
16 |
+
from torchvision.transforms import ToPILImage
|
17 |
+
from annotator.util import pad_image, resize_image, HWC3
|
18 |
+
from annotator.openpose import OpenposeDetector
|
19 |
+
from cldm.model import create_model, load_state_dict
|
20 |
+
from cldm.ddim_hacked import DDIMSampler
|
21 |
+
from pathlib import Path
|
22 |
+
from PIL import Image
|
23 |
+
from omegaconf import OmegaConf
|
24 |
+
from ldm.util import instantiate_from_config, log_txt_as_img
|
25 |
+
from visconet.segm import ATRSegmentCropper as SegmentCropper
|
26 |
+
from huggingface_hub import snapshot_download
|
27 |
+
|
28 |
+
# supply directory of visual prompt images
|
29 |
+
HF_REPO = 'soonyau/visconet'
|
30 |
+
GALLERY_PATH = Path('./fashion/')
|
31 |
+
WOMEN_GALLERY_PATH = GALLERY_PATH/'WOMEN'
|
32 |
+
MEN_GALLERY_PATH = GALLERY_PATH/'MEN'
|
33 |
+
|
34 |
+
DEMO = True
|
35 |
+
LOG_SAMPLES = False
|
36 |
+
APP_FILES_PATH = Path('./app_files')
|
37 |
+
VISCON_IMAGE_PATH = APP_FILES_PATH/'default_images'
|
38 |
+
LOG_PATH = APP_FILES_PATH/'logs'
|
39 |
+
SAMPLE_IMAGE_PATH = APP_FILES_PATH/'samples'
|
40 |
+
|
41 |
+
DEFAULT_CONTROL_SCALE = 1.0
|
42 |
+
SCALE_CONFIG = {
|
43 |
+
'Default': [DEFAULT_CONTROL_SCALE]*13,
|
44 |
+
'DeepFakes':[1.0, 1.0, 1.0,
|
45 |
+
1.0, 1.0, 1.0,
|
46 |
+
0.5, 0.5, 0.5,
|
47 |
+
0.0, 0.0, 0.0, 0.0,],
|
48 |
+
'Faithful':[1,1,1,
|
49 |
+
1,1,1,
|
50 |
+
1,1,0.5,
|
51 |
+
0.5,0.5,0,0],
|
52 |
+
'Painting':[0.0,0.0,0.0,
|
53 |
+
0.5,0.5,0.5,
|
54 |
+
0.5,0.5,0.5,
|
55 |
+
0.5,0,0,0],
|
56 |
+
'Pose': [0.0,0.0,0.0,
|
57 |
+
0.0,0.0,0.0,
|
58 |
+
0.0,0.0,0.5,
|
59 |
+
0.0,0.0,0,0],
|
60 |
+
'Texture Transfer': [1.0,1.0,1.0,
|
61 |
+
1.0,1.0,1.0,
|
62 |
+
0.5,0.0,0.5,
|
63 |
+
0.0,0.0,0,0]
|
64 |
+
}
|
65 |
+
DEFAULT_SCALE_CONFIG = 'Default'
|
66 |
+
ignore_style_list = ['headwear', 'accesories', 'shoes']
|
67 |
+
|
68 |
+
global device
|
69 |
+
global segmentor
|
70 |
+
global apply_openpose
|
71 |
+
global style_encoder
|
72 |
+
global model
|
73 |
+
global ddim_sampler
|
74 |
+
|
75 |
+
def convert_fname(long_name):
|
76 |
+
gender = 'MEN' if long_name[7:10] == 'MEN' else 'WOMEN'
|
77 |
+
|
78 |
+
input_list = long_name.replace('fashion','').split('___')
|
79 |
+
|
80 |
+
# Define a regular expression pattern to match the relevant parts of each input string
|
81 |
+
if gender == 'MEN':
|
82 |
+
pattern = r'MEN(\w+)id(\d+)_(\d)(\w+)'
|
83 |
+
else:
|
84 |
+
pattern = r'WOMEN(\w+)id(\d+)_(\d)(\w+)'
|
85 |
+
# Use a list comprehension to extract the matching substrings from each input string, and format them into the desired output format
|
86 |
+
output_list = [f'{gender}/{category}/id_{id_num[:8]}/{id_num[8:]}_{view_num}_{view_desc}' for (category, id_num, view_num, view_desc) in re.findall(pattern, ' '.join(input_list))]
|
87 |
+
|
88 |
+
# Print the resulting list of formatted strings
|
89 |
+
return [f +'.jpg' for f in output_list]
|
90 |
+
|
91 |
+
def fetch_deepfashion(deepfashion_names):
|
92 |
+
src_name, dst_name = convert_fname(deepfashion_names)
|
93 |
+
input_image = np.array(Image.open(image_root/src_name))
|
94 |
+
pose_image = np.array(Image.open(str(pose_root/dst_name)))
|
95 |
+
mask_image = Image.open(str(mask_root/dst_name).replace('.jpg','_mask.png'))
|
96 |
+
|
97 |
+
temp = src_name.replace('.jpg','').split('/')
|
98 |
+
lastfolder = temp.pop(-1).replace('_','/', 1)
|
99 |
+
style_folder = style_root/('/'.join(temp+[lastfolder]))
|
100 |
+
viscon_images = []
|
101 |
+
for style_name in style_names:
|
102 |
+
f_path = style_folder/f'{style_name}.jpg'
|
103 |
+
if os.path.exists(str(f_path)):
|
104 |
+
viscon_images.append(np.array(Image.open(f_path)))
|
105 |
+
else:
|
106 |
+
viscon_images.append(None)
|
107 |
+
return [input_image, pose_image, mask_image, *viscon_images]
|
108 |
+
|
109 |
+
def select_gallery_image(evt: gr.SelectData):
|
110 |
+
return evt.target.value[evt.index]['name']
|
111 |
+
|
112 |
+
def select_default_strength(strength_config):
|
113 |
+
return SCALE_CONFIG[strength_config]
|
114 |
+
|
115 |
+
def change_all_scales(scale):
|
116 |
+
return [float(scale)]*13
|
117 |
+
|
118 |
+
def encode_style_images(style_images):
|
119 |
+
style_embeddings = []
|
120 |
+
|
121 |
+
for style_name, style_image in zip(style_names, style_images):
|
122 |
+
if style_image == None:
|
123 |
+
style_image = Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8))
|
124 |
+
|
125 |
+
#style_image = style_image.resize((224,224))
|
126 |
+
style_image = style_encoder.preprocess(style_image).to(device)
|
127 |
+
style_emb = style_encoder.postprocess(style_encoder(style_image)[0])
|
128 |
+
style_embeddings.append(style_emb)
|
129 |
+
|
130 |
+
styles = torch.tensor(np.array(style_embeddings)).squeeze(-2).unsqueeze(0).float().to(device)
|
131 |
+
return styles
|
132 |
+
|
133 |
+
def save_viscon_images(*viscon_images):
|
134 |
+
ret_images = []
|
135 |
+
for image, name in zip(viscon_images, style_names):
|
136 |
+
fname = str(VISCON_IMAGE_PATH/name)+'.jpg'
|
137 |
+
if image:
|
138 |
+
image = image.resize((224,224))
|
139 |
+
if os.path.exists(fname):
|
140 |
+
os.remove(fname)
|
141 |
+
image.save(fname)
|
142 |
+
ret_images.append(image)
|
143 |
+
return ret_images
|
144 |
+
|
145 |
+
|
146 |
+
def extract_pose_mask(input_image, detect_resolution,
|
147 |
+
ignore_head=True, ignore_hair=False):
|
148 |
+
# skeleton
|
149 |
+
input_image = pad_image(input_image, min_aspect_ratio=0.625)
|
150 |
+
detected_map, _ = apply_openpose(resize_image(input_image, detect_resolution), hand=True)
|
151 |
+
detected_map = HWC3(detected_map)
|
152 |
+
|
153 |
+
# human mask
|
154 |
+
cropped = segmentor(input_image, ignore_head=ignore_head, ignore_hair=ignore_hair)
|
155 |
+
mask = cropped['human_mask']
|
156 |
+
mask = Image.fromarray(np.array(mask*255, dtype=np.uint8), mode='L')
|
157 |
+
|
158 |
+
return [detected_map, mask]
|
159 |
+
|
160 |
+
def extract_fashion(input_image):
|
161 |
+
|
162 |
+
# style images
|
163 |
+
cropped = segmentor(input_image)
|
164 |
+
cropped_images = []
|
165 |
+
for style_name in style_names:
|
166 |
+
if style_name in cropped and style_name not in ignore_style_list:
|
167 |
+
cropped_images.append(cropped[style_name])
|
168 |
+
else:
|
169 |
+
cropped_images.append(None)
|
170 |
+
|
171 |
+
return [*cropped_images]
|
172 |
+
|
173 |
+
def get_image_files(image_path, ret_image=True, exts=['.jpg','.jpeg','.png']):
|
174 |
+
images = []
|
175 |
+
for ext in exts:
|
176 |
+
images += [x for x in glob(str(Path(image_path)/f'*{ext}'))]
|
177 |
+
if ret_image:
|
178 |
+
images = [Image.open(x) for x in images]
|
179 |
+
return images
|
180 |
+
|
181 |
+
def log_sample(seed, results, prompt, skeleton_image, mask_image, control_scales, *viscon_images):
|
182 |
+
time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
183 |
+
|
184 |
+
log_dir = LOG_PATH/time_str
|
185 |
+
os.makedirs(str(log_dir), exist_ok=True)
|
186 |
+
|
187 |
+
# save result
|
188 |
+
concat = np.hstack((skeleton_image, *results))
|
189 |
+
Image.fromarray(skeleton_image).save(str(log_dir/'skeleton.jpg'))
|
190 |
+
Image.fromarray(mask_image).save(str(log_dir/'mask.png'))
|
191 |
+
for i, result in enumerate(results):
|
192 |
+
Image.fromarray(result).save(str(log_dir/f'result_{i}.jpg'))
|
193 |
+
|
194 |
+
# save text
|
195 |
+
with open(str(log_dir/'info.txt'),'w') as f:
|
196 |
+
f.write(f'prompt: {prompt} \n')
|
197 |
+
f.write(f'seed: {seed}\n')
|
198 |
+
control_str = [str(x) for x in control_scales]
|
199 |
+
f.write(','.join(control_str) + '\n')
|
200 |
+
# save vison images
|
201 |
+
for style_name, style_image in zip(style_names, viscon_images):
|
202 |
+
if style_image is not None:
|
203 |
+
style_image.save(str(log_dir/f'{style_name}.jpg'))
|
204 |
+
|
205 |
+
|
206 |
+
def process(prompt, a_prompt, n_prompt, num_samples,
|
207 |
+
ddim_steps, scale, seed, eta, mask_image, pose_image,
|
208 |
+
c12, c11, c10, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0,
|
209 |
+
*viscon_images):
|
210 |
+
|
211 |
+
with torch.no_grad():
|
212 |
+
control_scales = [c12, c11, c10, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0]
|
213 |
+
mask = torch.tensor(mask_image.mean(-1)/255.,dtype=torch.float) #(512,512), [0,1]
|
214 |
+
mask = mask.unsqueeze(0).to(device) # (1, 512, 512)
|
215 |
+
style_emb = encode_style_images(viscon_images)
|
216 |
+
|
217 |
+
# fix me
|
218 |
+
detected_map = HWC3(pose_image)
|
219 |
+
#detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_NEAREST)
|
220 |
+
H, W, C = detected_map.shape
|
221 |
+
control = torch.from_numpy(detected_map.copy()).float().to(device) / 255.0
|
222 |
+
control = torch.stack([control for _ in range(num_samples)], dim=0)
|
223 |
+
control = einops.rearrange(control, 'b h w c -> b c h w').clone()
|
224 |
+
|
225 |
+
if seed == -1:
|
226 |
+
seed = random.randint(0, 65535)
|
227 |
+
seed_everything(seed)
|
228 |
+
|
229 |
+
if config.save_memory:
|
230 |
+
model.low_vram_shift(is_diffusing=False)
|
231 |
+
new_style_shape = [num_samples] + [1] * (len(style_emb.shape)-1)
|
232 |
+
|
233 |
+
cond = {"c_concat": [control],
|
234 |
+
"c_crossattn": [style_emb.repeat(new_style_shape)],
|
235 |
+
"c_text": [model.get_learned_conditioning([prompt + ', ' + a_prompt] * num_samples)],
|
236 |
+
'c_concat_mask': [mask.repeat(num_samples, 1, 1, 1)]}
|
237 |
+
|
238 |
+
un_cond = {"c_concat": [control],
|
239 |
+
"c_crossattn": [torch.zeros_like(style_emb).repeat(new_style_shape)],
|
240 |
+
"c_text":[model.get_learned_conditioning([n_prompt] * num_samples)],
|
241 |
+
'c_concat_mask': [torch.zeros_like(mask).repeat(num_samples, 1, 1, 1)]}
|
242 |
+
|
243 |
+
shape = (4, H // 8, W // 8)
|
244 |
+
|
245 |
+
if config.save_memory:
|
246 |
+
model.low_vram_shift(is_diffusing=True)
|
247 |
+
|
248 |
+
model.control_scales = control_scales
|
249 |
+
|
250 |
+
samples, _ = ddim_sampler.sample(ddim_steps, num_samples,
|
251 |
+
shape, cond, verbose=False, eta=eta,
|
252 |
+
unconditional_guidance_scale=scale,
|
253 |
+
unconditional_conditioning=un_cond)
|
254 |
+
|
255 |
+
if config.save_memory:
|
256 |
+
model.low_vram_shift(is_diffusing=False)
|
257 |
+
|
258 |
+
x_samples = model.decode_first_stage(samples)
|
259 |
+
x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
|
260 |
+
|
261 |
+
results = [x_samples[i] for i in range(num_samples)]
|
262 |
+
|
263 |
+
if LOG_SAMPLES:
|
264 |
+
log_sample(seed, results, prompt, detected_map, mask_image, control_scales, *viscon_images)
|
265 |
+
return results
|
266 |
+
|
267 |
+
def get_image(name, file_ext='.jpg'):
|
268 |
+
fname = str(VISCON_IMAGE_PATH/name)+file_ext
|
269 |
+
if not os.path.exists(fname):
|
270 |
+
return None
|
271 |
+
return Image.open(fname)
|
272 |
+
|
273 |
+
def get_image_numpy(name, file_ext='.png'):
|
274 |
+
fname = str(VISCON_IMAGE_PATH/name)+file_ext
|
275 |
+
if not os.path.exists(fname):
|
276 |
+
return None
|
277 |
+
return np.array(Image.open(fname))
|
278 |
+
|
279 |
+
def create_app():
|
280 |
+
block = gr.Blocks().queue()
|
281 |
+
with block:
|
282 |
+
with gr.Row():
|
283 |
+
gr.Markdown("## ViscoNet: Visual ControlNet with Human Pose and Fashion <br> [Video tutorial](https://youtu.be/85NyIuLeV00)")
|
284 |
+
with gr.Row():
|
285 |
+
with gr.Column():
|
286 |
+
with gr.Accordion("Get pose and mask", open=False):
|
287 |
+
with gr.Row():
|
288 |
+
input_image = gr.Image(source='upload', type="numpy", label='input image', value=np.array(get_image_numpy('ref')))
|
289 |
+
pose_image = gr.Image(source='upload', type="numpy", label='pose', value=np.array(get_image_numpy('pose')))
|
290 |
+
mask_image = gr.Image(source='upload', type="numpy", label='mask', value=np.array(get_image_numpy('mask')))
|
291 |
+
with gr.Accordion("Samples", open=False):
|
292 |
+
with gr.Tab('Female'):
|
293 |
+
samples = get_image_files(str(SAMPLE_IMAGE_PATH/'pose/WOMEN/'))
|
294 |
+
female_pose_gallery = gr.Gallery(label='pose', show_label=False, value=samples).style(grid=3, height='auto')
|
295 |
+
with gr.Tab('Male'):
|
296 |
+
samples = get_image_files(str(SAMPLE_IMAGE_PATH/'pose/MEN/'))
|
297 |
+
male_pose_gallery = gr.Gallery(label='pose', show_label=False, value=samples).style(grid=3, height='auto')
|
298 |
+
with gr.Row():
|
299 |
+
#pad_checkbox = gr.Checkbox(label='Pad pose to square', value=True)
|
300 |
+
ignorehead_checkbox = gr.Checkbox(label='Ignore face in masking (for DeepFake)', value=True)
|
301 |
+
ignorehair_checkbox = gr.Checkbox(label='Ignore hair in masking', value=False, visible=True)
|
302 |
+
with gr.Row():
|
303 |
+
#ignore_head_checkbox = gr.Checkbox(label='Ignore head', value=False)
|
304 |
+
get_pose_button = gr.Button(label="Get pose", value='Get pose')
|
305 |
+
get_fashion_button = gr.Button(label="Get visual", value='Get visual prompt')
|
306 |
+
|
307 |
+
|
308 |
+
with gr.Accordion("Visual Conditions", open=False):
|
309 |
+
gr.Markdown('Drag-and-drop, or click from samples below.')
|
310 |
+
with gr.Column():
|
311 |
+
viscon_images = []
|
312 |
+
viscon_images_names2index = {}
|
313 |
+
viscon_len = len(style_names)
|
314 |
+
v_idx = 0
|
315 |
+
|
316 |
+
with gr.Row():
|
317 |
+
for _ in range(8):
|
318 |
+
viscon_name = style_names[v_idx]
|
319 |
+
vis = False if viscon_name in ignore_style_list else True
|
320 |
+
viscon_images.append(gr.Image(source='upload', type="pil", min_height=112, min_width=112, label=viscon_name, value=get_image(viscon_name), visible=vis))
|
321 |
+
viscon_images_names2index[viscon_name] = v_idx
|
322 |
+
v_idx += 1
|
323 |
+
|
324 |
+
viscon_button = gr.Button(value='Save as Default',visible=False if DEMO else True)
|
325 |
+
|
326 |
+
viscon_galleries = []
|
327 |
+
|
328 |
+
with gr.Column():
|
329 |
+
with gr.Accordion("Female", open=False):
|
330 |
+
for garment, number in zip(['hair', 'top', 'bottom', 'outer'], [150, 500, 500, 250]):
|
331 |
+
with gr.Tab(garment):
|
332 |
+
samples = []
|
333 |
+
if WOMEN_GALLERY_PATH and os.path.exists(WOMEN_GALLERY_PATH):
|
334 |
+
samples = glob(os.path.join(WOMEN_GALLERY_PATH, f'**/{garment}.jpg'), recursive=True)
|
335 |
+
#samples = glob(f'/home/soon/datasets/deepfashion_inshop/styles_default/WOMEN/**/{garment}.jpg', recursive=True)
|
336 |
+
samples = random.choices(samples, k=number)
|
337 |
+
viscon_gallery = gr.Gallery(label='hair', allow_preview=False, show_label=False, value=samples).style(grid=4, height='auto')
|
338 |
+
viscon_galleries.append({'component':viscon_gallery, 'inputs':[garment]})
|
339 |
+
with gr.Accordion("Male", open=False):
|
340 |
+
for garment, number in zip(['hair', 'top', 'bottom', 'outer'], [150, 500, 500, 250]):
|
341 |
+
with gr.Tab(garment):
|
342 |
+
samples = []
|
343 |
+
if MEN_GALLERY_PATH and os.path.exists(MEN_GALLERY_PATH):
|
344 |
+
samples = glob(os.path.join(MEN_GALLERY_PATH, f'**/{garment}.jpg'), recursive=True)
|
345 |
+
samples = random.choices(samples, k=number)
|
346 |
+
viscon_gallery = gr.Gallery(label='hair', allow_preview=False, show_label=False, value=samples).style(grid=4, height='auto')
|
347 |
+
viscon_galleries.append({'component':viscon_gallery, 'inputs':[garment]})
|
348 |
+
|
349 |
+
with gr.Accordion("Control Strength Scaling", open=False):
|
350 |
+
gr.Markdown("smaller value for stronger textual influence. c12 is highest spatial resolution controlling textures")
|
351 |
+
with gr.Row():
|
352 |
+
strength_select = gr.Dropdown(list(SCALE_CONFIG.keys()), label='strength settings', value=DEFAULT_SCALE_CONFIG)
|
353 |
+
scale_all = gr.Slider(label=f'set all scales', minimum=0, maximum=1, value=DEFAULT_CONTROL_SCALE, step=0.05)
|
354 |
+
scale_values = SCALE_CONFIG[DEFAULT_SCALE_CONFIG]
|
355 |
+
control_scales = []
|
356 |
+
c_idx = 12
|
357 |
+
with gr.Accordion("Advanced settings", open=False):
|
358 |
+
with gr.Row():
|
359 |
+
for _ in range(3):
|
360 |
+
control_scales.append(gr.Slider(label=f'c{c_idx}', minimum=0, maximum=1, value=scale_values[12-c_idx], step=0.05))
|
361 |
+
c_idx -= 1
|
362 |
+
with gr.Row():
|
363 |
+
for _ in range(3):
|
364 |
+
control_scales.append(gr.Slider(label=f'c{c_idx}', minimum=0, maximum=1, value=scale_values[12-c_idx], step=0.05))
|
365 |
+
c_idx -= 1
|
366 |
+
with gr.Row():
|
367 |
+
for _ in range(3):
|
368 |
+
control_scales.append(gr.Slider(label=f'c{c_idx}', minimum=0, maximum=1, value=scale_values[12-c_idx], step=0.05))
|
369 |
+
c_idx -= 1
|
370 |
+
with gr.Row():
|
371 |
+
for _ in range(4):
|
372 |
+
control_scales.append(gr.Slider(label=f'c{c_idx}', minimum=0, maximum=1, value=scale_values[12-c_idx], step=0.05))
|
373 |
+
c_idx -= 1
|
374 |
+
|
375 |
+
with gr.Accordion("Advanced options", open=False):
|
376 |
+
with gr.Row():
|
377 |
+
detect_resolution = gr.Slider(label="OpenPose Resolution", minimum=128, maximum=512, value=512, step=1)
|
378 |
+
ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=50, value=20, step=1)
|
379 |
+
scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=12.0, step=0.1)
|
380 |
+
|
381 |
+
eta = gr.Number(label="eta (DDIM)", value=0.0, visible=False)
|
382 |
+
a_prompt = gr.Textbox(label="Added Prompt", value='best quality, extremely detailed')
|
383 |
+
n_prompt = gr.Textbox(label="Negative Prompt",
|
384 |
+
value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, sunglasses, hat')
|
385 |
+
with gr.Column():
|
386 |
+
result_gallery = gr.Gallery(label='Output', show_label=False, show_download_button=True, elem_id="gallery").style(grid=1, height='auto')
|
387 |
+
with gr.Row():
|
388 |
+
max_samples = 8 if not DEMO else 4
|
389 |
+
num_samples = gr.Slider(label="Images", minimum=1, maximum=max_samples, value=1, step=1)
|
390 |
+
seed = gr.Slider(label="Seed (-1 for random)", minimum=-1, maximum=2147483647, step=1, value=1561194236)#randomize=True) #value=1561194234)
|
391 |
+
if not DEMO:
|
392 |
+
DF_DEMO = 'fashionWOMENTees_Tanksid0000762403_1front___fashionWOMENTees_Tanksid0000762403_1front'
|
393 |
+
DF_EVAL = 'fashionWOMENBlouses_Shirtsid0000035501_1front___fashionWOMENBlouses_Shirtsid0000035501_1front'
|
394 |
+
DF_RESULT ="fashionWOMENTees_Tanksid0000796209_1front___fashionWOMENTees_Tanksid0000796209_2side"
|
395 |
+
deepfashion_names = gr.Textbox(label='Deepfashion name', value=DF_EVAL)
|
396 |
+
gr.Markdown("Default config reconstruct image faithful to pose, mask and visual condition. Reduce control strength to tip balance towards text prompt for more creativity.")
|
397 |
+
prompt = gr.Textbox(label="Text Prompt", value="")
|
398 |
+
|
399 |
+
run_button = gr.Button(label="Run")
|
400 |
+
|
401 |
+
|
402 |
+
        female_pose_gallery.select(fn=select_gallery_image, inputs=None, outputs=input_image)
        male_pose_gallery.select(fn=select_gallery_image, inputs=None, outputs=input_image)
        for vision_gallery in viscon_galleries:
            viscon_idx = viscon_images_names2index[vision_gallery['inputs'][0]]
            vision_gallery['component'].select(fn=select_gallery_image, inputs=None,
                                               outputs=viscon_images[viscon_idx])
        ips = [prompt, a_prompt, n_prompt, num_samples, ddim_steps, scale, seed, eta, mask_image, pose_image,
               *control_scales, *viscon_images]
        run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
        prompt.submit(fn=process, inputs=ips, outputs=[result_gallery])
        get_pose_button.click(fn=extract_pose_mask, inputs=[input_image, detect_resolution,
                                                            ignorehead_checkbox, ignorehair_checkbox],
                              outputs=[pose_image, mask_image])
        get_fashion_button.click(fn=extract_fashion, inputs=input_image, outputs=[*viscon_images])
        viscon_button.click(fn=save_viscon_images, inputs=[*viscon_images], outputs=[*viscon_images])
        strength_select.select(fn=select_default_strength, inputs=[strength_select], outputs=[*control_scales])
        scale_all.release(fn=change_all_scales, inputs=[scale_all], outputs=[*control_scales])
        if not DEMO:
            deepfashion_names.submit(fn=fetch_deepfashion, inputs=[deepfashion_names], outputs=[input_image, pose_image, mask_image, *viscon_images])
    return block

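# For reference, the `ips` list above fixes the positional interface of `process`
# (defined earlier in this file); a matching signature would look roughly like the
# following illustrative sketch, not the original definition:
#     def process(prompt, a_prompt, n_prompt, num_samples, ddim_steps, scale,
#                 seed, eta, mask_image, pose_image, *rest):
#         # `rest` holds the 13 control scales followed by the visual-condition images
#         ...
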
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='ViscoNet Gradio demo.')

    parser.add_argument('--gpu', type=int, default=0, help='GPU id')
    parser.add_argument('--config', type=str, default='./configs/visconet_v1.yaml')
    parser.add_argument('--ckpt', type=str, default='./models/visconet_v1.pth')
    parser.add_argument('--public_link', action='store_true', help='Create public link')
    args = parser.parse_args()

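    # Example invocation (using the argparse defaults above):
    #   python app.py --gpu 0 --config ./configs/visconet_v1.yaml --ckpt ./models/visconet_v1.pth
    # Add --public_link to create a shareable Gradio link.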
    # Module-level handles read by the Gradio callbacks defined above.
    global device
    global segmentor
    global apply_openpose
    global style_encoder
    global model
    global ddim_sampler

    device = f'cuda:{args.gpu}' if torch.cuda.is_available() else 'cpu'
    config_file = args.config
    model_ckpt = args.ckpt

    proj_config = OmegaConf.load(config_file)
    style_names = proj_config.dataset.train.params.style_names
    data_root = Path(proj_config.dataset.train.params.image_root)
    image_root = data_root/proj_config.dataset.train.params.image_dir
    style_root = data_root/proj_config.dataset.train.params.style_dir
    pose_root = data_root/proj_config.dataset.train.params.pose_dir
    mask_root = data_root/proj_config.dataset.train.params.mask_dir

    segmentor = SegmentCropper()
    apply_openpose = OpenposeDetector()

    # Fetch the checkpoint from the Hub if it is not already cached locally.
    snapshot_download(repo_id=HF_REPO, local_dir='./models',
                      allow_patterns=os.path.basename(model_ckpt))

    # Build the model on CPU first, then load the weights and move it to the target device.
    style_encoder = instantiate_from_config(proj_config.model.style_embedding_config).to(device)
    model = create_model(config_file).cpu()
    model.load_state_dict(load_state_dict(model_ckpt, location=device))

    model = model.to(device)
    model.cond_stage_model.device = device
    ddim_sampler = DDIMSampler(model)

    # Download and unpack the sample gallery on first run.
    if not GALLERY_PATH.exists():
        zip_name = 'fashion.zip'
        snapshot_download(repo_id=HF_REPO, allow_patterns=zip_name, local_dir='.')
        from zipfile import ZipFile
        with ZipFile(zip_name, 'r') as zip_ref:
            zip_ref.extractall('.')
        os.remove(zip_name)

    # Build the Gradio UI and launch the server.
    block = create_app()
    block.launch(server_name='0.0.0.0', share=args.public_link)
app_files/default_images/mask.png
ADDED
app_files/default_images/pose.png
ADDED
app_files/default_images/ref.png
ADDED
app_files/samples/pose/MEN/full_1.png
ADDED
app_files/samples/pose/MEN/full_2.png
ADDED
app_files/samples/pose/MEN/half_back.png
ADDED
app_files/samples/pose/MEN/half_front.png
ADDED
app_files/samples/pose/MEN/half_left.png
ADDED
app_files/samples/pose/WOMEN/pose_0.png
ADDED
app_files/samples/pose/WOMEN/pose_1.png
ADDED
app_files/samples/pose/WOMEN/pose_2.png
ADDED
app_files/samples/pose/WOMEN/pose_3.png
ADDED
app_files/samples/pose/WOMEN/pose_4.png
ADDED
app_files/samples/pose/WOMEN/pose_5.png
ADDED
app_files/samples/pose/WOMEN/pose_6.png
ADDED