guru001 committed
Commit
1f77b91
1 Parent(s): 596759d
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.weights filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,48 @@
+ import numpy as np
+ import gradio as gr
+ # from model import api  # unused; there is no top-level model.py in this commit
+ from PIL import Image
+ from yolo.model import *
+
+ model = yolo_model()
+
+ def predict(input_img):
+     input_img = Image.fromarray(input_img)
+     _, output_img = model.predict(input_img)  # predict() returns (payload dict, annotated PIL image)
+     # print('prediction', prediction)
+     return output_img
+
+ css = ''
+
+ # with gr.Blocks(css=css) as demo:
+ #     gr.HTML("<h1><center>Signsapp: Classify the signs based on the hands sign images<center><h1>")
+ #     gr.Interface(sign, inputs=gr.Image(shape=(200, 200)), outputs=gr.Label())
+
+ title = r"yolov3"
+
+ description = r"""
+ <center>
+ Recognize common objects using the model
+ <img src="file/det_dog-cycle-car.png" width=350px>
+ </center>
+ """
+ article = r"""
+ ### Credits
+ - [Coursera](https://www.coursera.org/learn/convolutional-neural-networks/)
+ """
+
+ demo = gr.Interface(
+     title=title,
+     description=description,
+     article=article,
+     fn=predict,
+     inputs=gr.Image(shape=(200, 200)),
+     outputs=gr.Image(shape=(200, 200)),
+     examples=["dog-cycle-car.png"],
+     # allow_flagging="manual",
+     # flagging_options=['recule', 'tournedroite', 'arretetoi', 'tournegauche', 'gauche', 'avance', 'droite'],
+     # flagging_dir="./flag/men"
+ )
+
+ # demo.queue()
+ demo.launch(debug=True)
det_dog-cycle-car.png ADDED
dog-cycle-car.png ADDED
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ Flask-Cors
+ Flask
+ Werkzeug
+ pillow
+ numpy
+ boto3
+ torch==1.7.1
+ opencv-python
yolo/Poster3.jpg ADDED
yolo/README.md ADDED
@@ -0,0 +1,12 @@
+ ## Yolo
+
+ [Part 1 : Understanding How YOLO works](https://blog.paperspace.com/how-to-implement-a-yolo-object-detector-in-pytorch/)
+
+ [Part 2 : Creating the layers of the network architecture](https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-2/)
+
+ [Part 3 : How to implement a YOLO (v3) object detector from scratch in PyTorch](https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-3/)
+
+ [Part 4 : Objectness Confidence Thresholding and Non-maximum Suppression](https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-4/)
+
+ [Part 5 : Designing the input and the output pipelines](https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-5/)
+
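The links above cover the background theory. As a quick orientation for this Space itself, here is a minimal usage sketch (not one of the committed files) of the `yolo_model` wrapper added in `yolo/model.py`; it assumes the repository root as the working directory and that the cfg/weights files referenced in `yolo_model.__init__` are present.

```python
# Minimal sketch: drive the yolo_model wrapper from yolo/model.py directly,
# the same way app.py does inside its Gradio predict() callback.
from PIL import Image
from yolo.model import yolo_model

model = yolo_model()                      # builds the Darknet graph and loads the weights
img = Image.open("dog-cycle-car.png").convert("RGB")
payload, annotated = model.predict(img)   # payload: {'image': <base64 PNG>, 'objects': [...]}
print(payload["objects"])                 # detected COCO class names
annotated.save("det_dog-cycle-car.png")   # PIL image with boxes drawn
```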
yolo/__pycache__/darknet.cpython-37.pyc ADDED
Binary file (11.9 kB).
 
yolo/__pycache__/model.cpython-37.pyc ADDED
Binary file (4.87 kB).
 
yolo/__pycache__/utils.cpython-37.pyc ADDED
Binary file (7.7 kB).
 
yolo/cfg/yolov3.cfg ADDED
@@ -0,0 +1,788 @@
1
+ [net]
2
+ # Testing
3
+ # batch=1
4
+ # subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=16
8
+ width=624
9
+ height=624
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=32
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ # Downsample
34
+
35
+ [convolutional]
36
+ batch_normalize=1
37
+ filters=64
38
+ size=3
39
+ stride=2
40
+ pad=1
41
+ activation=leaky
42
+
43
+ [convolutional]
44
+ batch_normalize=1
45
+ filters=32
46
+ size=1
47
+ stride=1
48
+ pad=1
49
+ activation=leaky
50
+
51
+ [convolutional]
52
+ batch_normalize=1
53
+ filters=64
54
+ size=3
55
+ stride=1
56
+ pad=1
57
+ activation=leaky
58
+
59
+ [shortcut]
60
+ from=-3
61
+ activation=linear
62
+
63
+ # Downsample
64
+
65
+ [convolutional]
66
+ batch_normalize=1
67
+ filters=128
68
+ size=3
69
+ stride=2
70
+ pad=1
71
+ activation=leaky
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=64
76
+ size=1
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=128
84
+ size=3
85
+ stride=1
86
+ pad=1
87
+ activation=leaky
88
+
89
+ [shortcut]
90
+ from=-3
91
+ activation=linear
92
+
93
+ [convolutional]
94
+ batch_normalize=1
95
+ filters=64
96
+ size=1
97
+ stride=1
98
+ pad=1
99
+ activation=leaky
100
+
101
+ [convolutional]
102
+ batch_normalize=1
103
+ filters=128
104
+ size=3
105
+ stride=1
106
+ pad=1
107
+ activation=leaky
108
+
109
+ [shortcut]
110
+ from=-3
111
+ activation=linear
112
+
113
+ # Downsample
114
+
115
+ [convolutional]
116
+ batch_normalize=1
117
+ filters=256
118
+ size=3
119
+ stride=2
120
+ pad=1
121
+ activation=leaky
122
+
123
+ [convolutional]
124
+ batch_normalize=1
125
+ filters=128
126
+ size=1
127
+ stride=1
128
+ pad=1
129
+ activation=leaky
130
+
131
+ [convolutional]
132
+ batch_normalize=1
133
+ filters=256
134
+ size=3
135
+ stride=1
136
+ pad=1
137
+ activation=leaky
138
+
139
+ [shortcut]
140
+ from=-3
141
+ activation=linear
142
+
143
+ [convolutional]
144
+ batch_normalize=1
145
+ filters=128
146
+ size=1
147
+ stride=1
148
+ pad=1
149
+ activation=leaky
150
+
151
+ [convolutional]
152
+ batch_normalize=1
153
+ filters=256
154
+ size=3
155
+ stride=1
156
+ pad=1
157
+ activation=leaky
158
+
159
+ [shortcut]
160
+ from=-3
161
+ activation=linear
162
+
163
+ [convolutional]
164
+ batch_normalize=1
165
+ filters=128
166
+ size=1
167
+ stride=1
168
+ pad=1
169
+ activation=leaky
170
+
171
+ [convolutional]
172
+ batch_normalize=1
173
+ filters=256
174
+ size=3
175
+ stride=1
176
+ pad=1
177
+ activation=leaky
178
+
179
+ [shortcut]
180
+ from=-3
181
+ activation=linear
182
+
183
+ [convolutional]
184
+ batch_normalize=1
185
+ filters=128
186
+ size=1
187
+ stride=1
188
+ pad=1
189
+ activation=leaky
190
+
191
+ [convolutional]
192
+ batch_normalize=1
193
+ filters=256
194
+ size=3
195
+ stride=1
196
+ pad=1
197
+ activation=leaky
198
+
199
+ [shortcut]
200
+ from=-3
201
+ activation=linear
202
+
203
+
204
+ [convolutional]
205
+ batch_normalize=1
206
+ filters=128
207
+ size=1
208
+ stride=1
209
+ pad=1
210
+ activation=leaky
211
+
212
+ [convolutional]
213
+ batch_normalize=1
214
+ filters=256
215
+ size=3
216
+ stride=1
217
+ pad=1
218
+ activation=leaky
219
+
220
+ [shortcut]
221
+ from=-3
222
+ activation=linear
223
+
224
+ [convolutional]
225
+ batch_normalize=1
226
+ filters=128
227
+ size=1
228
+ stride=1
229
+ pad=1
230
+ activation=leaky
231
+
232
+ [convolutional]
233
+ batch_normalize=1
234
+ filters=256
235
+ size=3
236
+ stride=1
237
+ pad=1
238
+ activation=leaky
239
+
240
+ [shortcut]
241
+ from=-3
242
+ activation=linear
243
+
244
+ [convolutional]
245
+ batch_normalize=1
246
+ filters=128
247
+ size=1
248
+ stride=1
249
+ pad=1
250
+ activation=leaky
251
+
252
+ [convolutional]
253
+ batch_normalize=1
254
+ filters=256
255
+ size=3
256
+ stride=1
257
+ pad=1
258
+ activation=leaky
259
+
260
+ [shortcut]
261
+ from=-3
262
+ activation=linear
263
+
264
+ [convolutional]
265
+ batch_normalize=1
266
+ filters=128
267
+ size=1
268
+ stride=1
269
+ pad=1
270
+ activation=leaky
271
+
272
+ [convolutional]
273
+ batch_normalize=1
274
+ filters=256
275
+ size=3
276
+ stride=1
277
+ pad=1
278
+ activation=leaky
279
+
280
+ [shortcut]
281
+ from=-3
282
+ activation=linear
283
+
284
+ # Downsample
285
+
286
+ [convolutional]
287
+ batch_normalize=1
288
+ filters=512
289
+ size=3
290
+ stride=2
291
+ pad=1
292
+ activation=leaky
293
+
294
+ [convolutional]
295
+ batch_normalize=1
296
+ filters=256
297
+ size=1
298
+ stride=1
299
+ pad=1
300
+ activation=leaky
301
+
302
+ [convolutional]
303
+ batch_normalize=1
304
+ filters=512
305
+ size=3
306
+ stride=1
307
+ pad=1
308
+ activation=leaky
309
+
310
+ [shortcut]
311
+ from=-3
312
+ activation=linear
313
+
314
+
315
+ [convolutional]
316
+ batch_normalize=1
317
+ filters=256
318
+ size=1
319
+ stride=1
320
+ pad=1
321
+ activation=leaky
322
+
323
+ [convolutional]
324
+ batch_normalize=1
325
+ filters=512
326
+ size=3
327
+ stride=1
328
+ pad=1
329
+ activation=leaky
330
+
331
+ [shortcut]
332
+ from=-3
333
+ activation=linear
334
+
335
+
336
+ [convolutional]
337
+ batch_normalize=1
338
+ filters=256
339
+ size=1
340
+ stride=1
341
+ pad=1
342
+ activation=leaky
343
+
344
+ [convolutional]
345
+ batch_normalize=1
346
+ filters=512
347
+ size=3
348
+ stride=1
349
+ pad=1
350
+ activation=leaky
351
+
352
+ [shortcut]
353
+ from=-3
354
+ activation=linear
355
+
356
+
357
+ [convolutional]
358
+ batch_normalize=1
359
+ filters=256
360
+ size=1
361
+ stride=1
362
+ pad=1
363
+ activation=leaky
364
+
365
+ [convolutional]
366
+ batch_normalize=1
367
+ filters=512
368
+ size=3
369
+ stride=1
370
+ pad=1
371
+ activation=leaky
372
+
373
+ [shortcut]
374
+ from=-3
375
+ activation=linear
376
+
377
+ [convolutional]
378
+ batch_normalize=1
379
+ filters=256
380
+ size=1
381
+ stride=1
382
+ pad=1
383
+ activation=leaky
384
+
385
+ [convolutional]
386
+ batch_normalize=1
387
+ filters=512
388
+ size=3
389
+ stride=1
390
+ pad=1
391
+ activation=leaky
392
+
393
+ [shortcut]
394
+ from=-3
395
+ activation=linear
396
+
397
+
398
+ [convolutional]
399
+ batch_normalize=1
400
+ filters=256
401
+ size=1
402
+ stride=1
403
+ pad=1
404
+ activation=leaky
405
+
406
+ [convolutional]
407
+ batch_normalize=1
408
+ filters=512
409
+ size=3
410
+ stride=1
411
+ pad=1
412
+ activation=leaky
413
+
414
+ [shortcut]
415
+ from=-3
416
+ activation=linear
417
+
418
+
419
+ [convolutional]
420
+ batch_normalize=1
421
+ filters=256
422
+ size=1
423
+ stride=1
424
+ pad=1
425
+ activation=leaky
426
+
427
+ [convolutional]
428
+ batch_normalize=1
429
+ filters=512
430
+ size=3
431
+ stride=1
432
+ pad=1
433
+ activation=leaky
434
+
435
+ [shortcut]
436
+ from=-3
437
+ activation=linear
438
+
439
+ [convolutional]
440
+ batch_normalize=1
441
+ filters=256
442
+ size=1
443
+ stride=1
444
+ pad=1
445
+ activation=leaky
446
+
447
+ [convolutional]
448
+ batch_normalize=1
449
+ filters=512
450
+ size=3
451
+ stride=1
452
+ pad=1
453
+ activation=leaky
454
+
455
+ [shortcut]
456
+ from=-3
457
+ activation=linear
458
+
459
+ # Downsample
460
+
461
+ [convolutional]
462
+ batch_normalize=1
463
+ filters=1024
464
+ size=3
465
+ stride=2
466
+ pad=1
467
+ activation=leaky
468
+
469
+ [convolutional]
470
+ batch_normalize=1
471
+ filters=512
472
+ size=1
473
+ stride=1
474
+ pad=1
475
+ activation=leaky
476
+
477
+ [convolutional]
478
+ batch_normalize=1
479
+ filters=1024
480
+ size=3
481
+ stride=1
482
+ pad=1
483
+ activation=leaky
484
+
485
+ [shortcut]
486
+ from=-3
487
+ activation=linear
488
+
489
+ [convolutional]
490
+ batch_normalize=1
491
+ filters=512
492
+ size=1
493
+ stride=1
494
+ pad=1
495
+ activation=leaky
496
+
497
+ [convolutional]
498
+ batch_normalize=1
499
+ filters=1024
500
+ size=3
501
+ stride=1
502
+ pad=1
503
+ activation=leaky
504
+
505
+ [shortcut]
506
+ from=-3
507
+ activation=linear
508
+
509
+ [convolutional]
510
+ batch_normalize=1
511
+ filters=512
512
+ size=1
513
+ stride=1
514
+ pad=1
515
+ activation=leaky
516
+
517
+ [convolutional]
518
+ batch_normalize=1
519
+ filters=1024
520
+ size=3
521
+ stride=1
522
+ pad=1
523
+ activation=leaky
524
+
525
+ [shortcut]
526
+ from=-3
527
+ activation=linear
528
+
529
+ [convolutional]
530
+ batch_normalize=1
531
+ filters=512
532
+ size=1
533
+ stride=1
534
+ pad=1
535
+ activation=leaky
536
+
537
+ [convolutional]
538
+ batch_normalize=1
539
+ filters=1024
540
+ size=3
541
+ stride=1
542
+ pad=1
543
+ activation=leaky
544
+
545
+ [shortcut]
546
+ from=-3
547
+ activation=linear
548
+
549
+ ######################
550
+
551
+ [convolutional]
552
+ batch_normalize=1
553
+ filters=512
554
+ size=1
555
+ stride=1
556
+ pad=1
557
+ activation=leaky
558
+
559
+ [convolutional]
560
+ batch_normalize=1
561
+ size=3
562
+ stride=1
563
+ pad=1
564
+ filters=1024
565
+ activation=leaky
566
+
567
+ [convolutional]
568
+ batch_normalize=1
569
+ filters=512
570
+ size=1
571
+ stride=1
572
+ pad=1
573
+ activation=leaky
574
+
575
+ [convolutional]
576
+ batch_normalize=1
577
+ size=3
578
+ stride=1
579
+ pad=1
580
+ filters=1024
581
+ activation=leaky
582
+
583
+ [convolutional]
584
+ batch_normalize=1
585
+ filters=512
586
+ size=1
587
+ stride=1
588
+ pad=1
589
+ activation=leaky
590
+
591
+ [convolutional]
592
+ batch_normalize=1
593
+ size=3
594
+ stride=1
595
+ pad=1
596
+ filters=1024
597
+ activation=leaky
598
+
599
+ [convolutional]
600
+ size=1
601
+ stride=1
602
+ pad=1
603
+ filters=255
604
+ activation=linear
605
+
606
+
607
+ [yolo]
608
+ mask = 6,7,8
609
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610
+ classes=80
611
+ num=9
612
+ jitter=.3
613
+ ignore_thresh = .7
614
+ truth_thresh = 1
615
+ random=1
616
+
617
+
618
+ [route]
619
+ layers = -4
620
+
621
+ [convolutional]
622
+ batch_normalize=1
623
+ filters=256
624
+ size=1
625
+ stride=1
626
+ pad=1
627
+ activation=leaky
628
+
629
+ [upsample]
630
+ stride=2
631
+
632
+ [route]
633
+ layers = -1, 61
634
+
635
+
636
+
637
+ [convolutional]
638
+ batch_normalize=1
639
+ filters=256
640
+ size=1
641
+ stride=1
642
+ pad=1
643
+ activation=leaky
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ size=3
648
+ stride=1
649
+ pad=1
650
+ filters=512
651
+ activation=leaky
652
+
653
+ [convolutional]
654
+ batch_normalize=1
655
+ filters=256
656
+ size=1
657
+ stride=1
658
+ pad=1
659
+ activation=leaky
660
+
661
+ [convolutional]
662
+ batch_normalize=1
663
+ size=3
664
+ stride=1
665
+ pad=1
666
+ filters=512
667
+ activation=leaky
668
+
669
+ [convolutional]
670
+ batch_normalize=1
671
+ filters=256
672
+ size=1
673
+ stride=1
674
+ pad=1
675
+ activation=leaky
676
+
677
+ [convolutional]
678
+ batch_normalize=1
679
+ size=3
680
+ stride=1
681
+ pad=1
682
+ filters=512
683
+ activation=leaky
684
+
685
+ [convolutional]
686
+ size=1
687
+ stride=1
688
+ pad=1
689
+ filters=255
690
+ activation=linear
691
+
692
+
693
+ [yolo]
694
+ mask = 3,4,5
695
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696
+ classes=80
697
+ num=9
698
+ jitter=.3
699
+ ignore_thresh = .7
700
+ truth_thresh = 1
701
+ random=1
702
+
703
+
704
+
705
+ [route]
706
+ layers = -4
707
+
708
+ [convolutional]
709
+ batch_normalize=1
710
+ filters=128
711
+ size=1
712
+ stride=1
713
+ pad=1
714
+ activation=leaky
715
+
716
+ [upsample]
717
+ stride=2
718
+
719
+ [route]
720
+ layers = -1, 36
721
+
722
+
723
+
724
+ [convolutional]
725
+ batch_normalize=1
726
+ filters=128
727
+ size=1
728
+ stride=1
729
+ pad=1
730
+ activation=leaky
731
+
732
+ [convolutional]
733
+ batch_normalize=1
734
+ size=3
735
+ stride=1
736
+ pad=1
737
+ filters=256
738
+ activation=leaky
739
+
740
+ [convolutional]
741
+ batch_normalize=1
742
+ filters=128
743
+ size=1
744
+ stride=1
745
+ pad=1
746
+ activation=leaky
747
+
748
+ [convolutional]
749
+ batch_normalize=1
750
+ size=3
751
+ stride=1
752
+ pad=1
753
+ filters=256
754
+ activation=leaky
755
+
756
+ [convolutional]
757
+ batch_normalize=1
758
+ filters=128
759
+ size=1
760
+ stride=1
761
+ pad=1
762
+ activation=leaky
763
+
764
+ [convolutional]
765
+ batch_normalize=1
766
+ size=3
767
+ stride=1
768
+ pad=1
769
+ filters=256
770
+ activation=leaky
771
+
772
+ [convolutional]
773
+ size=1
774
+ stride=1
775
+ pad=1
776
+ filters=255
777
+ activation=linear
778
+
779
+
780
+ [yolo]
781
+ mask = 0,1,2
782
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783
+ classes=80
784
+ num=9
785
+ jitter=.3
786
+ ignore_thresh = .7
787
+ truth_thresh = 1
788
+ random=1
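The file above is only read at run time; `parse_cfg` in `yolo/darknet.py` (added below) turns every `[section]` into a plain dictionary of string values. A small sketch of the resulting structure, assuming the package layout of this commit:

```python
# Sketch of the structure parse_cfg() produces from the cfg file above:
# a list of dicts, one per [section], with every value kept as a string.
from yolo.darknet import parse_cfg

blocks = parse_cfg("yolo/cfg/yolov3.cfg")
print(blocks[0]["type"])    # 'net' -- training / input hyper-parameters
print(blocks[1])            # {'type': 'convolutional', 'batch_normalize': '1', 'filters': '32',
                            #  'size': '3', 'stride': '1', 'pad': '1', 'activation': 'leaky'}
print(sum(b["type"] == "yolo" for b in blocks))   # 3 detection heads (masks 6,7,8 / 3,4,5 / 0,1,2)
```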
yolo/darknet.py ADDED
@@ -0,0 +1,586 @@
1
+ from __future__ import division
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ from torch.autograd import Variable
7
+ import numpy as np
8
+ from PIL import Image
+ import cv2  # used by get_test_input() below
9
+
10
+ from yolo.utils import *
11
+
12
+ # from utils import *
13
+
14
+ def get_test_input_normal():
15
+
16
+ input_image = "dog-cycle-car.png"
17
+ image = Image.open(input_image)
18
+ image = image.convert("RGB")
19
+
20
+ img = image.resize(( 416 , 416 ))
21
+
22
+ img = np.asarray(img)
23
+
24
+ img_ = img[:,:,::-1].transpose((2,0,1)) # BGR -> RGB | H X W C -> C X H X W
25
+ img_ = img_[np.newaxis,:,:,:]/255.0 #Add a channel at 0 (for batch) | Normalise
26
+ img_ = torch.from_numpy(img_).float() #Convert to float
27
+ img_ = Variable(img_) # Convert to Variable
28
+ return img_
29
+
30
+ def get_test_input():
31
+ img = cv2.imread("dog-cycle-car.png")
32
+ img = cv2.resize(img, (416,416)) #Resize to the input dimension
33
+ img_ = img[:,:,::-1].transpose((2,0,1)) # BGR -> RGB | H X W C -> C X H X W
34
+ img_ = img_[np.newaxis,:,:,:]/255.0 #Add a channel at 0 (for batch) | Normalise
35
+ img_ = torch.from_numpy(img_).float() #Convert to float
36
+ img_ = Variable(img_) # Convert to Variable
37
+ return img_
38
+
39
+
40
+ def parse_cfg(cfgfile):
41
+ """
42
+ Takes a configuration file
43
+
44
+ Returns a list of blocks. Each block describes a block in the neural
45
+ network to be built. Block is represented as a dictionary in the list
46
+
47
+ """
48
+ file = open(cfgfile, 'r')
49
+ lines = file.read().split('\n') #store the lines in a list
50
+ lines = [x for x in lines if len(x) > 0] #get rid of the empty lines
51
+ lines = [x for x in lines if x[0] != '#']
52
+ lines = [x.rstrip().lstrip() for x in lines]
53
+
54
+
55
+ block = {}
56
+ blocks = []
57
+
58
+ for line in lines:
59
+ if line[0] == "[": #This marks the start of a new block
60
+ if len(block) != 0:
61
+ blocks.append(block)
62
+ block = {}
63
+ block["type"] = line[1:-1].rstrip()
64
+ else:
65
+ key,value = line.split("=")
66
+ block[key.rstrip()] = value.lstrip()
67
+ blocks.append(block)
68
+
69
+ return blocks
70
+ # print('\n\n'.join([repr(x) for x in blocks]))
71
+
72
+ import pickle as pkl
73
+
74
+ class MaxPoolStride1(nn.Module):
75
+ def __init__(self, kernel_size):
76
+ super(MaxPoolStride1, self).__init__()
77
+ self.kernel_size = kernel_size
78
+ self.pad = kernel_size - 1
79
+
80
+ def forward(self, x):
81
+ padded_x = F.pad(x, (0,self.pad,0,self.pad), mode="replicate")
82
+ pooled_x = nn.MaxPool2d(self.kernel_size, self.pad)(padded_x)
83
+ return pooled_x
84
+
85
+
86
+ class EmptyLayer(nn.Module):
87
+ def __init__(self):
88
+ super(EmptyLayer, self).__init__()
89
+
90
+
91
+ class DetectionLayer(nn.Module):
92
+ def __init__(self, anchors):
93
+ super(DetectionLayer, self).__init__()
94
+ self.anchors = anchors
95
+
96
+ def forward(self, x, inp_dim, num_classes, confidence):
97
+ x = x.data
98
+ global CUDA
99
+ prediction = x
100
+ prediction = predict_transform(prediction, inp_dim, self.anchors, num_classes, confidence, CUDA)
101
+ return prediction
102
+
103
+
104
+
105
+
106
+
107
+ class Upsample(nn.Module):
108
+ def __init__(self, stride=2):
109
+ super(Upsample, self).__init__()
110
+ self.stride = stride
111
+
112
+ def forward(self, x):
113
+ stride = self.stride
114
+ assert(x.data.dim() == 4)
115
+ B = x.data.size(0)
116
+ C = x.data.size(1)
117
+ H = x.data.size(2)
118
+ W = x.data.size(3)
119
+ ws = stride
120
+ hs = stride
121
+ x = x.view(B, C, H, 1, W, 1).expand(B, C, H, stride, W, stride).contiguous().view(B, C, H*stride, W*stride)
122
+ return x
123
+ #
124
+
125
+ class ReOrgLayer(nn.Module):
126
+ def __init__(self, stride = 2):
127
+ super(ReOrgLayer, self).__init__()
128
+ self.stride= stride
129
+
130
+ def forward(self,x):
131
+ assert(x.data.dim() == 4)
132
+ B,C,H,W = x.data.shape
133
+ hs = self.stride
134
+ ws = self.stride
135
+ assert(H % hs == 0), "The stride " + str(self.stride) + " is not a proper divisor of height " + str(H)
136
+ assert(W % ws == 0), "The stride " + str(self.stride) + " is not a proper divisor of width " + str(W)
137
+ x = x.view(B,C, H // hs, hs, W // ws, ws).transpose(-2,-3).contiguous()
138
+ x = x.view(B,C, H // hs * W // ws, hs, ws)
139
+ x = x.view(B,C, H // hs * W // ws, hs*ws).transpose(-1,-2).contiguous()
140
+ x = x.view(B, C, ws*hs, H // ws, W // ws).transpose(1,2).contiguous()
141
+ x = x.view(B, C*ws*hs, H // ws, W // ws)
142
+ return x
143
+
144
+
145
+ def create_modules(blocks):
146
+ net_info = blocks[0] #Captures the information about the input and pre-processing
147
+
148
+ module_list = nn.ModuleList()
149
+
150
+ index = 0 #indexing blocks helps with implementing route layers (skip connections)
151
+
152
+
153
+ prev_filters = 3
154
+
155
+ output_filters = []
156
+
157
+ for x in blocks:
158
+ module = nn.Sequential()
159
+
160
+ if (x["type"] == "net"):
161
+ continue
162
+
163
+ #If it's a convolutional layer
164
+ if (x["type"] == "convolutional"):
165
+ #Get the info about the layer
166
+ activation = x["activation"]
167
+ try:
168
+ batch_normalize = int(x["batch_normalize"])
169
+ bias = False
170
+ except:
171
+ batch_normalize = 0
172
+ bias = True
173
+
174
+ filters= int(x["filters"])
175
+ padding = int(x["pad"])
176
+ kernel_size = int(x["size"])
177
+ stride = int(x["stride"])
178
+
179
+ if padding:
180
+ pad = (kernel_size - 1) // 2
181
+ else:
182
+ pad = 0
183
+
184
+ #Add the convolutional layer
185
+ conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias = bias)
186
+ module.add_module("conv_{0}".format(index), conv)
187
+
188
+ #Add the Batch Norm Layer
189
+ if batch_normalize:
190
+ bn = nn.BatchNorm2d(filters)
191
+ module.add_module("batch_norm_{0}".format(index), bn)
192
+
193
+ #Check the activation.
194
+ #It is either Linear or a Leaky ReLU for YOLO
195
+ if activation == "leaky":
196
+ activn = nn.LeakyReLU(0.1, inplace = True)
197
+ module.add_module("leaky_{0}".format(index), activn)
198
+
199
+
200
+
201
+ #If it's an upsampling layer
202
+ #We use Bilinear2dUpsampling
203
+
204
+ elif (x["type"] == "upsample"):
205
+ stride = int(x["stride"])
206
+ # upsample = Upsample(stride)
207
+ upsample = nn.Upsample(scale_factor = 2, mode = "nearest")
208
+ module.add_module("upsample_{}".format(index), upsample)
209
+
210
+ #If it is a route layer
211
+ elif (x["type"] == "route"):
212
+ x["layers"] = x["layers"].split(',')
213
+
214
+ #Start of a route
215
+ start = int(x["layers"][0])
216
+
217
+ #end, if there exists one.
218
+ try:
219
+ end = int(x["layers"][1])
220
+ except:
221
+ end = 0
222
+
223
+
224
+
225
+ #Positive annotation
226
+ if start > 0:
227
+ start = start - index
228
+
229
+ if end > 0:
230
+ end = end - index
231
+
232
+
233
+ route = EmptyLayer()
234
+ module.add_module("route_{0}".format(index), route)
235
+
236
+
237
+
238
+ if end < 0:
239
+ filters = output_filters[index + start] + output_filters[index + end]
240
+ else:
241
+ filters= output_filters[index + start]
242
+
243
+
244
+
245
+ #shortcut corresponds to skip connection
246
+ elif x["type"] == "shortcut":
247
+ from_ = int(x["from"])
248
+ shortcut = EmptyLayer()
249
+ module.add_module("shortcut_{}".format(index), shortcut)
250
+
251
+
252
+ elif x["type"] == "maxpool":
253
+ stride = int(x["stride"])
254
+ size = int(x["size"])
255
+ if stride != 1:
256
+ maxpool = nn.MaxPool2d(size, stride)
257
+ else:
258
+ maxpool = MaxPoolStride1(size)
259
+
260
+ module.add_module("maxpool_{}".format(index), maxpool)
261
+
262
+ #Yolo is the detection layer
263
+ elif x["type"] == "yolo":
264
+ mask = x["mask"].split(",")
265
+ mask = [int(x) for x in mask]
266
+
267
+
268
+ anchors = x["anchors"].split(",")
269
+ anchors = [int(a) for a in anchors]
270
+ anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors),2)]
271
+ anchors = [anchors[i] for i in mask]
272
+
273
+ detection = DetectionLayer(anchors)
274
+ module.add_module("Detection_{}".format(index), detection)
275
+
276
+
277
+
278
+ else:
279
+ print("Something I dunno")
280
+ assert False
281
+
282
+
283
+ module_list.append(module)
284
+ prev_filters = filters
285
+ output_filters.append(filters)
286
+ index += 1
287
+
288
+
289
+ return (net_info, module_list)
290
+
291
+
292
+
293
+ class Darknet(nn.Module):
294
+ def __init__(self, cfgfile):
295
+ super(Darknet, self).__init__()
296
+ self.blocks = parse_cfg(cfgfile)
297
+ self.net_info, self.module_list = create_modules(self.blocks)
298
+ self.header = torch.IntTensor([0,0,0,0])
299
+ self.seen = 0
300
+
301
+
302
+
303
+ def get_blocks(self):
304
+ return self.blocks
305
+
306
+ def get_module_list(self):
307
+ return self.module_list
308
+
309
+
310
+ def forward(self, x, CUDA):
311
+ detections = []
312
+ modules = self.blocks[1:]
313
+ outputs = {} #We cache the outputs for the route layer
314
+
315
+
316
+ write = 0
317
+ for i in range(len(modules)):
318
+
319
+ module_type = (modules[i]["type"])
320
+ if module_type == "convolutional" or module_type == "upsample" or module_type == "maxpool":
321
+
322
+ x = self.module_list[i](x)
323
+ outputs[i] = x
324
+
325
+
326
+ elif module_type == "route":
327
+ layers = modules[i]["layers"]
328
+ layers = [int(a) for a in layers]
329
+
330
+ if (layers[0]) > 0:
331
+ layers[0] = layers[0] - i
332
+
333
+ if len(layers) == 1:
334
+ x = outputs[i + (layers[0])]
335
+
336
+ else:
337
+ if (layers[1]) > 0:
338
+ layers[1] = layers[1] - i
339
+
340
+ map1 = outputs[i + layers[0]]
341
+ map2 = outputs[i + layers[1]]
342
+
343
+
344
+ x = torch.cat((map1, map2), 1)
345
+ outputs[i] = x
346
+
347
+ elif module_type == "shortcut":
348
+ from_ = int(modules[i]["from"])
349
+ x = outputs[i-1] + outputs[i+from_]
350
+ outputs[i] = x
351
+
352
+
353
+
354
+ elif module_type == 'yolo':
355
+
356
+ anchors = self.module_list[i][0].anchors
357
+ #Get the input dimensions
358
+ inp_dim = int (self.net_info["height"])
359
+
360
+ #Get the number of classes
361
+ num_classes = int (modules[i]["classes"])
362
+
363
+ #Output the result
364
+ x = x.data
365
+ x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)
366
+
367
+ if type(x) == int:
368
+ continue
369
+
370
+
371
+ if not write:
372
+ detections = x
373
+ write = 1
374
+
375
+ else:
376
+ detections = torch.cat((detections, x), 1)
377
+
378
+ outputs[i] = outputs[i-1]
379
+
380
+
381
+
382
+ try:
383
+ return detections
384
+ except:
385
+ return 0
386
+
387
+ def load_weights_url(self, weightfile):
388
+
389
+ # Open the weights file
390
+ fp = get_data_s3(weightfile)
391
+
392
+ # The first 5 values are header information
393
+ # 1. Major version number
394
+ # 2. Minor Version Number
395
+ # 3. Subversion number
396
+ # 4,5. Images seen by the network (during training)
397
+ header = np.frombuffer( fp.getvalue() , dtype = np.int32, count = 5)
398
+ self.header = torch.from_numpy(header)
399
+ self.seen = self.header[3]
400
+
401
+ weights = np.frombuffer( fp.getvalue() , dtype = np.float32)
402
+
403
+ ptr = 0
404
+
405
+ for i in range(len(self.module_list)):
406
+ module_type = self.blocks[i + 1]["type"]
407
+
408
+ #If module_type is convolutional load weights
409
+ #Otherwise ignore.
410
+
411
+ if module_type == "convolutional":
412
+ model = self.module_list[i]
413
+ try:
414
+ batch_normalize = int(self.blocks[i+1]["batch_normalize"])
415
+ except:
416
+ batch_normalize = 0
417
+
418
+ conv = model[0]
419
+
420
+ if (batch_normalize):
421
+ bn = model[1]
422
+
423
+ #Get the number of weights of Batch Norm Layer
424
+ num_bn_biases = bn.bias.numel()
425
+
426
+ #Load the weights
427
+ bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
428
+ ptr += num_bn_biases
429
+
430
+ bn_weights = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
431
+ ptr += num_bn_biases
432
+
433
+ bn_running_mean = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
434
+ ptr += num_bn_biases
435
+
436
+ bn_running_var = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
437
+ ptr += num_bn_biases
438
+
439
+ #Cast the loaded weights into dims of model weights.
440
+ bn_biases = bn_biases.view_as(bn.bias.data)
441
+ bn_weights = bn_weights.view_as(bn.weight.data)
442
+ bn_running_mean = bn_running_mean.view_as(bn.running_mean)
443
+ bn_running_var = bn_running_var.view_as(bn.running_var)
444
+
445
+ #Copy the data to model
446
+ bn.bias.data.copy_(bn_biases)
447
+ bn.weight.data.copy_(bn_weights)
448
+ bn.running_mean.copy_(bn_running_mean)
449
+ bn.running_var.copy_(bn_running_var)
450
+
451
+ else:
452
+
453
+ #Number of biases
454
+ num_biases = conv.bias.numel()
455
+
456
+ #Load the weights
457
+ conv_biases = torch.from_numpy(weights[ptr: ptr + num_biases])
458
+ ptr = ptr + num_biases
459
+
460
+ #reshape the loaded weights according to the dims of the model weights
461
+ conv_biases = conv_biases.view_as(conv.bias.data)
462
+
463
+ #Finally copy the data
464
+ conv.bias.data.copy_(conv_biases)
465
+
466
+ #Let us load the weights for the Convolutional layers
467
+ num_weights = conv.weight.numel()
468
+
469
+ #Do the same as above for weights
470
+ conv_weights = torch.from_numpy(weights[ptr:ptr+num_weights])
471
+ ptr = ptr + num_weights
472
+
473
+ conv_weights = conv_weights.view_as(conv.weight.data)
474
+ conv.weight.data.copy_(conv_weights)
475
+
476
+
477
+ def load_weights(self, weightfile):
478
+
479
+ # Open the weights file
480
+ fp = open(weightfile, "rb")
481
+
482
+ # The first 5 values are header information
483
+ # 1. Major version number
484
+ # 2. Minor Version Number
485
+ # 3. Subversion number
486
+ # 4,5. Images seen by the network (during training)
487
+ header = np.fromfile(fp, dtype = np.int32, count = 5)
488
+ self.header = torch.from_numpy(header)
489
+ self.seen = self.header[3]
490
+
491
+ weights = np.fromfile(fp, dtype = np.float32)
492
+
493
+ ptr = 0
494
+
495
+ for i in range(len(self.module_list)):
496
+ module_type = self.blocks[i + 1]["type"]
497
+
498
+ #If module_type is convolutional load weights
499
+ #Otherwise ignore.
500
+
501
+ if module_type == "convolutional":
502
+ model = self.module_list[i]
503
+ try:
504
+ batch_normalize = int(self.blocks[i+1]["batch_normalize"])
505
+ except:
506
+ batch_normalize = 0
507
+
508
+ conv = model[0]
509
+
510
+ if (batch_normalize):
511
+ bn = model[1]
512
+
513
+ #Get the number of weights of Batch Norm Layer
514
+ num_bn_biases = bn.bias.numel()
515
+
516
+ #Load the weights
517
+ bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
518
+ ptr += num_bn_biases
519
+
520
+ bn_weights = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
521
+ ptr += num_bn_biases
522
+
523
+ bn_running_mean = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
524
+ ptr += num_bn_biases
525
+
526
+ bn_running_var = torch.from_numpy(weights[ptr: ptr + num_bn_biases])
527
+ ptr += num_bn_biases
528
+
529
+ #Cast the loaded weights into dims of model weights.
530
+ bn_biases = bn_biases.view_as(bn.bias.data)
531
+ bn_weights = bn_weights.view_as(bn.weight.data)
532
+ bn_running_mean = bn_running_mean.view_as(bn.running_mean)
533
+ bn_running_var = bn_running_var.view_as(bn.running_var)
534
+
535
+ #Copy the data to model
536
+ bn.bias.data.copy_(bn_biases)
537
+ bn.weight.data.copy_(bn_weights)
538
+ bn.running_mean.copy_(bn_running_mean)
539
+ bn.running_var.copy_(bn_running_var)
540
+
541
+ else:
542
+
543
+ #Number of biases
544
+ num_biases = conv.bias.numel()
545
+
546
+ #Load the weights
547
+ conv_biases = torch.from_numpy(weights[ptr: ptr + num_biases])
548
+ ptr = ptr + num_biases
549
+
550
+ #reshape the loaded weights according to the dims of the model weights
551
+ conv_biases = conv_biases.view_as(conv.bias.data)
552
+
553
+ #Finally copy the data
554
+ conv.bias.data.copy_(conv_biases)
555
+
556
+ #Let us load the weights for the Convolutional layers
557
+ num_weights = conv.weight.numel()
558
+
559
+ #Do the same as above for weights
560
+ conv_weights = torch.from_numpy(weights[ptr:ptr+num_weights])
561
+ ptr = ptr + num_weights
562
+
563
+ conv_weights = conv_weights.view_as(conv.weight.data)
564
+ conv.weight.data.copy_(conv_weights)
565
+
566
+
567
+ if __name__ == '__main__':
568
+
569
+ model = Darknet("yolov3.cfg")
570
+ model.load_weights_url("yolov3.weights")
571
+
572
+ CUDA = torch.cuda.is_available()
573
+
574
+ print(' cuda : ' , CUDA )
575
+
576
+ inp = get_test_input()
577
+
578
+ # if CUDA:
579
+
580
+ # model.cuda()
581
+ # inp.cuda()
582
+
583
+ pred = model( inp , False )
584
+
585
+ print (pred)
586
+ print( 'shape' , pred.shape )
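Both `load_weights` and `load_weights_url` above rely on the Darknet weight-file layout: a header of five `int32` values (version numbers plus the images-seen counter) followed by one flat `float32` array that is consumed layer by layer. A standalone sketch of just that header read; the weight-file name is an assumption (`*.weights` files are tracked through git-lfs in this commit):

```python
# Standalone sketch of the weight-file parsing used by Darknet.load_weights above.
import numpy as np

with open("yolov3.weights", "rb") as fp:
    header = np.fromfile(fp, dtype=np.int32, count=5)   # version info + images seen
    weights = np.fromfile(fp, dtype=np.float32)         # flat array consumed layer by layer

print("version:", header[0], header[1], header[2])
print("images seen during training:", header[3])        # stored as self.seen in the class
print("total float32 parameters in file:", weights.size)
```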
yolo/data/coco.names ADDED
@@ -0,0 +1,80 @@
1
+ person
2
+ bicycle
3
+ car
4
+ motorbike
5
+ aeroplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ stop sign
13
+ parking meter
14
+ bench
15
+ bird
16
+ cat
17
+ dog
18
+ horse
19
+ sheep
20
+ cow
21
+ elephant
22
+ bear
23
+ zebra
24
+ giraffe
25
+ backpack
26
+ umbrella
27
+ handbag
28
+ tie
29
+ suitcase
30
+ frisbee
31
+ skis
32
+ snowboard
33
+ sports ball
34
+ kite
35
+ baseball bat
36
+ baseball glove
37
+ skateboard
38
+ surfboard
39
+ tennis racket
40
+ bottle
41
+ wine glass
42
+ cup
43
+ fork
44
+ knife
45
+ spoon
46
+ bowl
47
+ banana
48
+ apple
49
+ sandwich
50
+ orange
51
+ broccoli
52
+ carrot
53
+ hot dog
54
+ pizza
55
+ donut
56
+ cake
57
+ chair
58
+ sofa
59
+ pottedplant
60
+ bed
61
+ diningtable
62
+ toilet
63
+ tvmonitor
64
+ laptop
65
+ mouse
66
+ remote
67
+ keyboard
68
+ cell phone
69
+ microwave
70
+ oven
71
+ toaster
72
+ sink
73
+ refrigerator
74
+ book
75
+ clock
76
+ vase
77
+ scissors
78
+ teddy bear
79
+ hair drier
80
+ toothbrush
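These names are consumed through `load_classes('data/coco.names')`, which lives in `yolo/utils.py` and is not part of this diff. As a hedged stand-in (not the repository's implementation), the helper only needs to return the 80 names as a list indexable by the integer class id found in each detection:

```python
# Stand-in sketch (yolo/utils.py is not included in this diff): load_classes()
# just has to map the file above to a list of 80 class-name strings.
def load_classes(namesfile):
    with open(namesfile) as fp:
        return [line.strip() for line in fp if line.strip()]

classes = load_classes("yolo/data/coco.names")
print(len(classes))              # 80
print(classes[0], classes[16])   # person dog  -- indexed as classes[int(x[-1])] elsewhere
```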
yolo/det/det_Poster3.jpg ADDED
yolo/det/det_dog-cycle-car.png ADDED
yolo/det/det_sample.jpeg ADDED
yolo/det/det_victoria.jpg ADDED
yolo/detector.py ADDED
@@ -0,0 +1,321 @@
1
+ from __future__ import division
2
+ import time
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.autograd import Variable
6
+ import numpy as np
7
+ import cv2
8
+ from utils import *
9
+ import argparse
10
+ import os
11
+ import os.path as osp
12
+ from darknet import Darknet
13
+ # from preprocess import prep_image, inp_to_image
14
+ import pandas as pd
15
+ import random
16
+ import pickle as pkl
17
+ import itertools
18
+
19
+ class test_net(nn.Module):
20
+ def __init__(self, num_layers, input_size):
21
+ super(test_net, self).__init__()
22
+ self.num_layers= num_layers
23
+ self.linear_1 = nn.Linear(input_size, 5)
24
+ self.middle = nn.ModuleList([nn.Linear(5,5) for x in range(num_layers)])
25
+ self.output = nn.Linear(5,2)
26
+
27
+ def forward(self, x):
28
+ x = x.view(-1)
29
+ fwd = nn.Sequential(self.linear_1, *self.middle, self.output)
30
+ return fwd(x)
31
+
32
+ def get_test_input(input_dim, CUDA):
33
+ img = cv2.imread("dog-cycle-car.png")
34
+ img = cv2.resize(img, (input_dim, input_dim))
35
+ img_ = img[:,:,::-1].transpose((2,0,1))
36
+ img_ = img_[np.newaxis,:,:,:]/255.0
37
+ img_ = torch.from_numpy(img_).float()
38
+ img_ = Variable(img_)
39
+
40
+ if CUDA:
41
+ img_ = img_.cuda()
42
+ num_classes
43
+ return img_
44
+
45
+
46
+
47
+ def arg_parse():
48
+ """
49
+ Parse arguements to the detect module
50
+
51
+ """
52
+
53
+
54
+ parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
55
+
56
+ parser.add_argument("--images", dest = 'images', help =
57
+ "Image / Directory containing images to perform detection upon",
58
+ default = "imgs", type = str)
59
+ parser.add_argument("--det", dest = 'det', help =
60
+ "Image / Directory to store detections to",
61
+ default = "det", type = str)
62
+ parser.add_argument("--bs", dest = "bs", help = "Batch size", default = 1)
63
+ parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
64
+ parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
65
+ parser.add_argument("--cfg", dest = 'cfgfile', help =
66
+ "Config file",
67
+ default = "cfg/yolov3.cfg", type = str)
68
+ parser.add_argument("--weights", dest = 'weightsfile', help =
69
+ "weightsfile",
70
+ default = "yolov3.weights", type = str)
71
+ parser.add_argument("--reso", dest = 'reso', help =
72
+ "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
73
+ default = "416", type = str)
74
+ parser.add_argument("--scales", dest = "scales", help = "Scales to use for detection",
75
+ default = "1,2,3", type = str)
76
+
77
+ return parser.parse_args()
78
+
79
+ if __name__ == '__main__':
80
+ args = arg_parse()
81
+
82
+ scales = args.scales
83
+
84
+
85
+ # scales = [int(x) for x in scales.split(',')]
86
+ #
87
+ #
88
+ #
89
+ # args.reso = int(args.reso)
90
+ #
91
+ # num_boxes = [args.reso//32, args.reso//16, args.reso//8]
92
+ # scale_indices = [3*(x**2) for x in num_boxes]
93
+ # scale_indices = list(itertools.accumulate(scale_indices, lambda x,y : x+y))
94
+ #
95
+ #
96
+ # li = []
97
+ # i = 0
98
+ # for scale in scale_indices:
99
+ # li.extend(list(range(i, scale)))
100
+ # i = scale
101
+ #
102
+ # scale_indices = li
103
+
104
+ images = args.images
105
+ batch_size = int(args.bs)
106
+ confidence = float(args.confidence)
107
+ nms_thesh = float(args.nms_thresh)
108
+ start = 0
109
+
110
+ CUDA = torch.cuda.is_available()
111
+
112
+ num_classes = 80
113
+ classes = load_classes('data/coco.names')
114
+
115
+ #Set up the neural network
116
+ print("Loading network.....")
117
+ model = Darknet(args.cfgfile)
118
+ model.load_weights(args.weightsfile)
119
+ print("Network successfully loaded")
120
+
121
+ model.net_info["height"] = args.reso
122
+ inp_dim = int(model.net_info["height"])
123
+ assert inp_dim % 32 == 0
124
+ assert inp_dim > 32
125
+
126
+ #If there's a GPU available, put the model on GPU
127
+ if CUDA:
128
+ model.cuda()
129
+
130
+
131
+ #Set the model in evaluation mode
132
+ model.eval()
133
+
134
+ read_dir = time.time()
135
+ #Detection phase
136
+ try:
137
+ imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images) if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg']
138
+ except NotADirectoryError:
139
+ imlist = []
140
+ imlist.append(osp.join(osp.realpath('.'), images))
141
+ except FileNotFoundError:
142
+ print ("No file or directory with the name {}".format(images))
143
+ exit()
144
+
145
+ if not os.path.exists(args.det):
146
+ os.makedirs(args.det)
147
+
148
+ load_batch = time.time()
149
+
150
+ batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
151
+ im_batches = [x[0] for x in batches]
152
+ orig_ims = [x[1] for x in batches]
153
+ im_dim_list = [x[2] for x in batches]
154
+ im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
155
+
156
+
157
+
158
+ if CUDA:
159
+ im_dim_list = im_dim_list.cuda()
160
+
161
+ leftover = 0
162
+
163
+ if (len(im_dim_list) % batch_size):
164
+ leftover = 1
165
+
166
+
167
+ if batch_size != 1:
168
+ num_batches = len(imlist) // batch_size + leftover
169
+ im_batches = [torch.cat((im_batches[i*batch_size : min((i + 1)*batch_size,
170
+ len(im_batches))])) for i in range(num_batches)]
171
+
172
+
173
+ i = 0
174
+
175
+
176
+ write = False
177
+ model(get_test_input(inp_dim, CUDA), CUDA)
178
+
179
+ start_det_loop = time.time()
180
+
181
+ objs = {}
182
+
183
+
184
+
185
+ for batch in im_batches:
186
+ #load the image
187
+ start = time.time()
188
+ if CUDA:
189
+ batch = batch.cuda()
190
+
191
+
192
+ #Apply offsets to the result predictions
193
+ #Tranform the predictions as described in the YOLO paper
194
+ #flatten the prediction vector
195
+ # B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
196
+ # Put every proposed box as a row.
197
+ with torch.no_grad():
198
+ prediction = model(Variable(batch), CUDA)
199
+
200
+ # prediction = prediction[:,scale_indices]
201
+
202
+
203
+ #get the boxes with object confidence > threshold
204
+ #Convert the cordinates to absolute coordinates
205
+ #perform NMS on these boxes, and save the results
206
+ #I could have done NMS and saving seperately to have a better abstraction
207
+ #But both these operations require looping, hence
208
+ #clubbing these ops in one loop instead of two.
209
+ #loops are slower than vectorised operations.
210
+
211
+ prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)
212
+
213
+
214
+ if type(prediction) == int:
215
+ i += 1
216
+ continue
217
+
218
+ end = time.time()
219
+
220
+ # print(end - start)
221
+
222
+ prediction[:,0] += i*batch_size
223
+
224
+ if not write:
225
+ output = prediction
226
+ write = 1
227
+ else:
228
+ output = torch.cat((output,prediction))
229
+
230
+
231
+
232
+
233
+ for im_num, image in enumerate(imlist[i*batch_size: min((i + 1)*batch_size, len(imlist))]):
234
+ im_id = i*batch_size + im_num
235
+ objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
236
+ print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
237
+ print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
238
+ print("----------------------------------------------------------")
239
+ i += 1
240
+
241
+
242
+ if CUDA:
243
+ torch.cuda.synchronize()
244
+
245
+ try:
246
+ output
247
+ except NameError:
248
+ print("No detections were made")
249
+ exit()
250
+
251
+ im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
252
+
253
+ scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
254
+
255
+
256
+ output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
257
+ output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
258
+
259
+
260
+
261
+ output[:,1:5] /= scaling_factor
262
+
263
+ for i in range(output.shape[0]):
264
+ output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
265
+ output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
266
+
267
+
268
+ output_recast = time.time()
269
+
270
+ class_load = time.time()
271
+
272
+ colors = pkl.load(open("pallete", "rb"))
273
+
274
+ draw = time.time()
275
+
276
+ def write(x, batches, results):
277
+ c1 = tuple(x[1:3].int())
278
+ c2 = tuple(x[3:5].int())
279
+ img = results[int(x[0])]
280
+
281
+ print( 'img' , int( x[0] ) )
282
+ print( 'cls' , int( x[-1] ) )
283
+
284
+ cls = int(x[-1])
285
+ label = "{0}".format(classes[cls])
286
+ color = random.choice(colors)
287
+ cv2.rectangle(img, c1, c2,color, 1)
288
+ t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
289
+ c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
290
+ cv2.rectangle(img, c1, c2,color, -1)
291
+ cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
292
+ return img
293
+
294
+
295
+ list(map(lambda x: write(x, im_batches, orig_ims), output))
296
+
297
+ det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det,x.split("/")[-1]))
298
+
299
+ print('det_names ',det_names)
300
+ print('orig_ims ',orig_ims[0].shape)
301
+ print('output : ',output)
302
+
303
+ list(map(cv2.imwrite, det_names, orig_ims))
304
+
305
+ end = time.time()
306
+
307
+ print()
308
+ print("SUMMARY")
309
+ print("----------------------------------------------------------")
310
+ print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
311
+ print()
312
+ print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
313
+ print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
314
+ print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) + " images)", output_recast - start_det_loop))
315
+ print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
316
+ print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
317
+ print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch)/len(imlist)))
318
+ print("----------------------------------------------------------")
319
+
320
+
321
+ torch.cuda.empty_cache()
yolo/dog-cycle-car.png ADDED
yolo/model.py ADDED
@@ -0,0 +1,189 @@
1
+ from __future__ import division
2
+ import time
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.autograd import Variable
6
+ import numpy as np
7
+ import cv2
8
+ from yolo.utils import *
9
+ import argparse
10
+ import os
11
+ import os.path as osp
12
+ from yolo.darknet import Darknet
13
+ # from preprocess import prep_image, inp_to_image
14
+ import pandas as pd
15
+ import random
16
+ import pickle as pkl
17
+ import itertools
18
+ import os
19
+ import base64
20
+ from PIL import Image
21
+ from io import BytesIO
22
+
23
+ class yolo_model():
24
+
25
+
26
+ batch_size = int(1)
27
+ confidence = float(0.5)
28
+ nms_thesh = float(0.4)
29
+ reso = 416
30
+ start = 0
31
+
32
+ CUDA = torch.cuda.is_available()
33
+
34
+ num_classes = 80
35
+
36
+
37
+ def __init__(self):
38
+
39
+ self.classes = load_classes( os.path.join( 'yolo' , 'data', 'coco.names' ) )
40
+
41
+ # self.colors = pkl.load( get_data_s3( "pallete" ) )
42
+
43
+ # Set up the neural network
44
+
45
+ self.model = Darknet( os.path.join( 'yolo' , 'yolov3-tiny.cfg' ) )
46
+ self.model.load_weights( os.path.join( 'yolo' , 'yolov3-tiny.weights' ) )
47
+ print(' [*] Model Loaded Successfully')
48
+
49
+ # set model resolution
50
+
51
+ self.model.net_info["height"] = self.reso
52
+ self.inp_dim = int(self.model.net_info["height"])
53
+
54
+ assert self.inp_dim % 32 == 0
55
+ assert self.inp_dim > 32
56
+
57
+ # If there's a GPU availible, put the model on GPU
58
+ if self.CUDA:
59
+ self.model.cuda()
60
+
61
+ # Set the model in evaluation mode
62
+ self.model.eval()
63
+
64
+ def write( self , x , batches , results , colors=[] ):
65
+ c1 = tuple(x[1:3].int())
66
+ c2 = tuple(x[3:5].int())
67
+ img = results[int(x[0])]
68
+
69
+ print( 'img' , int( x[0] ) )
70
+ print( 'cls' , int( x[-1] ) )
71
+
72
+ cls = int(x[-1])
73
+ label = "{0}".format(self.classes[cls])
74
+ color = random.choice(colors)
75
+ cv2.rectangle(img, c1, c2,color, 1)
76
+ t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
77
+ c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
78
+ cv2.rectangle(img, c1, c2,color, -1)
79
+ cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
80
+ return img
81
+
82
+ def img_to_base64_str(self,img):
83
+ buffered = BytesIO()
84
+ img.save(buffered, format="PNG")
85
+ buffered.seek(0)
86
+ img_byte = buffered.getvalue()
87
+ img_str = "data:image/png;base64," + base64.b64encode(img_byte).decode()
88
+ return img_str
89
+
90
+
91
+ def predict( self , image ):
92
+
93
+ imlist = []
94
+ imlist.append( image )
95
+
96
+ batches = list( map( prep_image_org , imlist , [ self.inp_dim for x in range( len(imlist) ) ] ) )
97
+ im_batches = [x[0] for x in batches]
98
+ orig_ims = [x[1] for x in batches]
99
+ im_dim_list = [x[2] for x in batches]
100
+
101
+ print( 'im_dim_list : ' , im_dim_list )
102
+
103
+ im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
104
+
105
+ if self.CUDA:
106
+ im_dim_list = im_dim_list.cuda()
107
+
108
+ print('im_batches' , len(im_batches))
109
+
110
+ batch = im_batches[0]
111
+
112
+ if self.CUDA:
113
+ batch = batch.cuda()
114
+
115
+
116
+ #Apply offsets to the result predictions
117
+ #Transform the predictions as described in the YOLO paper
118
+ #flatten the prediction vector
119
+ # B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
120
+ # Put every proposed box as a row.
121
+ with torch.no_grad():
122
+ prediction = self.model(Variable(batch), self.CUDA)
123
+
124
+ # prediction = prediction[:,scale_indices]
125
+
126
+
127
+ #get the boxes with object confidence > threshold
128
+ #Convert the coordinates to absolute coordinates
129
+ #perform NMS on these boxes, and save the results
130
+ #I could have done NMS and saving separately to have a better abstraction
131
+ #But both these operations require looping, hence
132
+ #clubbing these ops in one loop instead of two.
133
+ #loops are slower than vectorised operations.
134
+
135
+ prediction = write_results(prediction, self.confidence, self.num_classes, nms = True, nms_conf = self.nms_thesh)
136
+
137
+ end = time.time()
138
+
139
+ # print(end - start)
140
+
141
+ # prediction[:,0] += i*batch_size
142
+
143
+ output = prediction
144
+
145
+ # 1, 1, 1
146
+ # print( 'enumerate : ' , batch_size , len(imlist) , min( batch_size , len(imlist) ) )
147
+
148
+ for im_num, image in enumerate( imlist ):
149
+ im_id = im_num
150
+ objs = [self.classes[int(x[-1])] for x in output if int(x[0]) == im_id]
151
+ # print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - self.start)/self.batch_size))
152
+ print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
153
+ print("----------------------------------------------------------")
154
+
155
+ im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
156
+
157
+ scaling_factor = torch.min(self.inp_dim/im_dim_list,1)[0].view(-1,1)
158
+
159
+ output[:,[1,3]] -= (self.inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
160
+ output[:,[2,4]] -= (self.inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
161
+
162
+ output[:,1:5] /= scaling_factor
163
+
164
+ for i in range(output.shape[0]):
165
+ output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
166
+ output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
167
+
168
+ colors = pkl.load( open( "yolo/pallete", "rb") )
169
+
170
+ list(map(lambda x: self.write( x , im_batches , orig_ims , colors=colors ) , output ) )
171
+
172
+ print('orig_ims : shape ',orig_ims[0].shape)
173
+ # print('orig_ims : ',orig_ims[0])
174
+
175
+ output_image = Image.fromarray(orig_ims[0])
176
+
177
+ img_str = self.img_to_base64_str(output_image)
178
+
179
+ # im_bytes = orig_ims[0].tobytes()
180
+ # im_b64 = base64.b64encode(im_bytes)
181
+
182
+ # im_b64 = im_b64.decode('utf-8')
183
+
184
+ # print( 'im_b64' , im_b64 )
185
+
186
+ payload = dict({ 'image' : img_str , 'objects' : objs })
187
+
188
+ return payload,output_image
189
+
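The `payload` returned by `yolo_model.predict()` carries the annotated frame as a data-URL string produced by `img_to_base64_str` above. A small sketch (not part of the commit) of turning that field back into a PIL image on the consumer side:

```python
# Sketch: decode the 'image' field of the payload returned by yolo_model.predict().
import base64
from io import BytesIO
from PIL import Image

def payload_image_to_pil(payload):
    img_str = payload["image"]                    # "data:image/png;base64,...."
    b64_data = img_str.split("base64,", 1)[1]     # strip the data-URL prefix
    return Image.open(BytesIO(base64.b64decode(b64_data)))

# annotated = payload_image_to_pil(payload)       # payload as returned by predict()
```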
yolo/pallete ADDED
Binary file (908 Bytes).
 
yolo/sample.jpeg ADDED
yolo/sample.py ADDED
@@ -0,0 +1,77 @@
1
+
2
+ import os
3
+ import boto3
4
+ from io import BytesIO , StringIO
5
+ import pickle as pkl
6
+ from utils import *
7
+
8
+ def get_data_s3(filename):
9
+
10
+ ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")        # never hard-code credentials in source
+ SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
12
+ bucket = "root-models"
13
+
14
+ s3 = boto3.client( "s3" , aws_access_key_id=ACCESS_KEY , aws_secret_access_key=SECRET_KEY )
15
+
16
+ response = s3.get_object(Bucket=bucket, Key=filename)
17
+
18
+ data = BytesIO( response["Body"].read() )
19
+
20
+ return data
21
+
22
+
23
+ def parse_cfg_url(filename='yolov3.cfg'):
24
+
25
+ data = get_data_s3(filename)
26
+
27
+ lines = data.getvalue().decode().rstrip().lstrip().split('\n') #store the lines in a list
28
+ lines = [x.rstrip().lstrip() for x in lines]
29
+
30
+ lines = [x for x in lines if len(x) > 0] #get rid of the empty lines
31
+ lines = [x for x in lines if x[0] != '#']
32
+ lines = [x.rstrip().lstrip() for x in lines]
33
+
34
+
35
+ block = {}
36
+ blocks = []
37
+
38
+ for line in lines:
39
+ # print('line:' , line)
40
+ if line[0] == "[": #This marks the start of a new block
41
+ if len(block) != 0:
42
+ blocks.append(block)
43
+ block = {}
44
+ block["type"] = line[1:-1].rstrip()
45
+ else:
46
+ key,value = line.split("=")
47
+ block[key.rstrip()] = value.lstrip()
48
+ blocks.append(block)
49
+
50
+ # print('blocks : 2 ' , blocks )
51
+
52
+ return blocks
53
+
54
+ if __name__ == '__main__':
55
+ # parse_cfg('yolov3.cfg')
56
+
57
+ # parse_cfg_url('yolov3.cfg')
58
+
59
+ # colors = pkl.load( open( "pallete", "rb") )
60
+
61
+ # print(colors)
62
+
63
+ # print()
64
+
65
+ # colors = pkl.load( get_data_s3( "pallete" ) )
66
+
67
+ # print(colors)
68
+
69
+ classes = load_classes('data/coco.names')
70
+
71
+ print( classes )
72
+
73
+ print()
74
+
75
+ classes = load_classes_url('coco.names')
76
+
77
+ print( classes )
yolo/test.py ADDED
@@ -0,0 +1,166 @@
1
+ from __future__ import division
2
+ import time
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.autograd import Variable
6
+ import numpy as np
7
+ import cv2
8
+ from utils import *
9
+ import argparse
10
+ import os
11
+ import os.path as osp
12
+ from darknet import Darknet
13
+ # from preprocess import prep_image, inp_to_image
14
+ import pandas as pd
15
+ import random
16
+ import pickle as pkl
17
+ import itertools
18
+ import os
19
+
20
+ if __name__ == '__main__':
21
+
22
+ images = os.path.join('victoria.jpg')
23
+
24
+ batch_size = int(1)
25
+ confidence = float(0.5)
26
+ nms_thesh = float(0.4)
27
+ reso = 416
28
+ start = 0
29
+
30
+ CUDA = torch.cuda.is_available()
31
+
32
+ num_classes = 80
33
+ classes = load_classes('data/coco.names')
34
+
35
+ #Set up the neural network
36
+
37
+ model = Darknet("yolov3.cfg")
38
+ model.load_weights("yolov3.weights")
39
+ print(' [*] Model Loaded Successfully')
40
+
41
+ # set model resolution
42
+
43
+ model.net_info["height"] = reso
44
+ inp_dim = int(model.net_info["height"])
45
+
46
+ assert inp_dim % 32 == 0
47
+ assert inp_dim > 32
48
+
49
+ # If there's a GPU available, put the model on GPU
50
+ if CUDA:
51
+ model.cuda()
52
+
53
+ # Set the model in evaluation mode
54
+ model.eval()
55
+
56
+ imlist = []
57
+ imlist.append( osp.join(osp.realpath('.') , images) )
58
+
59
+ batches = list( map( prep_image , imlist , [ inp_dim for x in range( len(imlist) ) ] ) )
60
+ im_batches = [x[0] for x in batches]
61
+ orig_ims = [x[1] for x in batches]
62
+ im_dim_list = [x[2] for x in batches]
63
+
64
+ print( 'im_dim_list : ' , im_dim_list )
65
+
66
+ im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
67
+
68
+ print( 'im_dim_list : after' , im_dim_list )
69
+
70
+ if CUDA:
71
+ im_dim_list = im_dim_list.cuda()
72
+
73
+ print('im_batches' , len(im_batches))
74
+
75
+ batch = im_batches[0]
76
+
77
+ if CUDA:
78
+ batch = batch.cuda()
79
+
80
+
81
+ #Apply offsets to the result predictions
82
+ #Transform the predictions as described in the YOLO paper
83
+ #flatten the prediction vector
84
+ # B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
85
+ # Put every proposed box as a row.
86
+ with torch.no_grad():
87
+ prediction = model(Variable(batch), CUDA)
88
+
89
+ # prediction = prediction[:,scale_indices]
90
+
91
+
92
+ #get the boxes with object confidence > threshold
93
+ #Convert the coordinates to absolute coordinates
94
+ #perform NMS on these boxes, and save the results
95
+ #I could have done NMS and saving separately to have a better abstraction
96
+ #But both these operations require looping, hence
97
+ #clubbing these ops in one loop instead of two.
98
+ #loops are slower than vectorised operations.
99
+
100
+ prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)
101
+
102
+
103
+ # if type(prediction) == int:
104
+ # continue
105
+
106
+ end = time.time()
107
+
108
+ # print(end - start)
109
+
110
+ # prediction[:,0] += i*batch_size
111
+
112
+ output = prediction
113
+
114
+ # 1, 1, 1
115
+ # print( 'enumerate : ' , batch_size , len(imlist) , min( batch_size , len(imlist) ) )
116
+
117
+ for im_num, image in enumerate( imlist ):
118
+ im_id = im_num
119
+ objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
120
+ print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
121
+ print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
122
+ print("----------------------------------------------------------")
123
+
124
+ im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
125
+
126
+ scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
127
+
128
+ output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
129
+ output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
130
+
131
+ output[:,1:5] /= scaling_factor
132
+
133
+ for i in range(output.shape[0]):
134
+ output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
135
+ output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
136
+
137
+ colors = pkl.load(open("pallete", "rb"))
138
+
139
+ def write(x, batches, results):
140
+ c1 = tuple(x[1:3].int())
141
+ c2 = tuple(x[3:5].int())
142
+ img = results[int(x[0])]
143
+
144
+ print( 'img' , int( x[0] ) )
145
+ print( 'cls' , int( x[-1] ) )
146
+
147
+ cls = int(x[-1])
148
+ label = "{0}".format(classes[cls])
149
+ color = random.choice(colors)
150
+ cv2.rectangle(img, c1, c2,color, 1)
151
+ t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
152
+ c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
153
+ cv2.rectangle(img, c1, c2,color, -1)
154
+ cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
155
+ return img
156
+
157
+
158
+ list(map(lambda x: write(x, im_batches, orig_ims), output))
159
+
160
+ det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format('det',x.split("/")[-1]))
161
+
162
+ print('det_names ',det_names)
163
+ print('orig_ims ',orig_ims[0].shape)
164
+ print('output : ',output)
165
+
166
+ list(map(cv2.imwrite, det_names, orig_ims))
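The block above that subtracts the padding and divides by scaling_factor undoes the letterbox resize; a small worked example, assuming inp_dim = 416 and a hypothetical 640x480 input image:

inp_dim = 416
orig_w, orig_h = 640, 480                        # original image size
scale = min(inp_dim / orig_w, inp_dim / orig_h)  # 0.65
pad_x = (inp_dim - scale * orig_w) / 2           # 0.0  (the width fills the square)
pad_y = (inp_dim - scale * orig_h) / 2           # 52.0 (grey bars top and bottom)

x_net, y_net = 200.0, 60.0                       # a corner predicted in 416x416 space
x_orig = (x_net - pad_x) / scale                 # ~307.7 in the 640x480 image
y_orig = (y_net - pad_y) / scale                 # ~12.3, then clamped to the image bounds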
yolo/utils.py ADDED
@@ -0,0 +1,324 @@
1
+ from __future__ import division
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ from torch.autograd import Variable
7
+ import numpy as np
8
+ import cv2
9
+ import boto3
10
+ from io import BytesIO
11
+
12
+ def get_data_s3(filename):
13
+
14
+ ACCESS_KEY = "AKIAUKUH7S3OIVOEIRWY"
15
+ SECRET_KEY = "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"
16
+ bucket = "root-models"
17
+
18
+ s3 = boto3.client( "s3" , aws_access_key_id=ACCESS_KEY , aws_secret_access_key=SECRET_KEY )
19
+
20
+ response = s3.get_object(Bucket=bucket, Key=filename)
21
+
22
+ data = BytesIO( response["Body"].read() )
23
+
24
+ return data
25
+
26
+ def parse_cfg_url(filename='yolov3.cfg'):
27
+
28
+ data = get_data_s3(filename)
29
+
30
+ lines = data.getvalue().decode().rstrip().lstrip().split('\n') #store the lines in a list
31
+ lines = [x.rstrip().lstrip() for x in lines]
32
+
33
+ lines = [x for x in lines if len(x) > 0] #get rid of the empty lines
34
+ lines = [x for x in lines if x[0] != '#']
35
+ lines = [x.rstrip().lstrip() for x in lines]
36
+
37
+
38
+ block = {}
39
+ blocks = []
40
+
41
+ for line in lines:
42
+ # print('line:' , line)
43
+ if line[0] == "[": #This marks the start of a new block
44
+ if len(block) != 0:
45
+ blocks.append(block)
46
+ block = {}
47
+ block["type"] = line[1:-1].rstrip()
48
+ else:
49
+ key,value = line.split("=")
50
+ block[key.rstrip()] = value.lstrip()
51
+ blocks.append(block)
52
+
53
+ # print('blocks : 2 ' , blocks )
54
+
55
+ return blocks
56
+
57
+
58
+
59
+ def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
60
+ batch_size = prediction.size(0)
61
+ stride = inp_dim // prediction.size(2)
62
+ grid_size = inp_dim // stride
63
+ bbox_attrs = 5 + num_classes
64
+ num_anchors = len(anchors)
65
+
66
+ anchors = [(a[0]/stride, a[1]/stride) for a in anchors]
67
+
68
+
69
+
70
+ prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
71
+ prediction = prediction.transpose(1,2).contiguous()
72
+ prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
73
+
74
+
75
+ #Sigmoid the centre_X, centre_Y, and object confidence
76
+ prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
77
+ prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
78
+ prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
79
+
80
+
81
+
82
+ #Add the center offsets
83
+ grid_len = np.arange(grid_size)
84
+ a,b = np.meshgrid(grid_len, grid_len)
85
+
86
+ x_offset = torch.FloatTensor(a).view(-1,1)
87
+ y_offset = torch.FloatTensor(b).view(-1,1)
88
+
89
+ if CUDA:
90
+ x_offset = x_offset.cuda()
91
+ y_offset = y_offset.cuda()
92
+
93
+ x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
94
+
95
+ prediction[:,:,:2] += x_y_offset
96
+
97
+ #log space transform height and the width
98
+ anchors = torch.FloatTensor(anchors)
99
+
100
+ if CUDA:
101
+ anchors = anchors.cuda()
102
+
103
+ anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
104
+ prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors
105
+
106
+ #Sigmoid the class scores (YOLOv3 uses independent logistic classifiers rather than a softmax)
107
+ prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes]))
108
+
109
+ prediction[:,:,:4] *= stride
110
+
111
+
112
+ return prediction
113
+
114
+ def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
115
+ conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)
116
+ prediction = prediction*conf_mask
117
+
118
+
119
+ try:
120
+ ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous()
121
+ except:
122
+ return 0
123
+
124
+
125
+ box_a = prediction.new(prediction.shape)
126
+ box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2)
127
+ box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
128
+ box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2)
129
+ box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
130
+ prediction[:,:,:4] = box_a[:,:,:4]
131
+
132
+
133
+
134
+ batch_size = prediction.size(0)
135
+
136
+ output = prediction.new(1, prediction.size(2) + 1)
137
+ write = False
138
+
139
+
140
+ for ind in range(batch_size):
141
+ #select the image from the batch
142
+ image_pred = prediction[ind]
143
+
144
+
145
+
146
+ #Get the class having maximum score, and the index of that class
147
+ #Get rid of num_classes softmax scores
148
+ #Add the class index and the class score of class having maximum score
149
+ max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1)
150
+ max_conf = max_conf.float().unsqueeze(1)
151
+ max_conf_score = max_conf_score.float().unsqueeze(1)
152
+ seq = (image_pred[:,:5], max_conf, max_conf_score)
153
+ image_pred = torch.cat(seq, 1)
154
+
155
+
156
+
157
+ #Get rid of the zero entries
158
+ non_zero_ind = (torch.nonzero(image_pred[:,4]))
159
+
160
+
161
+ image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7)
162
+
163
+ #Get the various classes detected in the image
164
+ try:
165
+ img_classes = unique(image_pred_[:,-1])
166
+ except:
167
+ continue
168
+ #We will do NMS classwise
169
+ for cls in img_classes:
170
+ #get the detections with one particular class
171
+ cls_mask = image_pred_*(image_pred_[:,-1] == cls).float().unsqueeze(1)
172
+ class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze()
173
+
174
+
175
+ image_pred_class = image_pred_[class_mask_ind].view(-1,7)
176
+
177
+
178
+
179
+ #sort the detections such that the entry with the maximum objectness
180
+ #confidence is at the top
181
+ conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1]
182
+ image_pred_class = image_pred_class[conf_sort_index]
183
+ idx = image_pred_class.size(0)
184
+
185
+ #if nms has to be done
186
+ if nms:
187
+ #For each detection
188
+ for i in range(idx):
189
+ #Get the IOUs of all boxes that come after the one we are looking at
190
+ #in the loop
191
+ try:
192
+ ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
193
+ except ValueError:
194
+ break
195
+
196
+ except IndexError:
197
+ break
198
+
199
+ #Zero out all the detections that have IoU > threshold
200
+ iou_mask = (ious < nms_conf).float().unsqueeze(1)
201
+ image_pred_class[i+1:] *= iou_mask
202
+
203
+ #Remove the non-zero entries
204
+ non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
205
+ image_pred_class = image_pred_class[non_zero_ind].view(-1,7)
206
+
207
+
208
+
209
+ #Concatenate the batch_id of the image to the detection
210
+ #this helps us identify which image the detection corresponds to
211
+ #We use a linear structure to hold ALL the detections from the batch
212
+ #the batch_dim is flattened
213
+ #batch is identified by extra batch column
214
+
215
+
216
+ batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
217
+ seq = batch_ind, image_pred_class
218
+ if not write:
219
+ output = torch.cat(seq,1)
220
+ write = True
221
+ else:
222
+ out = torch.cat(seq,1)
223
+ output = torch.cat((output,out))
224
+
225
+ try:
226
+ return output
227
+ except:
228
+ return 0
229
+
230
+ def unique(tensor):
231
+ tensor_np = tensor.cpu().numpy()
232
+ unique_np = np.unique(tensor_np)
233
+ unique_tensor = torch.from_numpy(unique_np)
234
+
235
+ tensor_res = tensor.new(unique_tensor.shape)
236
+ tensor_res.copy_(unique_tensor)
237
+ return tensor_res
238
+
239
+ def load_classes_url(namesfile):
240
+ fp = get_data_s3(namesfile)
241
+ names = fp.getvalue().decode().split("\n")[:-1]
242
+ return names
243
+
244
+
245
+ def load_classes(namesfile):
246
+ fp = open(namesfile, "r")
247
+ names = fp.read().split("\n")[:-1]
248
+ return names
249
+
250
+ def bbox_iou(box1, box2):
251
+ """
252
+ Returns the IoU of two bounding boxes
253
+
254
+
255
+ """
256
+ #Get the coordinates of bounding boxes
257
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
258
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]
259
+
260
+ #get the coordinates of the intersection rectangle
261
+ inter_rect_x1 = torch.max(b1_x1, b2_x1)
262
+ inter_rect_y1 = torch.max(b1_y1, b2_y1)
263
+ inter_rect_x2 = torch.min(b1_x2, b2_x2)
264
+ inter_rect_y2 = torch.min(b1_y2, b2_y2)
265
+
266
+ #Intersection area
267
+ if torch.cuda.is_available():
268
+ inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).cuda())*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).cuda())
269
+ else:
270
+ inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape))*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape))
271
+
272
+ #Union Area
273
+ b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1)
274
+ b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1)
275
+
276
+ iou = inter_area / (b1_area + b2_area - inter_area)
277
+
278
+ return iou
279
+
280
+ def letterbox_image(img, inp_dim):
281
+ '''resize image with unchanged aspect ratio using padding'''
282
+ img_w, img_h = img.shape[1], img.shape[0]
283
+ w, h = inp_dim
284
+ new_w = int(img_w * min(w/img_w, h/img_h))
285
+ new_h = int(img_h * min(w/img_w, h/img_h))
286
+ resized_image = cv2.resize(img, (new_w,new_h), interpolation = cv2.INTER_CUBIC)
287
+
288
+ canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
289
+
290
+ canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image
291
+
292
+ return canvas
293
+
294
+
295
+ def prep_image_org(orig_im, inp_dim):
296
+ """
297
+ Prepare image for inputting to the neural network.
298
+
299
+ Returns a Variable
300
+ """
301
+
302
+ # orig_im = cv2.imread(img)
303
+ dim = orig_im.shape[1], orig_im.shape[0]
304
+ img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
305
+ img_ = img[:,:,::-1].transpose((2,0,1)).copy()
306
+ img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
307
+ return img_, orig_im, dim
308
+
309
+
310
+
311
+ def prep_image(img, inp_dim):
312
+ """
313
+ Prepare image for inputting to the neural network.
314
+
315
+ Returns a Variable
316
+ """
317
+
318
+ orig_im = cv2.imread(img)
319
+ dim = orig_im.shape[1], orig_im.shape[0]
320
+ img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
321
+ img_ = img[:,:,::-1].transpose((2,0,1)).copy()
322
+ img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
323
+ return img_, orig_im, dim
324
+
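bbox_iou above expects corner-format (x1, y1, x2, y2) rows and uses a +1 pixel convention for widths and heights; a minimal sketch of calling it on toy boxes, assuming utils.py is importable:

import torch
# from utils import bbox_iou

box1 = torch.tensor([[0.0, 0.0, 10.0, 10.0]])    # an 11x11 box under the +1 convention
box2 = torch.tensor([[5.0, 5.0, 15.0, 15.0],     # partial overlap
                     [20.0, 20.0, 30.0, 30.0]])  # no overlap

# ious = bbox_iou(box1, box2)
# expected: roughly tensor([0.1748, 0.0000]),
# i.e. 36 / (121 + 121 - 36) for the first pair and zero intersection for the second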
yolo/victoria.jpg ADDED
yolo/yolov3-tiny.cfg ADDED
@@ -0,0 +1,182 @@
1
+ [net]
2
+ # Testing
3
+ batch=1
4
+ subdivisions=1
5
+ # Training
6
+ # batch=64
7
+ # subdivisions=2
8
+ width=416
9
+ height=416
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=16
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ [maxpool]
34
+ size=2
35
+ stride=2
36
+
37
+ [convolutional]
38
+ batch_normalize=1
39
+ filters=32
40
+ size=3
41
+ stride=1
42
+ pad=1
43
+ activation=leaky
44
+
45
+ [maxpool]
46
+ size=2
47
+ stride=2
48
+
49
+ [convolutional]
50
+ batch_normalize=1
51
+ filters=64
52
+ size=3
53
+ stride=1
54
+ pad=1
55
+ activation=leaky
56
+
57
+ [maxpool]
58
+ size=2
59
+ stride=2
60
+
61
+ [convolutional]
62
+ batch_normalize=1
63
+ filters=128
64
+ size=3
65
+ stride=1
66
+ pad=1
67
+ activation=leaky
68
+
69
+ [maxpool]
70
+ size=2
71
+ stride=2
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=256
76
+ size=3
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [maxpool]
82
+ size=2
83
+ stride=2
84
+
85
+ [convolutional]
86
+ batch_normalize=1
87
+ filters=512
88
+ size=3
89
+ stride=1
90
+ pad=1
91
+ activation=leaky
92
+
93
+ [maxpool]
94
+ size=2
95
+ stride=1
96
+
97
+ [convolutional]
98
+ batch_normalize=1
99
+ filters=1024
100
+ size=3
101
+ stride=1
102
+ pad=1
103
+ activation=leaky
104
+
105
+ ###########
106
+
107
+ [convolutional]
108
+ batch_normalize=1
109
+ filters=256
110
+ size=1
111
+ stride=1
112
+ pad=1
113
+ activation=leaky
114
+
115
+ [convolutional]
116
+ batch_normalize=1
117
+ filters=512
118
+ size=3
119
+ stride=1
120
+ pad=1
121
+ activation=leaky
122
+
123
+ [convolutional]
124
+ size=1
125
+ stride=1
126
+ pad=1
127
+ filters=255
128
+ activation=linear
129
+
130
+
131
+
132
+ [yolo]
133
+ mask = 3,4,5
134
+ anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
135
+ classes=80
136
+ num=6
137
+ jitter=.3
138
+ ignore_thresh = .7
139
+ truth_thresh = 1
140
+ random=1
141
+
142
+ [route]
143
+ layers = -4
144
+
145
+ [convolutional]
146
+ batch_normalize=1
147
+ filters=128
148
+ size=1
149
+ stride=1
150
+ pad=1
151
+ activation=leaky
152
+
153
+ [upsample]
154
+ stride=2
155
+
156
+ [route]
157
+ layers = -1, 8
158
+
159
+ [convolutional]
160
+ batch_normalize=1
161
+ filters=256
162
+ size=3
163
+ stride=1
164
+ pad=1
165
+ activation=leaky
166
+
167
+ [convolutional]
168
+ size=1
169
+ stride=1
170
+ pad=1
171
+ filters=255
172
+ activation=linear
173
+
174
+ [yolo]
175
+ mask = 0,1,2
176
+ anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
177
+ classes=80
178
+ num=6
179
+ jitter=.3
180
+ ignore_thresh = .7
181
+ truth_thresh = 1
182
+ random=1
yolo/yolov3-tiny.weights ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dccea06f59b781ec1234ddf8d1e94b9519a97f4245748a7d4db75d5b7080a42c
3
+ size 35434956
yolo/yolov3.cfg ADDED
@@ -0,0 +1,788 @@
1
+ [net]
2
+ # Testing
3
+ # batch=1
4
+ # subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=16
8
+ width=624
9
+ height=624
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=32
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ # Downsample
34
+
35
+ [convolutional]
36
+ batch_normalize=1
37
+ filters=64
38
+ size=3
39
+ stride=2
40
+ pad=1
41
+ activation=leaky
42
+
43
+ [convolutional]
44
+ batch_normalize=1
45
+ filters=32
46
+ size=1
47
+ stride=1
48
+ pad=1
49
+ activation=leaky
50
+
51
+ [convolutional]
52
+ batch_normalize=1
53
+ filters=64
54
+ size=3
55
+ stride=1
56
+ pad=1
57
+ activation=leaky
58
+
59
+ [shortcut]
60
+ from=-3
61
+ activation=linear
62
+
63
+ # Downsample
64
+
65
+ [convolutional]
66
+ batch_normalize=1
67
+ filters=128
68
+ size=3
69
+ stride=2
70
+ pad=1
71
+ activation=leaky
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=64
76
+ size=1
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=128
84
+ size=3
85
+ stride=1
86
+ pad=1
87
+ activation=leaky
88
+
89
+ [shortcut]
90
+ from=-3
91
+ activation=linear
92
+
93
+ [convolutional]
94
+ batch_normalize=1
95
+ filters=64
96
+ size=1
97
+ stride=1
98
+ pad=1
99
+ activation=leaky
100
+
101
+ [convolutional]
102
+ batch_normalize=1
103
+ filters=128
104
+ size=3
105
+ stride=1
106
+ pad=1
107
+ activation=leaky
108
+
109
+ [shortcut]
110
+ from=-3
111
+ activation=linear
112
+
113
+ # Downsample
114
+
115
+ [convolutional]
116
+ batch_normalize=1
117
+ filters=256
118
+ size=3
119
+ stride=2
120
+ pad=1
121
+ activation=leaky
122
+
123
+ [convolutional]
124
+ batch_normalize=1
125
+ filters=128
126
+ size=1
127
+ stride=1
128
+ pad=1
129
+ activation=leaky
130
+
131
+ [convolutional]
132
+ batch_normalize=1
133
+ filters=256
134
+ size=3
135
+ stride=1
136
+ pad=1
137
+ activation=leaky
138
+
139
+ [shortcut]
140
+ from=-3
141
+ activation=linear
142
+
143
+ [convolutional]
144
+ batch_normalize=1
145
+ filters=128
146
+ size=1
147
+ stride=1
148
+ pad=1
149
+ activation=leaky
150
+
151
+ [convolutional]
152
+ batch_normalize=1
153
+ filters=256
154
+ size=3
155
+ stride=1
156
+ pad=1
157
+ activation=leaky
158
+
159
+ [shortcut]
160
+ from=-3
161
+ activation=linear
162
+
163
+ [convolutional]
164
+ batch_normalize=1
165
+ filters=128
166
+ size=1
167
+ stride=1
168
+ pad=1
169
+ activation=leaky
170
+
171
+ [convolutional]
172
+ batch_normalize=1
173
+ filters=256
174
+ size=3
175
+ stride=1
176
+ pad=1
177
+ activation=leaky
178
+
179
+ [shortcut]
180
+ from=-3
181
+ activation=linear
182
+
183
+ [convolutional]
184
+ batch_normalize=1
185
+ filters=128
186
+ size=1
187
+ stride=1
188
+ pad=1
189
+ activation=leaky
190
+
191
+ [convolutional]
192
+ batch_normalize=1
193
+ filters=256
194
+ size=3
195
+ stride=1
196
+ pad=1
197
+ activation=leaky
198
+
199
+ [shortcut]
200
+ from=-3
201
+ activation=linear
202
+
203
+
204
+ [convolutional]
205
+ batch_normalize=1
206
+ filters=128
207
+ size=1
208
+ stride=1
209
+ pad=1
210
+ activation=leaky
211
+
212
+ [convolutional]
213
+ batch_normalize=1
214
+ filters=256
215
+ size=3
216
+ stride=1
217
+ pad=1
218
+ activation=leaky
219
+
220
+ [shortcut]
221
+ from=-3
222
+ activation=linear
223
+
224
+ [convolutional]
225
+ batch_normalize=1
226
+ filters=128
227
+ size=1
228
+ stride=1
229
+ pad=1
230
+ activation=leaky
231
+
232
+ [convolutional]
233
+ batch_normalize=1
234
+ filters=256
235
+ size=3
236
+ stride=1
237
+ pad=1
238
+ activation=leaky
239
+
240
+ [shortcut]
241
+ from=-3
242
+ activation=linear
243
+
244
+ [convolutional]
245
+ batch_normalize=1
246
+ filters=128
247
+ size=1
248
+ stride=1
249
+ pad=1
250
+ activation=leaky
251
+
252
+ [convolutional]
253
+ batch_normalize=1
254
+ filters=256
255
+ size=3
256
+ stride=1
257
+ pad=1
258
+ activation=leaky
259
+
260
+ [shortcut]
261
+ from=-3
262
+ activation=linear
263
+
264
+ [convolutional]
265
+ batch_normalize=1
266
+ filters=128
267
+ size=1
268
+ stride=1
269
+ pad=1
270
+ activation=leaky
271
+
272
+ [convolutional]
273
+ batch_normalize=1
274
+ filters=256
275
+ size=3
276
+ stride=1
277
+ pad=1
278
+ activation=leaky
279
+
280
+ [shortcut]
281
+ from=-3
282
+ activation=linear
283
+
284
+ # Downsample
285
+
286
+ [convolutional]
287
+ batch_normalize=1
288
+ filters=512
289
+ size=3
290
+ stride=2
291
+ pad=1
292
+ activation=leaky
293
+
294
+ [convolutional]
295
+ batch_normalize=1
296
+ filters=256
297
+ size=1
298
+ stride=1
299
+ pad=1
300
+ activation=leaky
301
+
302
+ [convolutional]
303
+ batch_normalize=1
304
+ filters=512
305
+ size=3
306
+ stride=1
307
+ pad=1
308
+ activation=leaky
309
+
310
+ [shortcut]
311
+ from=-3
312
+ activation=linear
313
+
314
+
315
+ [convolutional]
316
+ batch_normalize=1
317
+ filters=256
318
+ size=1
319
+ stride=1
320
+ pad=1
321
+ activation=leaky
322
+
323
+ [convolutional]
324
+ batch_normalize=1
325
+ filters=512
326
+ size=3
327
+ stride=1
328
+ pad=1
329
+ activation=leaky
330
+
331
+ [shortcut]
332
+ from=-3
333
+ activation=linear
334
+
335
+
336
+ [convolutional]
337
+ batch_normalize=1
338
+ filters=256
339
+ size=1
340
+ stride=1
341
+ pad=1
342
+ activation=leaky
343
+
344
+ [convolutional]
345
+ batch_normalize=1
346
+ filters=512
347
+ size=3
348
+ stride=1
349
+ pad=1
350
+ activation=leaky
351
+
352
+ [shortcut]
353
+ from=-3
354
+ activation=linear
355
+
356
+
357
+ [convolutional]
358
+ batch_normalize=1
359
+ filters=256
360
+ size=1
361
+ stride=1
362
+ pad=1
363
+ activation=leaky
364
+
365
+ [convolutional]
366
+ batch_normalize=1
367
+ filters=512
368
+ size=3
369
+ stride=1
370
+ pad=1
371
+ activation=leaky
372
+
373
+ [shortcut]
374
+ from=-3
375
+ activation=linear
376
+
377
+ [convolutional]
378
+ batch_normalize=1
379
+ filters=256
380
+ size=1
381
+ stride=1
382
+ pad=1
383
+ activation=leaky
384
+
385
+ [convolutional]
386
+ batch_normalize=1
387
+ filters=512
388
+ size=3
389
+ stride=1
390
+ pad=1
391
+ activation=leaky
392
+
393
+ [shortcut]
394
+ from=-3
395
+ activation=linear
396
+
397
+
398
+ [convolutional]
399
+ batch_normalize=1
400
+ filters=256
401
+ size=1
402
+ stride=1
403
+ pad=1
404
+ activation=leaky
405
+
406
+ [convolutional]
407
+ batch_normalize=1
408
+ filters=512
409
+ size=3
410
+ stride=1
411
+ pad=1
412
+ activation=leaky
413
+
414
+ [shortcut]
415
+ from=-3
416
+ activation=linear
417
+
418
+
419
+ [convolutional]
420
+ batch_normalize=1
421
+ filters=256
422
+ size=1
423
+ stride=1
424
+ pad=1
425
+ activation=leaky
426
+
427
+ [convolutional]
428
+ batch_normalize=1
429
+ filters=512
430
+ size=3
431
+ stride=1
432
+ pad=1
433
+ activation=leaky
434
+
435
+ [shortcut]
436
+ from=-3
437
+ activation=linear
438
+
439
+ [convolutional]
440
+ batch_normalize=1
441
+ filters=256
442
+ size=1
443
+ stride=1
444
+ pad=1
445
+ activation=leaky
446
+
447
+ [convolutional]
448
+ batch_normalize=1
449
+ filters=512
450
+ size=3
451
+ stride=1
452
+ pad=1
453
+ activation=leaky
454
+
455
+ [shortcut]
456
+ from=-3
457
+ activation=linear
458
+
459
+ # Downsample
460
+
461
+ [convolutional]
462
+ batch_normalize=1
463
+ filters=1024
464
+ size=3
465
+ stride=2
466
+ pad=1
467
+ activation=leaky
468
+
469
+ [convolutional]
470
+ batch_normalize=1
471
+ filters=512
472
+ size=1
473
+ stride=1
474
+ pad=1
475
+ activation=leaky
476
+
477
+ [convolutional]
478
+ batch_normalize=1
479
+ filters=1024
480
+ size=3
481
+ stride=1
482
+ pad=1
483
+ activation=leaky
484
+
485
+ [shortcut]
486
+ from=-3
487
+ activation=linear
488
+
489
+ [convolutional]
490
+ batch_normalize=1
491
+ filters=512
492
+ size=1
493
+ stride=1
494
+ pad=1
495
+ activation=leaky
496
+
497
+ [convolutional]
498
+ batch_normalize=1
499
+ filters=1024
500
+ size=3
501
+ stride=1
502
+ pad=1
503
+ activation=leaky
504
+
505
+ [shortcut]
506
+ from=-3
507
+ activation=linear
508
+
509
+ [convolutional]
510
+ batch_normalize=1
511
+ filters=512
512
+ size=1
513
+ stride=1
514
+ pad=1
515
+ activation=leaky
516
+
517
+ [convolutional]
518
+ batch_normalize=1
519
+ filters=1024
520
+ size=3
521
+ stride=1
522
+ pad=1
523
+ activation=leaky
524
+
525
+ [shortcut]
526
+ from=-3
527
+ activation=linear
528
+
529
+ [convolutional]
530
+ batch_normalize=1
531
+ filters=512
532
+ size=1
533
+ stride=1
534
+ pad=1
535
+ activation=leaky
536
+
537
+ [convolutional]
538
+ batch_normalize=1
539
+ filters=1024
540
+ size=3
541
+ stride=1
542
+ pad=1
543
+ activation=leaky
544
+
545
+ [shortcut]
546
+ from=-3
547
+ activation=linear
548
+
549
+ ######################
550
+
551
+ [convolutional]
552
+ batch_normalize=1
553
+ filters=512
554
+ size=1
555
+ stride=1
556
+ pad=1
557
+ activation=leaky
558
+
559
+ [convolutional]
560
+ batch_normalize=1
561
+ size=3
562
+ stride=1
563
+ pad=1
564
+ filters=1024
565
+ activation=leaky
566
+
567
+ [convolutional]
568
+ batch_normalize=1
569
+ filters=512
570
+ size=1
571
+ stride=1
572
+ pad=1
573
+ activation=leaky
574
+
575
+ [convolutional]
576
+ batch_normalize=1
577
+ size=3
578
+ stride=1
579
+ pad=1
580
+ filters=1024
581
+ activation=leaky
582
+
583
+ [convolutional]
584
+ batch_normalize=1
585
+ filters=512
586
+ size=1
587
+ stride=1
588
+ pad=1
589
+ activation=leaky
590
+
591
+ [convolutional]
592
+ batch_normalize=1
593
+ size=3
594
+ stride=1
595
+ pad=1
596
+ filters=1024
597
+ activation=leaky
598
+
599
+ [convolutional]
600
+ size=1
601
+ stride=1
602
+ pad=1
603
+ filters=255
604
+ activation=linear
605
+
606
+
607
+ [yolo]
608
+ mask = 6,7,8
609
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610
+ classes=80
611
+ num=9
612
+ jitter=.3
613
+ ignore_thresh = .7
614
+ truth_thresh = 1
615
+ random=1
616
+
617
+
618
+ [route]
619
+ layers = -4
620
+
621
+ [convolutional]
622
+ batch_normalize=1
623
+ filters=256
624
+ size=1
625
+ stride=1
626
+ pad=1
627
+ activation=leaky
628
+
629
+ [upsample]
630
+ stride=2
631
+
632
+ [route]
633
+ layers = -1, 61
634
+
635
+
636
+
637
+ [convolutional]
638
+ batch_normalize=1
639
+ filters=256
640
+ size=1
641
+ stride=1
642
+ pad=1
643
+ activation=leaky
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ size=3
648
+ stride=1
649
+ pad=1
650
+ filters=512
651
+ activation=leaky
652
+
653
+ [convolutional]
654
+ batch_normalize=1
655
+ filters=256
656
+ size=1
657
+ stride=1
658
+ pad=1
659
+ activation=leaky
660
+
661
+ [convolutional]
662
+ batch_normalize=1
663
+ size=3
664
+ stride=1
665
+ pad=1
666
+ filters=512
667
+ activation=leaky
668
+
669
+ [convolutional]
670
+ batch_normalize=1
671
+ filters=256
672
+ size=1
673
+ stride=1
674
+ pad=1
675
+ activation=leaky
676
+
677
+ [convolutional]
678
+ batch_normalize=1
679
+ size=3
680
+ stride=1
681
+ pad=1
682
+ filters=512
683
+ activation=leaky
684
+
685
+ [convolutional]
686
+ size=1
687
+ stride=1
688
+ pad=1
689
+ filters=255
690
+ activation=linear
691
+
692
+
693
+ [yolo]
694
+ mask = 3,4,5
695
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696
+ classes=80
697
+ num=9
698
+ jitter=.3
699
+ ignore_thresh = .7
700
+ truth_thresh = 1
701
+ random=1
702
+
703
+
704
+
705
+ [route]
706
+ layers = -4
707
+
708
+ [convolutional]
709
+ batch_normalize=1
710
+ filters=128
711
+ size=1
712
+ stride=1
713
+ pad=1
714
+ activation=leaky
715
+
716
+ [upsample]
717
+ stride=2
718
+
719
+ [route]
720
+ layers = -1, 36
721
+
722
+
723
+
724
+ [convolutional]
725
+ batch_normalize=1
726
+ filters=128
727
+ size=1
728
+ stride=1
729
+ pad=1
730
+ activation=leaky
731
+
732
+ [convolutional]
733
+ batch_normalize=1
734
+ size=3
735
+ stride=1
736
+ pad=1
737
+ filters=256
738
+ activation=leaky
739
+
740
+ [convolutional]
741
+ batch_normalize=1
742
+ filters=128
743
+ size=1
744
+ stride=1
745
+ pad=1
746
+ activation=leaky
747
+
748
+ [convolutional]
749
+ batch_normalize=1
750
+ size=3
751
+ stride=1
752
+ pad=1
753
+ filters=256
754
+ activation=leaky
755
+
756
+ [convolutional]
757
+ batch_normalize=1
758
+ filters=128
759
+ size=1
760
+ stride=1
761
+ pad=1
762
+ activation=leaky
763
+
764
+ [convolutional]
765
+ batch_normalize=1
766
+ size=3
767
+ stride=1
768
+ pad=1
769
+ filters=256
770
+ activation=leaky
771
+
772
+ [convolutional]
773
+ size=1
774
+ stride=1
775
+ pad=1
776
+ filters=255
777
+ activation=linear
778
+
779
+
780
+ [yolo]
781
+ mask = 0,1,2
782
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783
+ classes=80
784
+ num=9
785
+ jitter=.3
786
+ ignore_thresh = .7
787
+ truth_thresh = 1
788
+ random=1
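Each of the three [yolo] heads above is fed by a convolutional layer with filters=255; that number follows from bbox_attrs in utils.predict_transform (5 box attributes plus the class scores) times the three anchors each mask selects:

num_classes = 80
anchors_per_scale = 3            # each [yolo] mask picks 3 of the 9 anchors
bbox_attrs = 5 + num_classes     # x, y, w, h, objectness + 80 class scores
filters = anchors_per_scale * bbox_attrs
print(filters)                   # 255, matching the layer before every [yolo] block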