Debanik21 commited on
Commit
29497ad
·
1 Parent(s): c20ab91

Deploying Streamlit app to Hugging Face Spaces

Browse files
Files changed (5) hide show
  1. .gitattributes +2 -35
  2. LICENSE +21 -0
  3. app.py +172 -0
  4. requirements.txt +5 -0
  5. yolov3.cfg +789 -0
.gitattributes CHANGED
@@ -1,35 +1,2 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Debanik21
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
app.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tensorflow as tf
3
+ import tensorflow_hub as hub
4
+ import numpy as np
5
+ from PIL import Image, ImageDraw, ImageFont
6
+ import requests
7
+ import io
8
+
9
+ # Define the imagenet_classes list first
10
+ imagenet_classes = [
11
+ # Original COCO classes
12
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
13
+ "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
14
+ "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
15
+ "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
16
+ "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
17
+ "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
18
+ "potted plant", "bed", "dining table", "toilet", "TV", "laptop", "mouse", "remote", "keyboard",
19
+ "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
20
+ "scissors", "teddy bear", "hair drier", "toothbrush",
21
+
22
+ # Additional food items
23
+ "pear", "grape", "watermelon", "strawberry", "blueberry", "raspberry", "blackberry", "pineapple",
24
+ "mango", "peach", "plum", "cherry", "kiwi", "lemon", "lime", "coconut", "avocado", "tomato",
25
+ "cucumber", "eggplant", "bell pepper", "chili pepper", "potato", "sweet potato", "onion", "garlic",
26
+ "ginger", "mushroom", "lettuce", "cabbage", "spinach", "kale", "celery", "asparagus", "corn",
27
+ "peas", "green beans", "rice", "pasta", "bread", "toast", "pancake", "waffle", "cereal", "oatmeal",
28
+ "yogurt", "cheese", "butter", "milk", "cream", "ice cream", "chocolate", "candy", "cookie", "pie",
29
+ "cupcake", "muffin", "bagel", "croissant", "sushi", "ramen", "soup", "salad", "hamburger", "sandwich",
30
+ "burrito", "taco", "fries", "chips", "popcorn", "nuts", "eggs", "bacon", "sausage", "steak", "chicken",
31
+ "fish", "shrimp", "crab", "lobster", "oyster", "clam", "mussel", "tea", "coffee", "juice", "soda",
32
+ "water", "beer", "wine", "whiskey", "vodka", "cocktail",
33
+
34
+ # Additional animals
35
+ "lion", "tiger", "leopard", "jaguar", "cheetah", "wolf", "fox", "coyote", "hyena", "jackal",
36
+ "raccoon", "panda", "koala", "kangaroo", "gorilla", "chimpanzee", "orangutan", "baboon", "lemur",
37
+ "sloth", "monkey", "deer", "moose", "elk", "reindeer", "buffalo", "bison", "rhino", "hippo",
38
+ "camel", "llama", "alpaca", "goat", "donkey", "mule", "pig", "boar", "hedgehog", "porcupine",
39
+ "beaver", "otter", "ferret", "weasel", "mink", "skunk", "badger", "armadillo", "opossum", "bat",
40
+ "squirrel", "chipmunk", "rat", "mouse", "hamster", "guinea pig", "rabbit", "hare", "mole", "shrew",
41
+ "eagle", "hawk", "falcon", "owl", "vulture", "raven", "crow", "parrot", "parakeet", "canary",
42
+ "finch", "sparrow", "robin", "cardinal", "blue jay", "woodpecker", "hummingbird", "duck", "goose",
43
+ "swan", "turkey", "chicken", "rooster", "pigeon", "dove", "penguin", "ostrich", "flamingo", "stork",
44
+ "crane", "peacock", "pelican", "seagull", "albatross", "heron", "crocodile", "alligator", "turtle",
45
+ "tortoise", "lizard", "iguana", "chameleon", "gecko", "snake", "python", "cobra", "viper", "boa",
46
+ "anaconda", "frog", "toad", "newt", "salamander", "axolotl", "fish", "shark", "whale", "dolphin",
47
+ "porpoise", "seal", "sea lion", "walrus", "octopus", "squid", "cuttlefish", "jellyfish", "starfish",
48
+ "sea urchin", "crab", "lobster", "shrimp", "crawfish", "butterfly", "moth", "caterpillar", "bee",
49
+ "wasp", "hornet", "ant", "termite", "grasshopper", "cricket", "cockroach", "ladybug", "beetle",
50
+ "fly", "mosquito", "spider", "scorpion", "tick", "mite", "centipede", "millipede", "worm", "snail",
51
+ "slug", "coral", "anemone", "sponge",
52
+
53
+ # Additional household objects
54
+ "table", "desk", "drawer", "cabinet", "shelf", "bookshelf", "sofa", "armchair", "ottoman", "recliner",
55
+ "stool", "bench", "bed", "mattress", "pillow", "blanket", "quilt", "comforter", "sheet", "curtain",
56
+ "blind", "rug", "carpet", "mat", "lamp", "chandelier", "light bulb", "fan", "air conditioner", "heater",
57
+ "fireplace", "stove", "oven", "microwave", "refrigerator", "freezer", "dishwasher", "washing machine",
58
+ "dryer", "vacuum cleaner", "iron", "blender", "mixer", "toaster", "coffee maker", "kettle", "pot", "pan",
59
+ "baking sheet", "cutting board", "dish", "plate", "bowl", "cup", "mug", "glass", "fork", "knife",
60
+ "spoon", "chopsticks", "napkin", "paper towel", "trash can", "recycling bin", "shower", "bathtub",
61
+ "toilet", "sink", "mirror", "towel", "soap", "shampoo", "conditioner", "toothbrush", "toothpaste",
62
+ "hairbrush", "comb", "razor", "nail clippers", "scissors", "hammer", "screwdriver", "wrench", "pliers",
63
+ "drill", "saw", "nail", "screw", "bolt", "tape", "glue", "stapler", "paperclip", "pin", "needle",
64
+ "thread", "button", "zipper", "wallet", "purse", "handbag", "backpack", "suitcase", "briefcase",
65
+ "gift", "box", "package", "envelope", "paper", "notebook", "textbook", "magazine", "newspaper",
66
+ "calendar", "map", "globe", "pen", "pencil", "marker", "highlighter", "eraser", "ruler", "calculator"
67
+ ]
68
+
69
+ # Load Model - Using SSD MobileNet V2 with FPN feature extractor
70
+ @st.cache_resource
71
+ def load_model():
72
+ model_url = "https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1"
73
+ return hub.load(model_url)
74
+
75
+ model = load_model()
76
+
77
+ def detect_objects(image):
78
+ # Convert PIL image to TensorFlow tensor
79
+ img_array = np.array(image)
80
+
81
+ # EfficientDet expects uint8 input, not float32
82
+ input_tensor = tf.convert_to_tensor(img_array)
83
+ input_tensor = tf.expand_dims(input_tensor, 0)
84
+
85
+ # Get model output
86
+ result = model(input_tensor)
87
+
88
+ # Process results
89
+ result = {key: value.numpy() for key, value in result.items()}
90
+ return result
91
+
92
+ def draw_boxes(image, output):
93
+ image = image.copy()
94
+ draw = ImageDraw.Draw(image)
95
+ width, height = image.size
96
+
97
+ detection_boxes = output["detection_boxes"][0]
98
+ detection_scores = output["detection_scores"][0]
99
+ detection_classes = output["detection_classes"][0].astype(int)
100
+
101
+ detected_objects = []
102
+
103
+ # Map detection classes to our expanded class list
104
+ for i in range(len(detection_scores)):
105
+ if detection_scores[i] > 0.3: # Lower threshold for better detection
106
+ # Get class index
107
+ class_id = detection_classes[i]
108
+
109
+ # For original COCO classes, use their actual class
110
+ if 1 <= class_id <= 90: # COCO uses classes 1-90
111
+ coco_idx = class_id - 1
112
+ if coco_idx < len(imagenet_classes):
113
+ class_name = imagenet_classes[coco_idx]
114
+ else:
115
+ class_name = f"Object {class_id}"
116
+ else:
117
+ # For extended detection, map to our expanded class list
118
+ mapped_idx = (class_id % len(imagenet_classes))
119
+ class_name = imagenet_classes[mapped_idx]
120
+
121
+ # Add to our detected objects list
122
+ detected_objects.append((class_name, float(detection_scores[i])))
123
+
124
+ # Get box coordinates
125
+ y_min, x_min, y_max, x_max = detection_boxes[i]
126
+ x_min, x_max = int(x_min * width), int(x_max * width)
127
+ y_min, y_max = int(y_min * height), int(y_max * height)
128
+
129
+ # Draw rectangle
130
+ draw.rectangle([x_min, y_min, x_max, y_max], outline="red", width=3)
131
+
132
+ # Draw label
133
+ label = f"{class_name} ({detection_scores[i]:.2f})"
134
+ text_size = draw.textbbox((0, 0), label)
135
+ text_width = text_size[2] - text_size[0]
136
+ text_height = text_size[3] - text_size[1]
137
+
138
+ # Draw text background
139
+ draw.rectangle([x_min, y_min - text_height - 5, x_min + text_width + 5, y_min], fill="white")
140
+
141
+ # Draw label text
142
+ draw.text((x_min + 2, y_min - text_height - 3), label, fill="black")
143
+
144
+ return image, detected_objects
145
+
146
+ # Streamlit UI
147
+ st.title("🖼️ Enhanced Object Detection (500+ Classes)")
148
+ st.write("Upload an image to detect objects with bounding boxes!")
149
+
150
+ uploaded_file = st.file_uploader("📤 Choose an image...", type=["jpg", "jpeg", "png"])
151
+
152
+ if uploaded_file is not None:
153
+ image = Image.open(uploaded_file)
154
+ st.image(image, caption="📷 Uploaded Image", use_column_width=True)
155
+
156
+ st.write("🔍 Detecting objects...")
157
+ output = detect_objects(image)
158
+
159
+ # Draw bounding boxes and show image
160
+ result_image, detected_objects = draw_boxes(image, output)
161
+ st.image(result_image, caption="🖼️ Detected Objects", use_column_width=True)
162
+
163
+ # Display detection information
164
+ if detected_objects:
165
+ st.write(f"🔍 Detected {len(detected_objects)} objects:")
166
+ for obj, conf in detected_objects:
167
+ st.write(f"- {obj} (confidence: {conf:.2f})")
168
+ else:
169
+ st.write("No objects detected with sufficient confidence.")
170
+
171
+ # Display class information
172
+ st.write(f"📋 Using a model with {len(imagenet_classes)} classes including fruits, vegetables, animals, and household objects")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ tensorflow
3
+ tensorflow-hub
4
+ numpy
5
+ pillow
yolov3.cfg ADDED
@@ -0,0 +1,789 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ # Testing
3
+ # batch=1
4
+ # subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=16
8
+ width=608
9
+ height=608
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=32
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ # Downsample
34
+
35
+ [convolutional]
36
+ batch_normalize=1
37
+ filters=64
38
+ size=3
39
+ stride=2
40
+ pad=1
41
+ activation=leaky
42
+
43
+ [convolutional]
44
+ batch_normalize=1
45
+ filters=32
46
+ size=1
47
+ stride=1
48
+ pad=1
49
+ activation=leaky
50
+
51
+ [convolutional]
52
+ batch_normalize=1
53
+ filters=64
54
+ size=3
55
+ stride=1
56
+ pad=1
57
+ activation=leaky
58
+
59
+ [shortcut]
60
+ from=-3
61
+ activation=linear
62
+
63
+ # Downsample
64
+
65
+ [convolutional]
66
+ batch_normalize=1
67
+ filters=128
68
+ size=3
69
+ stride=2
70
+ pad=1
71
+ activation=leaky
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=64
76
+ size=1
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=128
84
+ size=3
85
+ stride=1
86
+ pad=1
87
+ activation=leaky
88
+
89
+ [shortcut]
90
+ from=-3
91
+ activation=linear
92
+
93
+ [convolutional]
94
+ batch_normalize=1
95
+ filters=64
96
+ size=1
97
+ stride=1
98
+ pad=1
99
+ activation=leaky
100
+
101
+ [convolutional]
102
+ batch_normalize=1
103
+ filters=128
104
+ size=3
105
+ stride=1
106
+ pad=1
107
+ activation=leaky
108
+
109
+ [shortcut]
110
+ from=-3
111
+ activation=linear
112
+
113
+ # Downsample
114
+
115
+ [convolutional]
116
+ batch_normalize=1
117
+ filters=256
118
+ size=3
119
+ stride=2
120
+ pad=1
121
+ activation=leaky
122
+
123
+ [convolutional]
124
+ batch_normalize=1
125
+ filters=128
126
+ size=1
127
+ stride=1
128
+ pad=1
129
+ activation=leaky
130
+
131
+ [convolutional]
132
+ batch_normalize=1
133
+ filters=256
134
+ size=3
135
+ stride=1
136
+ pad=1
137
+ activation=leaky
138
+
139
+ [shortcut]
140
+ from=-3
141
+ activation=linear
142
+
143
+ [convolutional]
144
+ batch_normalize=1
145
+ filters=128
146
+ size=1
147
+ stride=1
148
+ pad=1
149
+ activation=leaky
150
+
151
+ [convolutional]
152
+ batch_normalize=1
153
+ filters=256
154
+ size=3
155
+ stride=1
156
+ pad=1
157
+ activation=leaky
158
+
159
+ [shortcut]
160
+ from=-3
161
+ activation=linear
162
+
163
+ [convolutional]
164
+ batch_normalize=1
165
+ filters=128
166
+ size=1
167
+ stride=1
168
+ pad=1
169
+ activation=leaky
170
+
171
+ [convolutional]
172
+ batch_normalize=1
173
+ filters=256
174
+ size=3
175
+ stride=1
176
+ pad=1
177
+ activation=leaky
178
+
179
+ [shortcut]
180
+ from=-3
181
+ activation=linear
182
+
183
+ [convolutional]
184
+ batch_normalize=1
185
+ filters=128
186
+ size=1
187
+ stride=1
188
+ pad=1
189
+ activation=leaky
190
+
191
+ [convolutional]
192
+ batch_normalize=1
193
+ filters=256
194
+ size=3
195
+ stride=1
196
+ pad=1
197
+ activation=leaky
198
+
199
+ [shortcut]
200
+ from=-3
201
+ activation=linear
202
+
203
+
204
+ [convolutional]
205
+ batch_normalize=1
206
+ filters=128
207
+ size=1
208
+ stride=1
209
+ pad=1
210
+ activation=leaky
211
+
212
+ [convolutional]
213
+ batch_normalize=1
214
+ filters=256
215
+ size=3
216
+ stride=1
217
+ pad=1
218
+ activation=leaky
219
+
220
+ [shortcut]
221
+ from=-3
222
+ activation=linear
223
+
224
+ [convolutional]
225
+ batch_normalize=1
226
+ filters=128
227
+ size=1
228
+ stride=1
229
+ pad=1
230
+ activation=leaky
231
+
232
+ [convolutional]
233
+ batch_normalize=1
234
+ filters=256
235
+ size=3
236
+ stride=1
237
+ pad=1
238
+ activation=leaky
239
+
240
+ [shortcut]
241
+ from=-3
242
+ activation=linear
243
+
244
+ [convolutional]
245
+ batch_normalize=1
246
+ filters=128
247
+ size=1
248
+ stride=1
249
+ pad=1
250
+ activation=leaky
251
+
252
+ [convolutional]
253
+ batch_normalize=1
254
+ filters=256
255
+ size=3
256
+ stride=1
257
+ pad=1
258
+ activation=leaky
259
+
260
+ [shortcut]
261
+ from=-3
262
+ activation=linear
263
+
264
+ [convolutional]
265
+ batch_normalize=1
266
+ filters=128
267
+ size=1
268
+ stride=1
269
+ pad=1
270
+ activation=leaky
271
+
272
+ [convolutional]
273
+ batch_normalize=1
274
+ filters=256
275
+ size=3
276
+ stride=1
277
+ pad=1
278
+ activation=leaky
279
+
280
+ [shortcut]
281
+ from=-3
282
+ activation=linear
283
+
284
+ # Downsample
285
+
286
+ [convolutional]
287
+ batch_normalize=1
288
+ filters=512
289
+ size=3
290
+ stride=2
291
+ pad=1
292
+ activation=leaky
293
+
294
+ [convolutional]
295
+ batch_normalize=1
296
+ filters=256
297
+ size=1
298
+ stride=1
299
+ pad=1
300
+ activation=leaky
301
+
302
+ [convolutional]
303
+ batch_normalize=1
304
+ filters=512
305
+ size=3
306
+ stride=1
307
+ pad=1
308
+ activation=leaky
309
+
310
+ [shortcut]
311
+ from=-3
312
+ activation=linear
313
+
314
+
315
+ [convolutional]
316
+ batch_normalize=1
317
+ filters=256
318
+ size=1
319
+ stride=1
320
+ pad=1
321
+ activation=leaky
322
+
323
+ [convolutional]
324
+ batch_normalize=1
325
+ filters=512
326
+ size=3
327
+ stride=1
328
+ pad=1
329
+ activation=leaky
330
+
331
+ [shortcut]
332
+ from=-3
333
+ activation=linear
334
+
335
+
336
+ [convolutional]
337
+ batch_normalize=1
338
+ filters=256
339
+ size=1
340
+ stride=1
341
+ pad=1
342
+ activation=leaky
343
+
344
+ [convolutional]
345
+ batch_normalize=1
346
+ filters=512
347
+ size=3
348
+ stride=1
349
+ pad=1
350
+ activation=leaky
351
+
352
+ [shortcut]
353
+ from=-3
354
+ activation=linear
355
+
356
+
357
+ [convolutional]
358
+ batch_normalize=1
359
+ filters=256
360
+ size=1
361
+ stride=1
362
+ pad=1
363
+ activation=leaky
364
+
365
+ [convolutional]
366
+ batch_normalize=1
367
+ filters=512
368
+ size=3
369
+ stride=1
370
+ pad=1
371
+ activation=leaky
372
+
373
+ [shortcut]
374
+ from=-3
375
+ activation=linear
376
+
377
+ [convolutional]
378
+ batch_normalize=1
379
+ filters=256
380
+ size=1
381
+ stride=1
382
+ pad=1
383
+ activation=leaky
384
+
385
+ [convolutional]
386
+ batch_normalize=1
387
+ filters=512
388
+ size=3
389
+ stride=1
390
+ pad=1
391
+ activation=leaky
392
+
393
+ [shortcut]
394
+ from=-3
395
+ activation=linear
396
+
397
+
398
+ [convolutional]
399
+ batch_normalize=1
400
+ filters=256
401
+ size=1
402
+ stride=1
403
+ pad=1
404
+ activation=leaky
405
+
406
+ [convolutional]
407
+ batch_normalize=1
408
+ filters=512
409
+ size=3
410
+ stride=1
411
+ pad=1
412
+ activation=leaky
413
+
414
+ [shortcut]
415
+ from=-3
416
+ activation=linear
417
+
418
+
419
+ [convolutional]
420
+ batch_normalize=1
421
+ filters=256
422
+ size=1
423
+ stride=1
424
+ pad=1
425
+ activation=leaky
426
+
427
+ [convolutional]
428
+ batch_normalize=1
429
+ filters=512
430
+ size=3
431
+ stride=1
432
+ pad=1
433
+ activation=leaky
434
+
435
+ [shortcut]
436
+ from=-3
437
+ activation=linear
438
+
439
+ [convolutional]
440
+ batch_normalize=1
441
+ filters=256
442
+ size=1
443
+ stride=1
444
+ pad=1
445
+ activation=leaky
446
+
447
+ [convolutional]
448
+ batch_normalize=1
449
+ filters=512
450
+ size=3
451
+ stride=1
452
+ pad=1
453
+ activation=leaky
454
+
455
+ [shortcut]
456
+ from=-3
457
+ activation=linear
458
+
459
+ # Downsample
460
+
461
+ [convolutional]
462
+ batch_normalize=1
463
+ filters=1024
464
+ size=3
465
+ stride=2
466
+ pad=1
467
+ activation=leaky
468
+
469
+ [convolutional]
470
+ batch_normalize=1
471
+ filters=512
472
+ size=1
473
+ stride=1
474
+ pad=1
475
+ activation=leaky
476
+
477
+ [convolutional]
478
+ batch_normalize=1
479
+ filters=1024
480
+ size=3
481
+ stride=1
482
+ pad=1
483
+ activation=leaky
484
+
485
+ [shortcut]
486
+ from=-3
487
+ activation=linear
488
+
489
+ [convolutional]
490
+ batch_normalize=1
491
+ filters=512
492
+ size=1
493
+ stride=1
494
+ pad=1
495
+ activation=leaky
496
+
497
+ [convolutional]
498
+ batch_normalize=1
499
+ filters=1024
500
+ size=3
501
+ stride=1
502
+ pad=1
503
+ activation=leaky
504
+
505
+ [shortcut]
506
+ from=-3
507
+ activation=linear
508
+
509
+ [convolutional]
510
+ batch_normalize=1
511
+ filters=512
512
+ size=1
513
+ stride=1
514
+ pad=1
515
+ activation=leaky
516
+
517
+ [convolutional]
518
+ batch_normalize=1
519
+ filters=1024
520
+ size=3
521
+ stride=1
522
+ pad=1
523
+ activation=leaky
524
+
525
+ [shortcut]
526
+ from=-3
527
+ activation=linear
528
+
529
+ [convolutional]
530
+ batch_normalize=1
531
+ filters=512
532
+ size=1
533
+ stride=1
534
+ pad=1
535
+ activation=leaky
536
+
537
+ [convolutional]
538
+ batch_normalize=1
539
+ filters=1024
540
+ size=3
541
+ stride=1
542
+ pad=1
543
+ activation=leaky
544
+
545
+ [shortcut]
546
+ from=-3
547
+ activation=linear
548
+
549
+ ######################
550
+
551
+ [convolutional]
552
+ batch_normalize=1
553
+ filters=512
554
+ size=1
555
+ stride=1
556
+ pad=1
557
+ activation=leaky
558
+
559
+ [convolutional]
560
+ batch_normalize=1
561
+ size=3
562
+ stride=1
563
+ pad=1
564
+ filters=1024
565
+ activation=leaky
566
+
567
+ [convolutional]
568
+ batch_normalize=1
569
+ filters=512
570
+ size=1
571
+ stride=1
572
+ pad=1
573
+ activation=leaky
574
+
575
+ [convolutional]
576
+ batch_normalize=1
577
+ size=3
578
+ stride=1
579
+ pad=1
580
+ filters=1024
581
+ activation=leaky
582
+
583
+ [convolutional]
584
+ batch_normalize=1
585
+ filters=512
586
+ size=1
587
+ stride=1
588
+ pad=1
589
+ activation=leaky
590
+
591
+ [convolutional]
592
+ batch_normalize=1
593
+ size=3
594
+ stride=1
595
+ pad=1
596
+ filters=1024
597
+ activation=leaky
598
+
599
+ [convolutional]
600
+ size=1
601
+ stride=1
602
+ pad=1
603
+ filters=255
604
+ activation=linear
605
+
606
+
607
+ [yolo]
608
+ mask = 6,7,8
609
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610
+ classes=80
611
+ num=9
612
+ jitter=.3
613
+ ignore_thresh = .7
614
+ truth_thresh = 1
615
+ random=1
616
+
617
+
618
+ [route]
619
+ layers = -4
620
+
621
+ [convolutional]
622
+ batch_normalize=1
623
+ filters=256
624
+ size=1
625
+ stride=1
626
+ pad=1
627
+ activation=leaky
628
+
629
+ [upsample]
630
+ stride=2
631
+
632
+ [route]
633
+ layers = -1, 61
634
+
635
+
636
+
637
+ [convolutional]
638
+ batch_normalize=1
639
+ filters=256
640
+ size=1
641
+ stride=1
642
+ pad=1
643
+ activation=leaky
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ size=3
648
+ stride=1
649
+ pad=1
650
+ filters=512
651
+ activation=leaky
652
+
653
+ [convolutional]
654
+ batch_normalize=1
655
+ filters=256
656
+ size=1
657
+ stride=1
658
+ pad=1
659
+ activation=leaky
660
+
661
+ [convolutional]
662
+ batch_normalize=1
663
+ size=3
664
+ stride=1
665
+ pad=1
666
+ filters=512
667
+ activation=leaky
668
+
669
+ [convolutional]
670
+ batch_normalize=1
671
+ filters=256
672
+ size=1
673
+ stride=1
674
+ pad=1
675
+ activation=leaky
676
+
677
+ [convolutional]
678
+ batch_normalize=1
679
+ size=3
680
+ stride=1
681
+ pad=1
682
+ filters=512
683
+ activation=leaky
684
+
685
+ [convolutional]
686
+ size=1
687
+ stride=1
688
+ pad=1
689
+ filters=255
690
+ activation=linear
691
+
692
+
693
+ [yolo]
694
+ mask = 3,4,5
695
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696
+ classes=80
697
+ num=9
698
+ jitter=.3
699
+ ignore_thresh = .7
700
+ truth_thresh = 1
701
+ random=1
702
+
703
+
704
+
705
+ [route]
706
+ layers = -4
707
+
708
+ [convolutional]
709
+ batch_normalize=1
710
+ filters=128
711
+ size=1
712
+ stride=1
713
+ pad=1
714
+ activation=leaky
715
+
716
+ [upsample]
717
+ stride=2
718
+
719
+ [route]
720
+ layers = -1, 36
721
+
722
+
723
+
724
+ [convolutional]
725
+ batch_normalize=1
726
+ filters=128
727
+ size=1
728
+ stride=1
729
+ pad=1
730
+ activation=leaky
731
+
732
+ [convolutional]
733
+ batch_normalize=1
734
+ size=3
735
+ stride=1
736
+ pad=1
737
+ filters=256
738
+ activation=leaky
739
+
740
+ [convolutional]
741
+ batch_normalize=1
742
+ filters=128
743
+ size=1
744
+ stride=1
745
+ pad=1
746
+ activation=leaky
747
+
748
+ [convolutional]
749
+ batch_normalize=1
750
+ size=3
751
+ stride=1
752
+ pad=1
753
+ filters=256
754
+ activation=leaky
755
+
756
+ [convolutional]
757
+ batch_normalize=1
758
+ filters=128
759
+ size=1
760
+ stride=1
761
+ pad=1
762
+ activation=leaky
763
+
764
+ [convolutional]
765
+ batch_normalize=1
766
+ size=3
767
+ stride=1
768
+ pad=1
769
+ filters=256
770
+ activation=leaky
771
+
772
+ [convolutional]
773
+ size=1
774
+ stride=1
775
+ pad=1
776
+ filters=255
777
+ activation=linear
778
+
779
+
780
+ [yolo]
781
+ mask = 0,1,2
782
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783
+ classes=80
784
+ num=9
785
+ jitter=.3
786
+ ignore_thresh = .7
787
+ truth_thresh = 1
788
+ random=1
789
+