Abhilashvj committed
Commit 31e192b • 0 Parent(s)

Duplicate from Abhilashvj/computer-vision-backend

.env ADDED
@@ -0,0 +1,2 @@
1
+ PINECONE_KEY=696a2b15-b4c0-4581-af5d-2d52d0198950
2
+ PINECONE_ENV=us-central1-gcp
.gitattributes ADDED
@@ -0,0 +1,36 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ res10_300x300_ssd_iter_140000_fp16.caffemodel filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,52 @@
1
+ # backend/Dockerfile
2
+
3
+ FROM python:3.10.1-slim
4
+
5
+ WORKDIR /app
6
+
7
+ RUN apt-get update
8
+ RUN apt-get install git \
9
+ 'ffmpeg'\
10
+ 'libsm6'\
11
+ 'libxext6' -y
12
+
13
+ COPY requirements.txt .
14
+ RUN pip install -r requirements.txt
15
+ # Clone Real-ESRGAN and enter the Real-ESRGAN
16
+ # RUN git clone https://github.com/xinntao/Real-ESRGAN.git
17
+ # RUN cd Real-ESRGAN
18
+ # # Set up the environment
19
+ # RUN pip install basicsr
20
+ # RUN pip install facexlib
21
+ # RUN pip install gfpgan
22
+ # RUN pip install -r requirements.txt
23
+ # RUN python setup.py develop
24
+
25
+ # Set up a new user named "user" with user ID 1000
26
+ RUN useradd -m -u 1000 user
27
+
28
+ # Switch to the "user" user
29
+ USER user
30
+
31
+ # Set home to the user's home directory
32
+ ENV HOME=/home/user \
33
+ PATH=/home/user/.local/bin:$PATH
34
+
35
+ # Set the working directory to the user's home directory
36
+ WORKDIR $HOME/app
37
+
38
+
39
+
40
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
41
+ COPY --chown=user . $HOME/app
42
+ # EXPOSE 8000
43
+ # RUN python download_models.py
44
+ # CMD ["python", "app.py"]
45
+ # Start app
46
+ # ENTRYPOINT ["gunicorn", "-c", "gunicorn.py", "-k", "uvicorn.workers.UvicornWorker", "app:app"]
47
+
48
+ # RUN python download_models.py
49
+ # CMD ["python", "app.py"]
50
+
51
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
52
+
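The CMD above serves app:app with uvicorn on port 7860 inside the container. As a rough smoke test once the image is built and running with that port published locally, something along these lines could be used; the base URL, the port mapping, and the `requests` dependency are assumptions, not part of this commit.

```python
# Hypothetical smoke test for a locally running container built from this
# Dockerfile (assumes `docker run -p 7860:7860 ...` or similar, and that the
# `requests` package is installed in the calling environment).
import requests

BASE_URL = "http://localhost:7860"  # assumption: container port published locally

resp = requests.get(f"{BASE_URL}/", timeout=10)
resp.raise_for_status()
# app.py's root route responds with a JSON hint pointing at the POST endpoints.
print(resp.json())
```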
Licenseplate_model.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c9a080781aa7ff722968c944a702983af8a452753edd5ba20719d42349ec7bd
3
+ size 71780037
README.md ADDED
@@ -0,0 +1,12 @@
1
+ ---
2
+ title: Computer Vision Backend
3
+ emoji: 🌖
4
+ colorFrom: red
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ duplicated_from: Abhilashvj/computer-vision-backend
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,652 @@
1
+ import cv2
2
+ import numpy as np
3
+ import argparse
4
+ import base64
5
+ import io
6
+ import os
7
+ import re
8
+ import sys
9
+ import traceback
10
+ import uuid
11
+ from typing import List, Optional
12
+ from PIL import ImageEnhance
13
+ import traceback
14
+ import cv2
15
+ import numpy as np
16
+ import pandas as pd
17
+ import pinecone
18
+ import pyiqa
19
+ import timm
20
+ import torch
21
+ import uvicorn
22
+ from dotenv import load_dotenv
23
+ from fastapi import FastAPI, File, Form, HTTPException, UploadFile
24
+ from PIL import Image
25
+ from pydantic import BaseModel
26
+ from sentence_transformers import SentenceTransformer, util
27
+ from transformers import (
28
+ AutoFeatureExtractor,
29
+ AutoModel,
30
+ DonutProcessor,
31
+ VisionEncoderDecoderModel,
32
+ )
33
+
34
+ load_dotenv()
35
+ pinecone.init(api_key=os.getenv("PINECONE_KEY"), environment=os.getenv("PINECONE_ENV"))
36
+ DETECTION_URL = "/object-detection/"
37
+ CLASSIFICATION_URL = "/object-classification/"
38
+ QUALITY_ASSESSMENT_URL = "/quality-assessment/"
39
+ FACE_URL = "/face-anonymization/"
40
+ LICENCE_URL = "/licenceplate-anonymization/"
41
+ DOCUMENT_QA = "/document-qa/"
42
+ IMAGE_SIMILARITY_DEMO = "/find-similar-image/"
43
+ IMAGE_SIMILARITY_PINECONE_DEMO = "/find-similar-image-pinecone/"
44
+ INDEX_NAME = "imagesearch-demo"
45
+ INDEX_DIMENSION = 512
46
+ TMP_DIR = "tmp"
47
+
48
+
49
+
50
+ def enhance_image(pil_image):
51
+ # Convert PIL Image to OpenCV format
52
+ open_cv_image = np.array(pil_image)
53
+ # Convert RGB to BGR
54
+ open_cv_image = open_cv_image[:, :, ::-1].copy()
55
+
56
+ # Convert to grayscale
57
+ gray = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2GRAY)
58
+
59
+ # Histogram equalization
60
+ equ = cv2.equalizeHist(gray)
61
+
62
+ # Adaptive Histogram Equalization
63
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
64
+ adaptive_hist_eq = clahe.apply(gray)
65
+
66
+ # Gaussian Blurring
67
+ gaussian_blurred = cv2.GaussianBlur(adaptive_hist_eq, (5,5), 0)
68
+
69
+ # Noise reduction
70
+ denoised = cv2.medianBlur(gaussian_blurred, 3)
71
+
72
+ # Brightness & Contrast adjustment
73
+ lab = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2Lab)
74
+ l, a, b = cv2.split(lab)
75
+ cl = clahe.apply(l)
76
+ limg = cv2.merge((cl, a, b))
77
+ enhanced_image = cv2.cvtColor(limg, cv2.COLOR_Lab2BGR)
78
+
79
+ # Convert back to PIL Image
80
+ enhanced_pil_image = Image.fromarray(cv2.cvtColor(enhanced_image, cv2.COLOR_BGR2RGB))
81
+
82
+ # IMAGE AUGMENTATION
83
+ # For demonstration purposes, let's do a simple brightness adjustment.
84
+ # In practice, choose the augmentations that suit your task.
85
+ enhancer = ImageEnhance.Brightness(enhanced_pil_image)
86
+ enhanced_pil_image = enhancer.enhance(1.2) # Brighten the image by 20%
87
+
88
+ return enhanced_pil_image
89
+
90
+
91
+ if INDEX_NAME not in pinecone.list_indexes():
92
+ pinecone.create_index(INDEX_NAME, dimension=512, metric='cosine')
93
+
94
+ print("Connecting to Pinecone Index")
95
+ index = pinecone.Index(INDEX_NAME)
96
+
97
+
98
+ device = "cuda" if torch.cuda.is_available() else "cpu"
99
+
100
+ os.makedirs(TMP_DIR, exist_ok=True)
101
+
102
+ licence_model = torch.hub.load(
103
+ "ultralytics/yolov5", "custom", path="Licenseplate_model.pt", device="cpu", force_reload=True
104
+ )
105
+ licence_model.cpu()
106
+
107
+ detector = cv2.dnn.DetectionModel(
108
+ "res10_300x300_ssd_iter_140000_fp16.caffemodel", "deploy.prototxt"
109
+ )
110
+
111
+ processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
112
+ doc_qa_model = VisionEncoderDecoderModel.from_pretrained(
113
+ "naver-clova-ix/donut-base-finetuned-docvqa"
114
+ )
115
+
116
+ device = "cuda" if torch.cuda.is_available() else "cpu"
117
+ doc_qa_model.to(device)
118
+
119
+
120
+ os.makedirs(TMP_DIR, exist_ok=True)
121
+
122
+ model = torch.hub.load(
123
+ "ultralytics/yolov5", "custom", path="best.pt", device="cpu", force_reload=True
124
+ )
125
+ model.cpu()
126
+
127
+ classes = [
128
+ "gas-distribution-meter",
129
+ "gas-distribution-piping",
130
+ "gas-distribution-regulator",
131
+ "gas-distribution-valve",
132
+ ]
133
+
134
+ class_to_idx = {
135
+ "gas-distribution-meter": 0,
136
+ "gas-distribution-piping": 1,
137
+ "gas-distribution-regulator": 2,
138
+ "gas-distribution-valve": 3,
139
+ }
140
+
141
+ idx_to_classes = {v: k for k, v in class_to_idx.items()}
142
+ modelname = "resnet50d"
143
+ model_weights = "best_classifer_model.pt"
144
+ num_classes = len(classes)
145
+
146
+ classifier_model = timm.create_model(
147
+ "resnet50d", pretrained=True, num_classes=num_classes, drop_path_rate=0.05
148
+ )
149
+ classifier_model.load_state_dict(
150
+ torch.load(model_weights, map_location=torch.device("cpu"))["model_state_dict"]
151
+ )
152
+
153
+ musiq_metric = pyiqa.create_metric("musiq-koniq", device=torch.device("cpu"))
154
+ image_sim_model = SentenceTransformer("clip-ViT-B-32")
155
+
156
+
157
+ # model_ckpt = "nateraw/vit-base-beans"
158
+ # extractor = AutoFeatureExtractor.from_pretrained(model_ckpt)
159
+ # image_sim_model = AutoModel.from_pretrained(model_ckpt)
160
+
161
+
162
+ app = FastAPI(title="CV Demos")
163
+
164
+ # Define the Response
165
+ class Prediction(BaseModel):
166
+ filename: str
167
+ contenttype: str
168
+ prediction: List[float] = []
169
+
170
+
171
+ # define response
172
+ @app.get("/")
173
+ def root_route():
174
+ return {"error": f"Use GET {DETECTION_URL} instead of the root route!"}
175
+
176
+
177
+ @app.post(
178
+ DETECTION_URL,
179
+ )
180
+ async def predict(file: UploadFile = File(...), quality_check: bool = False):
181
+ try:
182
+ extension = file.filename.split(".")[-1] in ("jpg", "jpeg", "png")
183
+ if not extension:
184
+ return "Image must be jpg or png format!"
185
+ # read the image contents
186
+ contents = await file.read()
187
+ pil_image = Image.open(io.BytesIO(contents))
188
+ if quality_check:
189
+ print("RUNNING QUALITY CEHCK BEFORE OBJEFCT DETECTION!!!")
190
+ tmp_file = f"{TMP_DIR}/tmp.png"
191
+ pil_image.save(tmp_file)
192
+ score = musiq_metric(tmp_file)
193
+ if score < 50:
194
+ return {
195
+ "Error": "Image quality is not sufficient enough to be considered for object detection"
196
+ }
197
+
198
+ results = model(pil_image, size=640) # reduce size=320 for faster inference
199
+ return results.pandas().xyxy[0].to_json(orient="records")
200
+ except:
201
+ e = sys.exc_info()[1]
202
+ raise HTTPException(status_code=500, detail=str(e))
203
+
204
+
205
+ @app.post(CLASSIFICATION_URL)
206
+ async def classify(file: UploadFile = File(...)):
207
+ try:
208
+ extension = file.filename.split(".")[-1] in ("jpg", "jpeg", "png")
209
+ if not extension:
210
+ return "Image must be jpg or png format!"
211
+ # read the image contents
212
+ contents = await file.read()
213
+ pil_image = Image.open(io.BytesIO(contents))
214
+ data_mean = (0.485, 0.456, 0.406)
215
+ data_std = (0.229, 0.224, 0.225)
216
+ image_size = (224, 224)
217
+ eval_transforms = timm.data.create_transform(
218
+ input_size=image_size, mean=data_mean, std=data_std
219
+ )
220
+ eval_transforms(pil_image).unsqueeze(dim=0).shape
221
+ classifier_model.eval()
222
+ print("RUNNING Image Classification!!!")
223
+ max_class_idx = np.argmax(
224
+ classifier_model(eval_transforms(pil_image).unsqueeze(dim=0)).detach().numpy()
225
+ )
226
+ predicted_class = idx_to_classes[max_class_idx]
227
+ print(f"Predicted Class idx: {max_class_idx} with name : {predicted_class}")
228
+ return {"object": predicted_class}
229
+
230
+ except:
231
+ e = sys.exc_info()[1]
232
+ raise HTTPException(status_code=500, detail=str(e))
233
+
234
+
235
+ @app.post(QUALITY_ASSESSMENT_URL)
236
+ async def quality_check(file: UploadFile = File(...)):
237
+ try:
238
+ extension = file.filename.split(".")[-1] in ("jpg", "jpeg", "png")
239
+ if not extension:
240
+ return "Image must be jpg or png format!"
241
+ # read the image contents
242
+ contents = await file.read()
243
+ pil_image = Image.open(io.BytesIO(contents))
244
+ tmp_file = f"{TMP_DIR}/tmp.png"
245
+ pil_image.save(tmp_file)
246
+ score = musiq_metric(tmp_file).detach().numpy().tolist()
247
+ return {"score": score}
248
+
249
+ except:
250
+ e = sys.exc_info()[1]
251
+ raise HTTPException(status_code=500, detail=str(e))
252
+
253
+
254
+ def anonymize_simple(image, factor=3.0):
255
+ # automatically determine the size of the blurring kernel based
256
+ # on the spatial dimensions of the input image
257
+ (h, w) = image.shape[:2]
258
+ kW = int(w / factor)
259
+ kH = int(h / factor)
260
+ # ensure the width of the kernel is odd
261
+ if kW % 2 == 0:
262
+ kW -= 1
263
+ # ensure the height of the kernel is odd
264
+ if kH % 2 == 0:
265
+ kH -= 1
266
+ # apply a Gaussian blur to the input image using our computed
267
+ # kernel size
268
+ return cv2.GaussianBlur(image, (kW, kH), 0)
269
+
270
+
271
+ def anonymize_pixelate(image, blocks=3):
272
+ # divide the input image into NxN blocks
273
+ (h, w) = image.shape[:2]
274
+ xSteps = np.linspace(0, w, blocks + 1, dtype="int")
275
+ ySteps = np.linspace(0, h, blocks + 1, dtype="int")
276
+ # loop over the blocks in both the x and y direction
277
+ for i in range(1, len(ySteps)):
278
+ for j in range(1, len(xSteps)):
279
+ # compute the starting and ending (x, y)-coordinates
280
+ # for the current block
281
+ startX = xSteps[j - 1]
282
+ startY = ySteps[i - 1]
283
+ endX = xSteps[j]
284
+ endY = ySteps[i]
285
+ # extract the ROI using NumPy array slicing, compute the
286
+ # mean of the ROI, and then draw a rectangle with the
287
+ # mean RGB values over the ROI in the original image
288
+ roi = image[startY:endY, startX:endX]
289
+ (B, G, R) = [int(x) for x in cv2.mean(roi)[:3]]
290
+ cv2.rectangle(image, (startX, startY), (endX, endY), (B, G, R), -1)
291
+ # return the pixelated blurred image
292
+ return image
293
+
294
+
295
+ # define response
296
+ @app.get("/")
297
+ def root_route():
298
+ return {"error": f"Use GET {FACE_URL} or {LICENCE_URL} instead of the root route!"}
299
+
300
+
301
+ @app.post(
302
+ FACE_URL,
303
+ )
304
+ async def face_anonymize(
305
+ file: UploadFile = File(...), blur_type="simple", quality_check: bool = False
306
+ ):
307
+ """
308
+ https://pyimagesearch.com/2020/04/06/blur-and-anonymize-faces-with-opencv-and-python/
309
+ """
310
+ try:
311
+ extension = file.filename.split(".")[-1] in ("jpg", "jpeg", "png")
312
+ if not extension:
313
+ return "Image must be jpg or png format!"
314
+ # read the image contents
315
+ contents = await file.read()
316
+ pil_image = Image.open(io.BytesIO(contents)).convert("RGB")
317
+ detector = cv2.dnn.DetectionModel(
318
+ "res10_300x300_ssd_iter_140000_fp16.caffemodel", "deploy.prototxt"
319
+ )
320
+ open_cv_image = np.array(pil_image)
321
+ # Convert RGB to BGR
322
+ open_cv_image = open_cv_image[:, :, ::-1].copy()
323
+ (h, w) = open_cv_image.shape[:2]
324
+ # Getting the detections
325
+ detections = detector.detect(open_cv_image)
326
+ if len(detections[2]) > 0:
327
+ for face in detections[2]:
328
+ (x, y, w, h) = face.astype("int")
329
+ # extract the face ROI
330
+
331
+ face = open_cv_image[y : y + h, x : x + w]
332
+ if blur_type == "simple":
333
+ face = anonymize_simple(face)
334
+ else:
335
+ face = anonymize_pixelate(face)
336
+ open_cv_image[y : y + h, x : x + w] = face
337
+
338
+ _, encoded_img = cv2.imencode(".PNG", open_cv_image)
339
+
340
+ encoded_img = base64.b64encode(encoded_img)
341
+ return {
342
+ "filename": file.filename,
343
+ "dimensions": str(open_cv_image.shape),
344
+ "encoded_img": encoded_img,
345
+ }
346
+ except:
347
+ e = sys.exc_info()[1]
348
+ print(traceback.format_exc())
349
+ raise HTTPException(status_code=500, detail=str(e))
350
+
351
+
352
+ @app.post(LICENCE_URL)
353
+ async def licence_anonymize(file: UploadFile = File(...), blur_type="simple"):
354
+ """https://www.kaggle.com/code/gowrishankarp/license-plate-detection-yolov5-pytesseract/notebook#Visualize"""
355
+ try:
356
+ extension = file.filename.split(".")[-1] in ("jpg", "jpeg", "png")
357
+ if not extension:
358
+ return "Image must be jpg or png format!"
359
+ # read the image contents
360
+ contents = await file.read()
361
+ pil_image = Image.open(io.BytesIO(contents))
362
+ results = licence_model(pil_image, size=640) # reduce size=320 for faster inference
363
+ pil_image = pil_image.convert("RGB")
364
+ open_cv_image = np.array(pil_image)
365
+ open_cv_image = open_cv_image[:, :, ::-1].copy()
366
+ df = results.pandas().xyxy[0]
367
+ for i, row in df.iterrows():
368
+ xmin = int(row["xmin"])
369
+ ymin = int(row["ymin"])
370
+ xmax = int(row["xmax"])
371
+ ymax = int(row["ymax"])
372
+ licence = open_cv_image[ymin:ymax, xmin:xmax]
373
+ if blur_type == "simple":
374
+ licence = anonymize_simple(licence)
375
+ else:
376
+ licence = anonymize_pixelate(licence)
377
+ open_cv_image[ymin:ymax, xmin:xmax] = licence
378
+
379
+ _, encoded_img = cv2.imencode(".PNG", open_cv_image)
380
+
381
+ encoded_img = base64.b64encode(encoded_img)
382
+ return {
383
+ "filename": file.filename,
384
+ "dimensions": str(open_cv_image.shape),
385
+ "encoded_img": encoded_img,
386
+ }
387
+
388
+ except:
389
+ e = sys.exc_info()[1]
390
+ raise HTTPException(status_code=500, detail=str(e))
391
+
392
+
393
+ def process_document(image, question):
394
+ # prepare encoder inputs
395
+ pixel_values = processor(image, return_tensors="pt").pixel_values
396
+
397
+ # prepare decoder inputs
398
+ task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
399
+ prompt = task_prompt.replace("{user_input}", question)
400
+ decoder_input_ids = processor.tokenizer(
401
+ prompt, add_special_tokens=False, return_tensors="pt"
402
+ ).input_ids
403
+
404
+ # generate answer
405
+ outputs = doc_qa_model.generate(
406
+ pixel_values.to(device),
407
+ decoder_input_ids=decoder_input_ids.to(device),
408
+ max_length=doc_qa_model.decoder.config.max_position_embeddings,
409
+ early_stopping=True,
410
+ pad_token_id=processor.tokenizer.pad_token_id,
411
+ eos_token_id=processor.tokenizer.eos_token_id,
412
+ use_cache=True,
413
+ num_beams=1,
414
+ bad_words_ids=[[processor.tokenizer.unk_token_id]],
415
+ return_dict_in_generate=True,
416
+ )
417
+
418
+ # postprocess
419
+ sequence = processor.batch_decode(outputs.sequences)[0]
420
+ sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(
421
+ processor.tokenizer.pad_token, ""
422
+ )
423
+ sequence = re.sub(r"<.*?>", "", sequence, count=1).strip() # remove first task start token
424
+
425
+ return processor.token2json(sequence)
426
+
427
+
428
+ @app.post(DOCUMENT_QA)
429
+ async def document_qa(question: str = Form(...), file: UploadFile = File(...)):
430
+
431
+ try:
432
+ extension = file.filename.split(".")[-1] in ("jpg", "jpeg", "png")
433
+ if not extension:
434
+ return "Image must be jpg or png format!"
435
+ # read the image contents
436
+ contents = await file.read()
437
+ pil_image = Image.open(io.BytesIO(contents))
438
+ # tmp_file = f"{TMP_DIR}/tmp.png"
439
+ # pil_image.save(tmp_file)
440
+ # answer_git_large = generate_answer_git(git_processor_large, git_model_large, image, question)
441
+
442
+ answer = process_document(pil_image, question)["answer"]
443
+
444
+ return {"answer": answer}
445
+
446
+ except:
447
+ e = sys.exc_info()[1]
448
+ raise HTTPException(status_code=500, detail=str(e))
449
+
450
+
451
+ @app.post(IMAGE_SIMILARITY_DEMO)
452
+ async def image_search_local(
453
+ images_to_search: List[UploadFile], query_image: UploadFile = File(...), top_k: int = 5
454
+ ):
455
+ print(
456
+ f"Recived images of length: {len(images_to_search)} needs to retrieve top k : {top_k} similar images as result"
457
+ )
458
+ try:
459
+ extension = query_image.filename.split(".")[-1] in ("jpg", "jpeg", "png")
460
+ search_images = []
461
+ search_filenames = []
462
+ print("Processing request...")
463
+ for image in images_to_search:
464
+ if image.filename.split(".")[-1] not in ("jpg", "jpeg", "png"):
465
+ return "Image must be jpg or png format!"
466
+ # read the image contents
467
+ search_filenames.append(image.filename)
468
+ contents = await image.read()
469
+ search_images.append(Image.open(io.BytesIO(contents)))
470
+ print("Indexing images to search...")
471
+
472
+ corpus_embeddings = image_sim_model.encode(
473
+ search_images, convert_to_tensor=True, show_progress_bar=True
474
+ )
475
+ if not extension:
476
+ return "Image must be jpg or png format!"
477
+ # read the image contents
478
+ contents = await query_image.read()
479
+ query_image = Image.open(io.BytesIO(contents))
480
+ print("Indexing query image...")
481
+
482
+ prompt_embedding = image_sim_model.encode(query_image, convert_to_tensor=True)
483
+ print("Searching query image...")
484
+
485
+ hits = util.semantic_search(prompt_embedding, corpus_embeddings, top_k=top_k)
486
+ # hits = pd.DataFrame(hits[0], columns=['corpus_id', 'score'])
487
+ # tmp_file = f"{TMP_DIR}/tmp.png"
488
+ # pil_image.save(tmp_file)
489
+ # answer_git_large = generate_answer_git(git_processor_large, git_model_large, image, question)
490
+ print("Creating the result..")
491
+ similar_images = []
492
+ print("hits ", hits)
493
+ for hit in hits[0]:
494
+ # print("Finding the image ")
495
+ # print("Type of images list ", type(search_images), "similar image id ", hit['corpus_id'])
496
+ open_cv_image = np.array(search_images[hit["corpus_id"]].convert("RGB"))[:, :, ::-1]
497
+ # print("cv2.imencode the image ")
498
+ _, encoded_img = cv2.imencode(".PNG", open_cv_image)
499
+ # print("base64 the image ")
500
+ encoded_img = base64.b64encode(encoded_img)
501
+ # print("Appending the image ")
502
+ similar_images.append(
503
+ {
504
+ "filename": search_filenames[hit["corpus_id"]],
505
+ "dimensions": str(open_cv_image.shape),
506
+ "score": hit["score"],
507
+ "encoded_img": encoded_img,
508
+ }
509
+ )
510
+ print("Sending result..")
511
+
512
+ return {"similar_images": similar_images}
513
+
514
+ except:
515
+ e = sys.exc_info()[1]
516
+ raise HTTPException(status_code=500, detail=str(e))
517
+
518
+
519
+ @app.post(IMAGE_SIMILARITY_PINECONE_DEMO)
520
+ async def image_search_pinecone(
521
+ images_to_search: Optional[List[UploadFile]] = File(None),
522
+ query_image: Optional[UploadFile] = File(None),
523
+ top_k: int = 5,
524
+ namespace="av_local",
525
+ action="query",
526
+ ):
527
+
528
+ try:
529
+ # Function to delete all files from the database
530
+ print(f"Received request with images_to_search: {images_to_search} and query_image: {query_image} with action: {action}")
531
+ if action == "delete":
532
+ index = pinecone.Index(INDEX_NAME)
533
+ delete_response = index.delete(delete_all=True, namespace=namespace)
534
+ return {f"Deleted the namespace: {namespace}": delete_response}
535
+
536
+ elif action == "query" and query_image is not None:
537
+ extension = query_image.filename.split(".")[-1] in ("jpg", "jpeg", "png", "JPG", "PNG", "JPEG")
538
+ if not extension:
539
+ return "Image must be jpg or png format!"
540
+ # read the image contents
541
+ contents = await query_image.read()
542
+ query_image = Image.open(io.BytesIO(contents))
543
+ print("Indexing query image...")
544
+ query_image = enhance_image(query_image)
545
+ prompt_embedding = image_sim_model.encode(query_image, convert_to_tensor=True).tolist()
546
+ if INDEX_NAME not in pinecone.list_indexes():
547
+ return {"similar_images": [], "status": "No index found for images"}
548
+
549
+ else:
550
+ index = pinecone.Index(INDEX_NAME)
551
+ query_response = index.query(
552
+ namespace=namespace,
553
+ top_k=top_k,
554
+ include_values=True,
555
+ include_metadata=True,
556
+ vector=prompt_embedding,
557
+ )
558
+ result_images = [d["metadata"]["file_path"] for d in query_response["matches"]]
559
+ print("Creating the result..")
560
+ similar_images = []
561
+ print("Retrieved matches ", query_response["matches"])
562
+ for file_path in result_images:
563
+ try:
564
+ # print("Finding the image ")
565
+ # print("Type of images list ", type(search_images), "similar image id ", hit['corpus_id'])
566
+ open_cv_image = cv2.imread(file_path)
567
+ # print("cv2.imencode the image ")
568
+ _, encoded_img = cv2.imencode(".PNG", open_cv_image)
569
+ # print("base64 the image ")
570
+ encoded_img = base64.b64encode(encoded_img)
571
+ # print("Appending the image ")
572
+ similar_images.append(
573
+ {
574
+ "filename": file_path,
575
+ "dimensions": str(open_cv_image.shape),
576
+ "score": 0,
577
+ "encoded_img": encoded_img,
578
+ }
579
+ )
580
+ except:
581
+ similar_images.append(
582
+ {
583
+ "filename": file_path,
584
+ "dimensions": None,
585
+ "score": 0,
586
+ "encoded_img": None,
587
+ }
588
+ )
589
+ print("Sending result..")
590
+
591
+ return {"similar_images": similar_images}
592
+
593
+ elif action == "index" and len(images_to_search) > 0:
594
+ print(
595
+ f"Recived images of length: {len(images_to_search)} needs to retrieve top k : {top_k} similar images as result"
596
+ )
597
+ print(f"Action indexing is executing for : {len(images_to_search)} images")
598
+ # if the index does not already exist, we create it
599
+ # check if the abstractive-question-answering index exists
600
+ print("checking pinecone Index")
601
+ if INDEX_NAME not in pinecone.list_indexes():
602
+ # delete the current index and create the new index if it does not exist
603
+ for delete_index in pinecone.list_indexes():
604
+ print(f"Deleting exitsing pinecone Index : {delete_index}")
605
+
606
+ pinecone.delete_index(delete_index)
607
+ print(f"Creating new pinecone Index : {INDEX_NAME}")
608
+ pinecone.create_index(INDEX_NAME, dimension=INDEX_DIMENSION, metric="cosine")
609
+ # instantiate connection to your Pinecone index
610
+ print(f"Connecting to pinecone Index : {INDEX_NAME}")
611
+ index = pinecone.Index(INDEX_NAME)
612
+ search_images = []
613
+ meta_datas = []
614
+ ids = []
615
+ print("Processing request...")
616
+ for image in images_to_search:
617
+ if image.filename.split(".")[-1] not in ("jpg", "jpeg", "png", "JPG", "PNG", "JPEG"):
618
+ return "Image must be jpg or png format!"
619
+ # read the image contents
620
+ contents = await image.read()
621
+ pil_image = Image.open(io.BytesIO(contents))
622
+ tmp_file = f"{TMP_DIR}/{image.filename}"
623
+ pil_image.save(tmp_file)
624
+ meta_datas.append({"file_path": tmp_file})
625
+ search_images.append(pil_image)
626
+ ids.append(str(uuid.uuid1()).replace("-",""))
627
+
628
+ print("Encoding images to vectors...")
629
+ corpus_embeddings = image_sim_model.encode(
630
+ search_images, convert_to_tensor=True, show_progress_bar=True
631
+ ).tolist()
632
+ print(f"Indexing images to pinecone Index : {INDEX_NAME}")
633
+ index.upsert(
634
+ vectors=list(zip(ids, corpus_embeddings, meta_datas)), namespace=namespace
635
+ )
636
+
637
+
638
+ return {"similar_images": [], "status": "Indexing succesfull for uploaded files"}
639
+ else:
640
+ return {"similar_images": []}
641
+ except Exception as e:
642
+ e = sys.exc_info()[1]
643
+ print(f"exception happened {e} {str(traceback.print_exc())}")
644
+ raise HTTPException(status_code=500, detail=str(e))
645
+
646
+
647
+ if __name__ == "__main__":
648
+ parser = argparse.ArgumentParser(description="Fast API exposing YOLOv5 model")
649
+ parser.add_argument("--port", default=8000, type=int, help="port number")
650
+ # parser.add_argument('--model', nargs='+', default=['yolov5s'], help='model(s) to run, i.e. --model yolov5n yolov5s')
651
+ opt = parser.parse_args()
652
+ uvicorn.run(app, port=opt.port)
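To illustrate how the routes defined above are meant to be called, here is a hedged client sketch for the object-detection and face-anonymization endpoints. The base URL, the sample file names, and the `requests` dependency are assumptions; the endpoint paths, the `file` form field, and the `encoded_img` response key come from app.py itself.

```python
# Hypothetical client for two of the endpoints in app.py (sketch only).
import base64
import requests

BASE_URL = "http://localhost:7860"  # assumption: service running locally

# Object detection: multipart image upload, optional MUSIQ quality gate.
with open("sample.jpg", "rb") as f:
    r = requests.post(
        f"{BASE_URL}/object-detection/",
        files={"file": ("sample.jpg", f, "image/jpeg")},
        params={"quality_check": "false"},
        timeout=120,
    )
r.raise_for_status()
print(r.json())  # YOLOv5 detections serialized via pandas to_json

# Face anonymization: the response carries the blurred image as base64.
with open("sample.jpg", "rb") as f:
    r = requests.post(
        f"{BASE_URL}/face-anonymization/",
        files={"file": ("sample.jpg", f, "image/jpeg")},
        timeout=120,
    )
r.raise_for_status()
with open("anonymized.png", "wb") as out:
    out.write(base64.b64decode(r.json()["encoded_img"]))
```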
best.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8faa2592e29248e58453cb031e536bd96f2929d9768bbd3c78ea54944f045db
3
+ size 14447677
best_classifer_model.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e5c0f63fbe8f8349ceda742cc6c7d333c1a2ae443b6f7aa1d100859d59322a7
3
+ size 377080432
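deploy.prototxt (added below) is the Caffe network definition that app.py pairs with the LFS-tracked res10_300x300_ssd_iter_140000_fp16.caffemodel to build its OpenCV face detector. A minimal standalone sketch of that usage, mirroring the calls in app.py, might look like the following; the sample image path is an assumption.

```python
# Sketch of the face-detector usage wrapped by app.py, assuming OpenCV is
# installed and both model files sit in the working directory.
import cv2

detector = cv2.dnn.DetectionModel(
    "res10_300x300_ssd_iter_140000_fp16.caffemodel", "deploy.prototxt"
)

image = cv2.imread("sample.jpg")  # assumption: a local test image
# detect() returns (class_ids, confidences, boxes); app.py consumes the boxes.
class_ids, confidences, boxes = detector.detect(image)
for (x, y, w, h), conf in zip(boxes, confidences):
    print(f"face candidate at x={x}, y={y}, w={w}, h={h}, confidence={conf:.2f}")
```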
deploy.prototxt ADDED
@@ -0,0 +1,1789 @@
1
+ input: "data"
2
+ input_shape {
3
+ dim: 1
4
+ dim: 3
5
+ dim: 300
6
+ dim: 300
7
+ }
8
+
9
+ layer {
10
+ name: "data_bn"
11
+ type: "BatchNorm"
12
+ bottom: "data"
13
+ top: "data_bn"
14
+ param {
15
+ lr_mult: 0.0
16
+ }
17
+ param {
18
+ lr_mult: 0.0
19
+ }
20
+ param {
21
+ lr_mult: 0.0
22
+ }
23
+ }
24
+ layer {
25
+ name: "data_scale"
26
+ type: "Scale"
27
+ bottom: "data_bn"
28
+ top: "data_bn"
29
+ param {
30
+ lr_mult: 1.0
31
+ decay_mult: 1.0
32
+ }
33
+ param {
34
+ lr_mult: 2.0
35
+ decay_mult: 1.0
36
+ }
37
+ scale_param {
38
+ bias_term: true
39
+ }
40
+ }
41
+ layer {
42
+ name: "conv1_h"
43
+ type: "Convolution"
44
+ bottom: "data_bn"
45
+ top: "conv1_h"
46
+ param {
47
+ lr_mult: 1.0
48
+ decay_mult: 1.0
49
+ }
50
+ param {
51
+ lr_mult: 2.0
52
+ decay_mult: 1.0
53
+ }
54
+ convolution_param {
55
+ num_output: 32
56
+ pad: 3
57
+ kernel_size: 7
58
+ stride: 2
59
+ weight_filler {
60
+ type: "msra"
61
+ variance_norm: FAN_OUT
62
+ }
63
+ bias_filler {
64
+ type: "constant"
65
+ value: 0.0
66
+ }
67
+ }
68
+ }
69
+ layer {
70
+ name: "conv1_bn_h"
71
+ type: "BatchNorm"
72
+ bottom: "conv1_h"
73
+ top: "conv1_h"
74
+ param {
75
+ lr_mult: 0.0
76
+ }
77
+ param {
78
+ lr_mult: 0.0
79
+ }
80
+ param {
81
+ lr_mult: 0.0
82
+ }
83
+ }
84
+ layer {
85
+ name: "conv1_scale_h"
86
+ type: "Scale"
87
+ bottom: "conv1_h"
88
+ top: "conv1_h"
89
+ param {
90
+ lr_mult: 1.0
91
+ decay_mult: 1.0
92
+ }
93
+ param {
94
+ lr_mult: 2.0
95
+ decay_mult: 1.0
96
+ }
97
+ scale_param {
98
+ bias_term: true
99
+ }
100
+ }
101
+ layer {
102
+ name: "conv1_relu"
103
+ type: "ReLU"
104
+ bottom: "conv1_h"
105
+ top: "conv1_h"
106
+ }
107
+ layer {
108
+ name: "conv1_pool"
109
+ type: "Pooling"
110
+ bottom: "conv1_h"
111
+ top: "conv1_pool"
112
+ pooling_param {
113
+ kernel_size: 3
114
+ stride: 2
115
+ }
116
+ }
117
+ layer {
118
+ name: "layer_64_1_conv1_h"
119
+ type: "Convolution"
120
+ bottom: "conv1_pool"
121
+ top: "layer_64_1_conv1_h"
122
+ param {
123
+ lr_mult: 1.0
124
+ decay_mult: 1.0
125
+ }
126
+ convolution_param {
127
+ num_output: 32
128
+ bias_term: false
129
+ pad: 1
130
+ kernel_size: 3
131
+ stride: 1
132
+ weight_filler {
133
+ type: "msra"
134
+ }
135
+ bias_filler {
136
+ type: "constant"
137
+ value: 0.0
138
+ }
139
+ }
140
+ }
141
+ layer {
142
+ name: "layer_64_1_bn2_h"
143
+ type: "BatchNorm"
144
+ bottom: "layer_64_1_conv1_h"
145
+ top: "layer_64_1_conv1_h"
146
+ param {
147
+ lr_mult: 0.0
148
+ }
149
+ param {
150
+ lr_mult: 0.0
151
+ }
152
+ param {
153
+ lr_mult: 0.0
154
+ }
155
+ }
156
+ layer {
157
+ name: "layer_64_1_scale2_h"
158
+ type: "Scale"
159
+ bottom: "layer_64_1_conv1_h"
160
+ top: "layer_64_1_conv1_h"
161
+ param {
162
+ lr_mult: 1.0
163
+ decay_mult: 1.0
164
+ }
165
+ param {
166
+ lr_mult: 2.0
167
+ decay_mult: 1.0
168
+ }
169
+ scale_param {
170
+ bias_term: true
171
+ }
172
+ }
173
+ layer {
174
+ name: "layer_64_1_relu2"
175
+ type: "ReLU"
176
+ bottom: "layer_64_1_conv1_h"
177
+ top: "layer_64_1_conv1_h"
178
+ }
179
+ layer {
180
+ name: "layer_64_1_conv2_h"
181
+ type: "Convolution"
182
+ bottom: "layer_64_1_conv1_h"
183
+ top: "layer_64_1_conv2_h"
184
+ param {
185
+ lr_mult: 1.0
186
+ decay_mult: 1.0
187
+ }
188
+ convolution_param {
189
+ num_output: 32
190
+ bias_term: false
191
+ pad: 1
192
+ kernel_size: 3
193
+ stride: 1
194
+ weight_filler {
195
+ type: "msra"
196
+ }
197
+ bias_filler {
198
+ type: "constant"
199
+ value: 0.0
200
+ }
201
+ }
202
+ }
203
+ layer {
204
+ name: "layer_64_1_sum"
205
+ type: "Eltwise"
206
+ bottom: "layer_64_1_conv2_h"
207
+ bottom: "conv1_pool"
208
+ top: "layer_64_1_sum"
209
+ }
210
+ layer {
211
+ name: "layer_128_1_bn1_h"
212
+ type: "BatchNorm"
213
+ bottom: "layer_64_1_sum"
214
+ top: "layer_128_1_bn1_h"
215
+ param {
216
+ lr_mult: 0.0
217
+ }
218
+ param {
219
+ lr_mult: 0.0
220
+ }
221
+ param {
222
+ lr_mult: 0.0
223
+ }
224
+ }
225
+ layer {
226
+ name: "layer_128_1_scale1_h"
227
+ type: "Scale"
228
+ bottom: "layer_128_1_bn1_h"
229
+ top: "layer_128_1_bn1_h"
230
+ param {
231
+ lr_mult: 1.0
232
+ decay_mult: 1.0
233
+ }
234
+ param {
235
+ lr_mult: 2.0
236
+ decay_mult: 1.0
237
+ }
238
+ scale_param {
239
+ bias_term: true
240
+ }
241
+ }
242
+ layer {
243
+ name: "layer_128_1_relu1"
244
+ type: "ReLU"
245
+ bottom: "layer_128_1_bn1_h"
246
+ top: "layer_128_1_bn1_h"
247
+ }
248
+ layer {
249
+ name: "layer_128_1_conv1_h"
250
+ type: "Convolution"
251
+ bottom: "layer_128_1_bn1_h"
252
+ top: "layer_128_1_conv1_h"
253
+ param {
254
+ lr_mult: 1.0
255
+ decay_mult: 1.0
256
+ }
257
+ convolution_param {
258
+ num_output: 128
259
+ bias_term: false
260
+ pad: 1
261
+ kernel_size: 3
262
+ stride: 2
263
+ weight_filler {
264
+ type: "msra"
265
+ }
266
+ bias_filler {
267
+ type: "constant"
268
+ value: 0.0
269
+ }
270
+ }
271
+ }
272
+ layer {
273
+ name: "layer_128_1_bn2"
274
+ type: "BatchNorm"
275
+ bottom: "layer_128_1_conv1_h"
276
+ top: "layer_128_1_conv1_h"
277
+ param {
278
+ lr_mult: 0.0
279
+ }
280
+ param {
281
+ lr_mult: 0.0
282
+ }
283
+ param {
284
+ lr_mult: 0.0
285
+ }
286
+ }
287
+ layer {
288
+ name: "layer_128_1_scale2"
289
+ type: "Scale"
290
+ bottom: "layer_128_1_conv1_h"
291
+ top: "layer_128_1_conv1_h"
292
+ param {
293
+ lr_mult: 1.0
294
+ decay_mult: 1.0
295
+ }
296
+ param {
297
+ lr_mult: 2.0
298
+ decay_mult: 1.0
299
+ }
300
+ scale_param {
301
+ bias_term: true
302
+ }
303
+ }
304
+ layer {
305
+ name: "layer_128_1_relu2"
306
+ type: "ReLU"
307
+ bottom: "layer_128_1_conv1_h"
308
+ top: "layer_128_1_conv1_h"
309
+ }
310
+ layer {
311
+ name: "layer_128_1_conv2"
312
+ type: "Convolution"
313
+ bottom: "layer_128_1_conv1_h"
314
+ top: "layer_128_1_conv2"
315
+ param {
316
+ lr_mult: 1.0
317
+ decay_mult: 1.0
318
+ }
319
+ convolution_param {
320
+ num_output: 128
321
+ bias_term: false
322
+ pad: 1
323
+ kernel_size: 3
324
+ stride: 1
325
+ weight_filler {
326
+ type: "msra"
327
+ }
328
+ bias_filler {
329
+ type: "constant"
330
+ value: 0.0
331
+ }
332
+ }
333
+ }
334
+ layer {
335
+ name: "layer_128_1_conv_expand_h"
336
+ type: "Convolution"
337
+ bottom: "layer_128_1_bn1_h"
338
+ top: "layer_128_1_conv_expand_h"
339
+ param {
340
+ lr_mult: 1.0
341
+ decay_mult: 1.0
342
+ }
343
+ convolution_param {
344
+ num_output: 128
345
+ bias_term: false
346
+ pad: 0
347
+ kernel_size: 1
348
+ stride: 2
349
+ weight_filler {
350
+ type: "msra"
351
+ }
352
+ bias_filler {
353
+ type: "constant"
354
+ value: 0.0
355
+ }
356
+ }
357
+ }
358
+ layer {
359
+ name: "layer_128_1_sum"
360
+ type: "Eltwise"
361
+ bottom: "layer_128_1_conv2"
362
+ bottom: "layer_128_1_conv_expand_h"
363
+ top: "layer_128_1_sum"
364
+ }
365
+ layer {
366
+ name: "layer_256_1_bn1"
367
+ type: "BatchNorm"
368
+ bottom: "layer_128_1_sum"
369
+ top: "layer_256_1_bn1"
370
+ param {
371
+ lr_mult: 0.0
372
+ }
373
+ param {
374
+ lr_mult: 0.0
375
+ }
376
+ param {
377
+ lr_mult: 0.0
378
+ }
379
+ }
380
+ layer {
381
+ name: "layer_256_1_scale1"
382
+ type: "Scale"
383
+ bottom: "layer_256_1_bn1"
384
+ top: "layer_256_1_bn1"
385
+ param {
386
+ lr_mult: 1.0
387
+ decay_mult: 1.0
388
+ }
389
+ param {
390
+ lr_mult: 2.0
391
+ decay_mult: 1.0
392
+ }
393
+ scale_param {
394
+ bias_term: true
395
+ }
396
+ }
397
+ layer {
398
+ name: "layer_256_1_relu1"
399
+ type: "ReLU"
400
+ bottom: "layer_256_1_bn1"
401
+ top: "layer_256_1_bn1"
402
+ }
403
+ layer {
404
+ name: "layer_256_1_conv1"
405
+ type: "Convolution"
406
+ bottom: "layer_256_1_bn1"
407
+ top: "layer_256_1_conv1"
408
+ param {
409
+ lr_mult: 1.0
410
+ decay_mult: 1.0
411
+ }
412
+ convolution_param {
413
+ num_output: 256
414
+ bias_term: false
415
+ pad: 1
416
+ kernel_size: 3
417
+ stride: 2
418
+ weight_filler {
419
+ type: "msra"
420
+ }
421
+ bias_filler {
422
+ type: "constant"
423
+ value: 0.0
424
+ }
425
+ }
426
+ }
427
+ layer {
428
+ name: "layer_256_1_bn2"
429
+ type: "BatchNorm"
430
+ bottom: "layer_256_1_conv1"
431
+ top: "layer_256_1_conv1"
432
+ param {
433
+ lr_mult: 0.0
434
+ }
435
+ param {
436
+ lr_mult: 0.0
437
+ }
438
+ param {
439
+ lr_mult: 0.0
440
+ }
441
+ }
442
+ layer {
443
+ name: "layer_256_1_scale2"
444
+ type: "Scale"
445
+ bottom: "layer_256_1_conv1"
446
+ top: "layer_256_1_conv1"
447
+ param {
448
+ lr_mult: 1.0
449
+ decay_mult: 1.0
450
+ }
451
+ param {
452
+ lr_mult: 2.0
453
+ decay_mult: 1.0
454
+ }
455
+ scale_param {
456
+ bias_term: true
457
+ }
458
+ }
459
+ layer {
460
+ name: "layer_256_1_relu2"
461
+ type: "ReLU"
462
+ bottom: "layer_256_1_conv1"
463
+ top: "layer_256_1_conv1"
464
+ }
465
+ layer {
466
+ name: "layer_256_1_conv2"
467
+ type: "Convolution"
468
+ bottom: "layer_256_1_conv1"
469
+ top: "layer_256_1_conv2"
470
+ param {
471
+ lr_mult: 1.0
472
+ decay_mult: 1.0
473
+ }
474
+ convolution_param {
475
+ num_output: 256
476
+ bias_term: false
477
+ pad: 1
478
+ kernel_size: 3
479
+ stride: 1
480
+ weight_filler {
481
+ type: "msra"
482
+ }
483
+ bias_filler {
484
+ type: "constant"
485
+ value: 0.0
486
+ }
487
+ }
488
+ }
489
+ layer {
490
+ name: "layer_256_1_conv_expand"
491
+ type: "Convolution"
492
+ bottom: "layer_256_1_bn1"
493
+ top: "layer_256_1_conv_expand"
494
+ param {
495
+ lr_mult: 1.0
496
+ decay_mult: 1.0
497
+ }
498
+ convolution_param {
499
+ num_output: 256
500
+ bias_term: false
501
+ pad: 0
502
+ kernel_size: 1
503
+ stride: 2
504
+ weight_filler {
505
+ type: "msra"
506
+ }
507
+ bias_filler {
508
+ type: "constant"
509
+ value: 0.0
510
+ }
511
+ }
512
+ }
513
+ layer {
514
+ name: "layer_256_1_sum"
515
+ type: "Eltwise"
516
+ bottom: "layer_256_1_conv2"
517
+ bottom: "layer_256_1_conv_expand"
518
+ top: "layer_256_1_sum"
519
+ }
520
+ layer {
521
+ name: "layer_512_1_bn1"
522
+ type: "BatchNorm"
523
+ bottom: "layer_256_1_sum"
524
+ top: "layer_512_1_bn1"
525
+ param {
526
+ lr_mult: 0.0
527
+ }
528
+ param {
529
+ lr_mult: 0.0
530
+ }
531
+ param {
532
+ lr_mult: 0.0
533
+ }
534
+ }
535
+ layer {
536
+ name: "layer_512_1_scale1"
537
+ type: "Scale"
538
+ bottom: "layer_512_1_bn1"
539
+ top: "layer_512_1_bn1"
540
+ param {
541
+ lr_mult: 1.0
542
+ decay_mult: 1.0
543
+ }
544
+ param {
545
+ lr_mult: 2.0
546
+ decay_mult: 1.0
547
+ }
548
+ scale_param {
549
+ bias_term: true
550
+ }
551
+ }
552
+ layer {
553
+ name: "layer_512_1_relu1"
554
+ type: "ReLU"
555
+ bottom: "layer_512_1_bn1"
556
+ top: "layer_512_1_bn1"
557
+ }
558
+ layer {
559
+ name: "layer_512_1_conv1_h"
560
+ type: "Convolution"
561
+ bottom: "layer_512_1_bn1"
562
+ top: "layer_512_1_conv1_h"
563
+ param {
564
+ lr_mult: 1.0
565
+ decay_mult: 1.0
566
+ }
567
+ convolution_param {
568
+ num_output: 128
569
+ bias_term: false
570
+ pad: 1
571
+ kernel_size: 3
572
+ stride: 1 # 2
573
+ weight_filler {
574
+ type: "msra"
575
+ }
576
+ bias_filler {
577
+ type: "constant"
578
+ value: 0.0
579
+ }
580
+ }
581
+ }
582
+ layer {
583
+ name: "layer_512_1_bn2_h"
584
+ type: "BatchNorm"
585
+ bottom: "layer_512_1_conv1_h"
586
+ top: "layer_512_1_conv1_h"
587
+ param {
588
+ lr_mult: 0.0
589
+ }
590
+ param {
591
+ lr_mult: 0.0
592
+ }
593
+ param {
594
+ lr_mult: 0.0
595
+ }
596
+ }
597
+ layer {
598
+ name: "layer_512_1_scale2_h"
599
+ type: "Scale"
600
+ bottom: "layer_512_1_conv1_h"
601
+ top: "layer_512_1_conv1_h"
602
+ param {
603
+ lr_mult: 1.0
604
+ decay_mult: 1.0
605
+ }
606
+ param {
607
+ lr_mult: 2.0
608
+ decay_mult: 1.0
609
+ }
610
+ scale_param {
611
+ bias_term: true
612
+ }
613
+ }
614
+ layer {
615
+ name: "layer_512_1_relu2"
616
+ type: "ReLU"
617
+ bottom: "layer_512_1_conv1_h"
618
+ top: "layer_512_1_conv1_h"
619
+ }
620
+ layer {
621
+ name: "layer_512_1_conv2_h"
622
+ type: "Convolution"
623
+ bottom: "layer_512_1_conv1_h"
624
+ top: "layer_512_1_conv2_h"
625
+ param {
626
+ lr_mult: 1.0
627
+ decay_mult: 1.0
628
+ }
629
+ convolution_param {
630
+ num_output: 256
631
+ bias_term: false
632
+ pad: 2 # 1
633
+ kernel_size: 3
634
+ stride: 1
635
+ dilation: 2
636
+ weight_filler {
637
+ type: "msra"
638
+ }
639
+ bias_filler {
640
+ type: "constant"
641
+ value: 0.0
642
+ }
643
+ }
644
+ }
645
+ layer {
646
+ name: "layer_512_1_conv_expand_h"
647
+ type: "Convolution"
648
+ bottom: "layer_512_1_bn1"
649
+ top: "layer_512_1_conv_expand_h"
650
+ param {
651
+ lr_mult: 1.0
652
+ decay_mult: 1.0
653
+ }
654
+ convolution_param {
655
+ num_output: 256
656
+ bias_term: false
657
+ pad: 0
658
+ kernel_size: 1
659
+ stride: 1 # 2
660
+ weight_filler {
661
+ type: "msra"
662
+ }
663
+ bias_filler {
664
+ type: "constant"
665
+ value: 0.0
666
+ }
667
+ }
668
+ }
669
+ layer {
670
+ name: "layer_512_1_sum"
671
+ type: "Eltwise"
672
+ bottom: "layer_512_1_conv2_h"
673
+ bottom: "layer_512_1_conv_expand_h"
674
+ top: "layer_512_1_sum"
675
+ }
676
+ layer {
677
+ name: "last_bn_h"
678
+ type: "BatchNorm"
679
+ bottom: "layer_512_1_sum"
680
+ top: "layer_512_1_sum"
681
+ param {
682
+ lr_mult: 0.0
683
+ }
684
+ param {
685
+ lr_mult: 0.0
686
+ }
687
+ param {
688
+ lr_mult: 0.0
689
+ }
690
+ }
691
+ layer {
692
+ name: "last_scale_h"
693
+ type: "Scale"
694
+ bottom: "layer_512_1_sum"
695
+ top: "layer_512_1_sum"
696
+ param {
697
+ lr_mult: 1.0
698
+ decay_mult: 1.0
699
+ }
700
+ param {
701
+ lr_mult: 2.0
702
+ decay_mult: 1.0
703
+ }
704
+ scale_param {
705
+ bias_term: true
706
+ }
707
+ }
708
+ layer {
709
+ name: "last_relu"
710
+ type: "ReLU"
711
+ bottom: "layer_512_1_sum"
712
+ top: "fc7"
713
+ }
714
+
715
+ layer {
716
+ name: "conv6_1_h"
717
+ type: "Convolution"
718
+ bottom: "fc7"
719
+ top: "conv6_1_h"
720
+ param {
721
+ lr_mult: 1
722
+ decay_mult: 1
723
+ }
724
+ param {
725
+ lr_mult: 2
726
+ decay_mult: 0
727
+ }
728
+ convolution_param {
729
+ num_output: 128
730
+ pad: 0
731
+ kernel_size: 1
732
+ stride: 1
733
+ weight_filler {
734
+ type: "xavier"
735
+ }
736
+ bias_filler {
737
+ type: "constant"
738
+ value: 0
739
+ }
740
+ }
741
+ }
742
+ layer {
743
+ name: "conv6_1_relu"
744
+ type: "ReLU"
745
+ bottom: "conv6_1_h"
746
+ top: "conv6_1_h"
747
+ }
748
+ layer {
749
+ name: "conv6_2_h"
750
+ type: "Convolution"
751
+ bottom: "conv6_1_h"
752
+ top: "conv6_2_h"
753
+ param {
754
+ lr_mult: 1
755
+ decay_mult: 1
756
+ }
757
+ param {
758
+ lr_mult: 2
759
+ decay_mult: 0
760
+ }
761
+ convolution_param {
762
+ num_output: 256
763
+ pad: 1
764
+ kernel_size: 3
765
+ stride: 2
766
+ weight_filler {
767
+ type: "xavier"
768
+ }
769
+ bias_filler {
770
+ type: "constant"
771
+ value: 0
772
+ }
773
+ }
774
+ }
775
+ layer {
776
+ name: "conv6_2_relu"
777
+ type: "ReLU"
778
+ bottom: "conv6_2_h"
779
+ top: "conv6_2_h"
780
+ }
781
+ layer {
782
+ name: "conv7_1_h"
783
+ type: "Convolution"
784
+ bottom: "conv6_2_h"
785
+ top: "conv7_1_h"
786
+ param {
787
+ lr_mult: 1
788
+ decay_mult: 1
789
+ }
790
+ param {
791
+ lr_mult: 2
792
+ decay_mult: 0
793
+ }
794
+ convolution_param {
795
+ num_output: 64
796
+ pad: 0
797
+ kernel_size: 1
798
+ stride: 1
799
+ weight_filler {
800
+ type: "xavier"
801
+ }
802
+ bias_filler {
803
+ type: "constant"
804
+ value: 0
805
+ }
806
+ }
807
+ }
808
+ layer {
809
+ name: "conv7_1_relu"
810
+ type: "ReLU"
811
+ bottom: "conv7_1_h"
812
+ top: "conv7_1_h"
813
+ }
814
+ layer {
815
+ name: "conv7_2_h"
816
+ type: "Convolution"
817
+ bottom: "conv7_1_h"
818
+ top: "conv7_2_h"
819
+ param {
820
+ lr_mult: 1
821
+ decay_mult: 1
822
+ }
823
+ param {
824
+ lr_mult: 2
825
+ decay_mult: 0
826
+ }
827
+ convolution_param {
828
+ num_output: 128
829
+ pad: 1
830
+ kernel_size: 3
831
+ stride: 2
832
+ weight_filler {
833
+ type: "xavier"
834
+ }
835
+ bias_filler {
836
+ type: "constant"
837
+ value: 0
838
+ }
839
+ }
840
+ }
841
+ layer {
842
+ name: "conv7_2_relu"
843
+ type: "ReLU"
844
+ bottom: "conv7_2_h"
845
+ top: "conv7_2_h"
846
+ }
847
+ layer {
848
+ name: "conv8_1_h"
849
+ type: "Convolution"
850
+ bottom: "conv7_2_h"
851
+ top: "conv8_1_h"
852
+ param {
853
+ lr_mult: 1
854
+ decay_mult: 1
855
+ }
856
+ param {
857
+ lr_mult: 2
858
+ decay_mult: 0
859
+ }
860
+ convolution_param {
861
+ num_output: 64
862
+ pad: 0
863
+ kernel_size: 1
864
+ stride: 1
865
+ weight_filler {
866
+ type: "xavier"
867
+ }
868
+ bias_filler {
869
+ type: "constant"
870
+ value: 0
871
+ }
872
+ }
873
+ }
874
+ layer {
875
+ name: "conv8_1_relu"
876
+ type: "ReLU"
877
+ bottom: "conv8_1_h"
878
+ top: "conv8_1_h"
879
+ }
880
+ layer {
881
+ name: "conv8_2_h"
882
+ type: "Convolution"
883
+ bottom: "conv8_1_h"
884
+ top: "conv8_2_h"
885
+ param {
886
+ lr_mult: 1
887
+ decay_mult: 1
888
+ }
889
+ param {
890
+ lr_mult: 2
891
+ decay_mult: 0
892
+ }
893
+ convolution_param {
894
+ num_output: 128
895
+ pad: 1
896
+ kernel_size: 3
897
+ stride: 1
898
+ weight_filler {
899
+ type: "xavier"
900
+ }
901
+ bias_filler {
902
+ type: "constant"
903
+ value: 0
904
+ }
905
+ }
906
+ }
907
+ layer {
908
+ name: "conv8_2_relu"
909
+ type: "ReLU"
910
+ bottom: "conv8_2_h"
911
+ top: "conv8_2_h"
912
+ }
913
+ layer {
914
+ name: "conv9_1_h"
915
+ type: "Convolution"
916
+ bottom: "conv8_2_h"
917
+ top: "conv9_1_h"
918
+ param {
919
+ lr_mult: 1
920
+ decay_mult: 1
921
+ }
922
+ param {
923
+ lr_mult: 2
924
+ decay_mult: 0
925
+ }
926
+ convolution_param {
927
+ num_output: 64
928
+ pad: 0
929
+ kernel_size: 1
930
+ stride: 1
931
+ weight_filler {
932
+ type: "xavier"
933
+ }
934
+ bias_filler {
935
+ type: "constant"
936
+ value: 0
937
+ }
938
+ }
939
+ }
940
+ layer {
941
+ name: "conv9_1_relu"
942
+ type: "ReLU"
943
+ bottom: "conv9_1_h"
944
+ top: "conv9_1_h"
945
+ }
946
+ layer {
947
+ name: "conv9_2_h"
948
+ type: "Convolution"
949
+ bottom: "conv9_1_h"
950
+ top: "conv9_2_h"
951
+ param {
952
+ lr_mult: 1
953
+ decay_mult: 1
954
+ }
955
+ param {
956
+ lr_mult: 2
957
+ decay_mult: 0
958
+ }
959
+ convolution_param {
960
+ num_output: 128
961
+ pad: 1
962
+ kernel_size: 3
963
+ stride: 1
964
+ weight_filler {
965
+ type: "xavier"
966
+ }
967
+ bias_filler {
968
+ type: "constant"
969
+ value: 0
970
+ }
971
+ }
972
+ }
973
+ layer {
974
+ name: "conv9_2_relu"
975
+ type: "ReLU"
976
+ bottom: "conv9_2_h"
977
+ top: "conv9_2_h"
978
+ }
979
+ layer {
980
+ name: "conv4_3_norm"
981
+ type: "Normalize"
982
+ bottom: "layer_256_1_bn1"
983
+ top: "conv4_3_norm"
984
+ norm_param {
985
+ across_spatial: false
986
+ scale_filler {
987
+ type: "constant"
988
+ value: 20
989
+ }
990
+ channel_shared: false
991
+ }
992
+ }
993
+ layer {
994
+ name: "conv4_3_norm_mbox_loc"
995
+ type: "Convolution"
996
+ bottom: "conv4_3_norm"
997
+ top: "conv4_3_norm_mbox_loc"
998
+ param {
999
+ lr_mult: 1
1000
+ decay_mult: 1
1001
+ }
1002
+ param {
1003
+ lr_mult: 2
1004
+ decay_mult: 0
1005
+ }
1006
+ convolution_param {
1007
+ num_output: 16
1008
+ pad: 1
1009
+ kernel_size: 3
1010
+ stride: 1
1011
+ weight_filler {
1012
+ type: "xavier"
1013
+ }
1014
+ bias_filler {
1015
+ type: "constant"
1016
+ value: 0
1017
+ }
1018
+ }
1019
+ }
1020
+ layer {
1021
+ name: "conv4_3_norm_mbox_loc_perm"
1022
+ type: "Permute"
1023
+ bottom: "conv4_3_norm_mbox_loc"
1024
+ top: "conv4_3_norm_mbox_loc_perm"
1025
+ permute_param {
1026
+ order: 0
1027
+ order: 2
1028
+ order: 3
1029
+ order: 1
1030
+ }
1031
+ }
1032
+ layer {
1033
+ name: "conv4_3_norm_mbox_loc_flat"
1034
+ type: "Flatten"
1035
+ bottom: "conv4_3_norm_mbox_loc_perm"
1036
+ top: "conv4_3_norm_mbox_loc_flat"
1037
+ flatten_param {
1038
+ axis: 1
1039
+ }
1040
+ }
1041
+ layer {
1042
+ name: "conv4_3_norm_mbox_conf"
1043
+ type: "Convolution"
1044
+ bottom: "conv4_3_norm"
1045
+ top: "conv4_3_norm_mbox_conf"
1046
+ param {
1047
+ lr_mult: 1
1048
+ decay_mult: 1
1049
+ }
1050
+ param {
1051
+ lr_mult: 2
1052
+ decay_mult: 0
1053
+ }
1054
+ convolution_param {
1055
+ num_output: 8 # 84
1056
+ pad: 1
1057
+ kernel_size: 3
1058
+ stride: 1
1059
+ weight_filler {
1060
+ type: "xavier"
1061
+ }
1062
+ bias_filler {
1063
+ type: "constant"
1064
+ value: 0
1065
+ }
1066
+ }
1067
+ }
1068
+ layer {
1069
+ name: "conv4_3_norm_mbox_conf_perm"
1070
+ type: "Permute"
1071
+ bottom: "conv4_3_norm_mbox_conf"
1072
+ top: "conv4_3_norm_mbox_conf_perm"
1073
+ permute_param {
1074
+ order: 0
1075
+ order: 2
1076
+ order: 3
1077
+ order: 1
1078
+ }
1079
+ }
1080
+ layer {
1081
+ name: "conv4_3_norm_mbox_conf_flat"
1082
+ type: "Flatten"
1083
+ bottom: "conv4_3_norm_mbox_conf_perm"
1084
+ top: "conv4_3_norm_mbox_conf_flat"
1085
+ flatten_param {
1086
+ axis: 1
1087
+ }
1088
+ }
1089
+ layer {
1090
+ name: "conv4_3_norm_mbox_priorbox"
1091
+ type: "PriorBox"
1092
+ bottom: "conv4_3_norm"
1093
+ bottom: "data"
1094
+ top: "conv4_3_norm_mbox_priorbox"
1095
+ prior_box_param {
1096
+ min_size: 30.0
1097
+ max_size: 60.0
1098
+ aspect_ratio: 2
1099
+ flip: true
1100
+ clip: false
1101
+ variance: 0.1
1102
+ variance: 0.1
1103
+ variance: 0.2
1104
+ variance: 0.2
1105
+ step: 8
1106
+ offset: 0.5
1107
+ }
1108
+ }
1109
+ layer {
1110
+ name: "fc7_mbox_loc"
1111
+ type: "Convolution"
1112
+ bottom: "fc7"
1113
+ top: "fc7_mbox_loc"
1114
+ param {
1115
+ lr_mult: 1
1116
+ decay_mult: 1
1117
+ }
1118
+ param {
1119
+ lr_mult: 2
1120
+ decay_mult: 0
1121
+ }
1122
+ convolution_param {
1123
+ num_output: 24
1124
+ pad: 1
1125
+ kernel_size: 3
1126
+ stride: 1
1127
+ weight_filler {
1128
+ type: "xavier"
1129
+ }
1130
+ bias_filler {
1131
+ type: "constant"
1132
+ value: 0
1133
+ }
1134
+ }
1135
+ }
1136
+ layer {
1137
+ name: "fc7_mbox_loc_perm"
1138
+ type: "Permute"
1139
+ bottom: "fc7_mbox_loc"
1140
+ top: "fc7_mbox_loc_perm"
1141
+ permute_param {
1142
+ order: 0
1143
+ order: 2
1144
+ order: 3
1145
+ order: 1
1146
+ }
1147
+ }
1148
+ layer {
1149
+ name: "fc7_mbox_loc_flat"
1150
+ type: "Flatten"
1151
+ bottom: "fc7_mbox_loc_perm"
1152
+ top: "fc7_mbox_loc_flat"
1153
+ flatten_param {
1154
+ axis: 1
1155
+ }
1156
+ }
1157
+ layer {
1158
+ name: "fc7_mbox_conf"
1159
+ type: "Convolution"
1160
+ bottom: "fc7"
1161
+ top: "fc7_mbox_conf"
1162
+ param {
1163
+ lr_mult: 1
1164
+ decay_mult: 1
1165
+ }
1166
+ param {
1167
+ lr_mult: 2
1168
+ decay_mult: 0
1169
+ }
1170
+ convolution_param {
1171
+ num_output: 12 # 126
1172
+ pad: 1
1173
+ kernel_size: 3
1174
+ stride: 1
1175
+ weight_filler {
1176
+ type: "xavier"
1177
+ }
1178
+ bias_filler {
1179
+ type: "constant"
1180
+ value: 0
1181
+ }
1182
+ }
1183
+ }
1184
+ layer {
1185
+ name: "fc7_mbox_conf_perm"
1186
+ type: "Permute"
1187
+ bottom: "fc7_mbox_conf"
1188
+ top: "fc7_mbox_conf_perm"
1189
+ permute_param {
1190
+ order: 0
1191
+ order: 2
1192
+ order: 3
1193
+ order: 1
1194
+ }
1195
+ }
1196
+ layer {
1197
+ name: "fc7_mbox_conf_flat"
1198
+ type: "Flatten"
1199
+ bottom: "fc7_mbox_conf_perm"
1200
+ top: "fc7_mbox_conf_flat"
1201
+ flatten_param {
1202
+ axis: 1
1203
+ }
1204
+ }
1205
+ layer {
1206
+ name: "fc7_mbox_priorbox"
1207
+ type: "PriorBox"
1208
+ bottom: "fc7"
1209
+ bottom: "data"
1210
+ top: "fc7_mbox_priorbox"
1211
+ prior_box_param {
1212
+ min_size: 60.0
1213
+ max_size: 111.0
1214
+ aspect_ratio: 2
1215
+ aspect_ratio: 3
1216
+ flip: true
1217
+ clip: false
1218
+ variance: 0.1
1219
+ variance: 0.1
1220
+ variance: 0.2
1221
+ variance: 0.2
1222
+ step: 16
1223
+ offset: 0.5
1224
+ }
1225
+ }
1226
+ layer {
1227
+ name: "conv6_2_mbox_loc"
1228
+ type: "Convolution"
1229
+ bottom: "conv6_2_h"
1230
+ top: "conv6_2_mbox_loc"
1231
+ param {
1232
+ lr_mult: 1
1233
+ decay_mult: 1
1234
+ }
1235
+ param {
1236
+ lr_mult: 2
1237
+ decay_mult: 0
1238
+ }
1239
+ convolution_param {
1240
+ num_output: 24
1241
+ pad: 1
1242
+ kernel_size: 3
1243
+ stride: 1
1244
+ weight_filler {
1245
+ type: "xavier"
1246
+ }
1247
+ bias_filler {
1248
+ type: "constant"
1249
+ value: 0
1250
+ }
1251
+ }
1252
+ }
1253
+ layer {
1254
+ name: "conv6_2_mbox_loc_perm"
1255
+ type: "Permute"
1256
+ bottom: "conv6_2_mbox_loc"
1257
+ top: "conv6_2_mbox_loc_perm"
1258
+ permute_param {
1259
+ order: 0
1260
+ order: 2
1261
+ order: 3
1262
+ order: 1
1263
+ }
1264
+ }
1265
+ layer {
1266
+ name: "conv6_2_mbox_loc_flat"
1267
+ type: "Flatten"
1268
+ bottom: "conv6_2_mbox_loc_perm"
1269
+ top: "conv6_2_mbox_loc_flat"
1270
+ flatten_param {
1271
+ axis: 1
1272
+ }
1273
+ }
1274
+ layer {
1275
+ name: "conv6_2_mbox_conf"
1276
+ type: "Convolution"
1277
+ bottom: "conv6_2_h"
1278
+ top: "conv6_2_mbox_conf"
1279
+ param {
1280
+ lr_mult: 1
1281
+ decay_mult: 1
1282
+ }
1283
+ param {
1284
+ lr_mult: 2
1285
+ decay_mult: 0
1286
+ }
1287
+ convolution_param {
1288
+ num_output: 12 # 126
1289
+ pad: 1
1290
+ kernel_size: 3
1291
+ stride: 1
1292
+ weight_filler {
1293
+ type: "xavier"
1294
+ }
1295
+ bias_filler {
1296
+ type: "constant"
1297
+ value: 0
1298
+ }
1299
+ }
1300
+ }
1301
+ layer {
1302
+ name: "conv6_2_mbox_conf_perm"
1303
+ type: "Permute"
1304
+ bottom: "conv6_2_mbox_conf"
1305
+ top: "conv6_2_mbox_conf_perm"
1306
+ permute_param {
1307
+ order: 0
1308
+ order: 2
1309
+ order: 3
1310
+ order: 1
1311
+ }
1312
+ }
1313
+ layer {
1314
+ name: "conv6_2_mbox_conf_flat"
1315
+ type: "Flatten"
1316
+ bottom: "conv6_2_mbox_conf_perm"
1317
+ top: "conv6_2_mbox_conf_flat"
1318
+ flatten_param {
1319
+ axis: 1
1320
+ }
1321
+ }
1322
+ layer {
1323
+ name: "conv6_2_mbox_priorbox"
1324
+ type: "PriorBox"
1325
+ bottom: "conv6_2_h"
1326
+ bottom: "data"
1327
+ top: "conv6_2_mbox_priorbox"
1328
+ prior_box_param {
1329
+ min_size: 111.0
1330
+ max_size: 162.0
1331
+ aspect_ratio: 2
1332
+ aspect_ratio: 3
1333
+ flip: true
1334
+ clip: false
1335
+ variance: 0.1
1336
+ variance: 0.1
1337
+ variance: 0.2
1338
+ variance: 0.2
1339
+ step: 32
1340
+ offset: 0.5
1341
+ }
1342
+ }
1343
+ layer {
1344
+ name: "conv7_2_mbox_loc"
1345
+ type: "Convolution"
1346
+ bottom: "conv7_2_h"
1347
+ top: "conv7_2_mbox_loc"
1348
+ param {
1349
+ lr_mult: 1
1350
+ decay_mult: 1
1351
+ }
1352
+ param {
1353
+ lr_mult: 2
1354
+ decay_mult: 0
1355
+ }
1356
+ convolution_param {
1357
+ num_output: 24
1358
+ pad: 1
1359
+ kernel_size: 3
1360
+ stride: 1
1361
+ weight_filler {
1362
+ type: "xavier"
1363
+ }
1364
+ bias_filler {
1365
+ type: "constant"
1366
+ value: 0
1367
+ }
1368
+ }
1369
+ }
1370
+ layer {
1371
+ name: "conv7_2_mbox_loc_perm"
1372
+ type: "Permute"
1373
+ bottom: "conv7_2_mbox_loc"
1374
+ top: "conv7_2_mbox_loc_perm"
1375
+ permute_param {
1376
+ order: 0
1377
+ order: 2
1378
+ order: 3
1379
+ order: 1
1380
+ }
1381
+ }
1382
+ layer {
1383
+ name: "conv7_2_mbox_loc_flat"
1384
+ type: "Flatten"
1385
+ bottom: "conv7_2_mbox_loc_perm"
1386
+ top: "conv7_2_mbox_loc_flat"
1387
+ flatten_param {
1388
+ axis: 1
1389
+ }
1390
+ }
1391
+ layer {
1392
+ name: "conv7_2_mbox_conf"
1393
+ type: "Convolution"
1394
+ bottom: "conv7_2_h"
1395
+ top: "conv7_2_mbox_conf"
1396
+ param {
1397
+ lr_mult: 1
1398
+ decay_mult: 1
1399
+ }
1400
+ param {
1401
+ lr_mult: 2
1402
+ decay_mult: 0
1403
+ }
1404
+ convolution_param {
1405
+ num_output: 12 # 126
1406
+ pad: 1
1407
+ kernel_size: 3
1408
+ stride: 1
1409
+ weight_filler {
1410
+ type: "xavier"
1411
+ }
1412
+ bias_filler {
1413
+ type: "constant"
1414
+ value: 0
1415
+ }
1416
+ }
1417
+ }
1418
+ layer {
1419
+ name: "conv7_2_mbox_conf_perm"
1420
+ type: "Permute"
1421
+ bottom: "conv7_2_mbox_conf"
1422
+ top: "conv7_2_mbox_conf_perm"
1423
+ permute_param {
1424
+ order: 0
1425
+ order: 2
1426
+ order: 3
1427
+ order: 1
1428
+ }
1429
+ }
1430
+ layer {
1431
+ name: "conv7_2_mbox_conf_flat"
1432
+ type: "Flatten"
1433
+ bottom: "conv7_2_mbox_conf_perm"
1434
+ top: "conv7_2_mbox_conf_flat"
1435
+ flatten_param {
1436
+ axis: 1
1437
+ }
1438
+ }
1439
+ layer {
1440
+ name: "conv7_2_mbox_priorbox"
1441
+ type: "PriorBox"
1442
+ bottom: "conv7_2_h"
1443
+ bottom: "data"
1444
+ top: "conv7_2_mbox_priorbox"
1445
+ prior_box_param {
1446
+ min_size: 162.0
1447
+ max_size: 213.0
1448
+ aspect_ratio: 2
1449
+ aspect_ratio: 3
1450
+ flip: true
1451
+ clip: false
1452
+ variance: 0.1
1453
+ variance: 0.1
1454
+ variance: 0.2
1455
+ variance: 0.2
1456
+ step: 64
1457
+ offset: 0.5
1458
+ }
1459
+ }
1460
+ layer {
1461
+ name: "conv8_2_mbox_loc"
1462
+ type: "Convolution"
1463
+ bottom: "conv8_2_h"
1464
+ top: "conv8_2_mbox_loc"
1465
+ param {
1466
+ lr_mult: 1
1467
+ decay_mult: 1
1468
+ }
1469
+ param {
1470
+ lr_mult: 2
1471
+ decay_mult: 0
1472
+ }
1473
+ convolution_param {
1474
+ num_output: 16
1475
+ pad: 1
1476
+ kernel_size: 3
1477
+ stride: 1
1478
+ weight_filler {
1479
+ type: "xavier"
1480
+ }
1481
+ bias_filler {
1482
+ type: "constant"
1483
+ value: 0
1484
+ }
1485
+ }
1486
+ }
1487
+ layer {
1488
+ name: "conv8_2_mbox_loc_perm"
1489
+ type: "Permute"
1490
+ bottom: "conv8_2_mbox_loc"
1491
+ top: "conv8_2_mbox_loc_perm"
1492
+ permute_param {
1493
+ order: 0
1494
+ order: 2
1495
+ order: 3
1496
+ order: 1
1497
+ }
1498
+ }
1499
+ layer {
1500
+ name: "conv8_2_mbox_loc_flat"
1501
+ type: "Flatten"
1502
+ bottom: "conv8_2_mbox_loc_perm"
1503
+ top: "conv8_2_mbox_loc_flat"
1504
+ flatten_param {
1505
+ axis: 1
1506
+ }
1507
+ }
1508
+ layer {
1509
+ name: "conv8_2_mbox_conf"
1510
+ type: "Convolution"
1511
+ bottom: "conv8_2_h"
1512
+ top: "conv8_2_mbox_conf"
1513
+ param {
1514
+ lr_mult: 1
1515
+ decay_mult: 1
1516
+ }
1517
+ param {
1518
+ lr_mult: 2
1519
+ decay_mult: 0
1520
+ }
1521
+ convolution_param {
1522
+ num_output: 8 # 84
1523
+ pad: 1
1524
+ kernel_size: 3
1525
+ stride: 1
1526
+ weight_filler {
1527
+ type: "xavier"
1528
+ }
1529
+ bias_filler {
1530
+ type: "constant"
1531
+ value: 0
1532
+ }
1533
+ }
1534
+ }
1535
+ layer {
1536
+ name: "conv8_2_mbox_conf_perm"
1537
+ type: "Permute"
1538
+ bottom: "conv8_2_mbox_conf"
1539
+ top: "conv8_2_mbox_conf_perm"
1540
+ permute_param {
1541
+ order: 0
1542
+ order: 2
1543
+ order: 3
1544
+ order: 1
1545
+ }
1546
+ }
1547
+ layer {
1548
+ name: "conv8_2_mbox_conf_flat"
1549
+ type: "Flatten"
1550
+ bottom: "conv8_2_mbox_conf_perm"
1551
+ top: "conv8_2_mbox_conf_flat"
1552
+ flatten_param {
1553
+ axis: 1
1554
+ }
1555
+ }
1556
+ layer {
1557
+ name: "conv8_2_mbox_priorbox"
1558
+ type: "PriorBox"
1559
+ bottom: "conv8_2_h"
1560
+ bottom: "data"
1561
+ top: "conv8_2_mbox_priorbox"
1562
+ prior_box_param {
1563
+ min_size: 213.0
1564
+ max_size: 264.0
1565
+ aspect_ratio: 2
1566
+ flip: true
1567
+ clip: false
1568
+ variance: 0.1
1569
+ variance: 0.1
1570
+ variance: 0.2
1571
+ variance: 0.2
1572
+ step: 100
1573
+ offset: 0.5
1574
+ }
1575
+ }
1576
+ layer {
1577
+ name: "conv9_2_mbox_loc"
1578
+ type: "Convolution"
1579
+ bottom: "conv9_2_h"
1580
+ top: "conv9_2_mbox_loc"
1581
+ param {
1582
+ lr_mult: 1
1583
+ decay_mult: 1
1584
+ }
1585
+ param {
1586
+ lr_mult: 2
1587
+ decay_mult: 0
1588
+ }
1589
+ convolution_param {
1590
+ num_output: 16
1591
+ pad: 1
1592
+ kernel_size: 3
1593
+ stride: 1
1594
+ weight_filler {
1595
+ type: "xavier"
1596
+ }
1597
+ bias_filler {
1598
+ type: "constant"
1599
+ value: 0
1600
+ }
1601
+ }
1602
+ }
1603
+ layer {
1604
+ name: "conv9_2_mbox_loc_perm"
1605
+ type: "Permute"
1606
+ bottom: "conv9_2_mbox_loc"
1607
+ top: "conv9_2_mbox_loc_perm"
1608
+ permute_param {
1609
+ order: 0
1610
+ order: 2
1611
+ order: 3
1612
+ order: 1
1613
+ }
1614
+ }
1615
+ layer {
1616
+ name: "conv9_2_mbox_loc_flat"
1617
+ type: "Flatten"
1618
+ bottom: "conv9_2_mbox_loc_perm"
1619
+ top: "conv9_2_mbox_loc_flat"
1620
+ flatten_param {
1621
+ axis: 1
1622
+ }
1623
+ }
1624
+ layer {
1625
+ name: "conv9_2_mbox_conf"
1626
+ type: "Convolution"
1627
+ bottom: "conv9_2_h"
1628
+ top: "conv9_2_mbox_conf"
1629
+ param {
1630
+ lr_mult: 1
1631
+ decay_mult: 1
1632
+ }
1633
+ param {
1634
+ lr_mult: 2
1635
+ decay_mult: 0
1636
+ }
1637
+ convolution_param {
1638
+ num_output: 8 # 84
1639
+ pad: 1
1640
+ kernel_size: 3
1641
+ stride: 1
1642
+ weight_filler {
1643
+ type: "xavier"
1644
+ }
1645
+ bias_filler {
1646
+ type: "constant"
1647
+ value: 0
1648
+ }
1649
+ }
1650
+ }
1651
+ layer {
1652
+ name: "conv9_2_mbox_conf_perm"
1653
+ type: "Permute"
1654
+ bottom: "conv9_2_mbox_conf"
1655
+ top: "conv9_2_mbox_conf_perm"
1656
+ permute_param {
1657
+ order: 0
1658
+ order: 2
1659
+ order: 3
1660
+ order: 1
1661
+ }
1662
+ }
1663
+ layer {
1664
+ name: "conv9_2_mbox_conf_flat"
1665
+ type: "Flatten"
1666
+ bottom: "conv9_2_mbox_conf_perm"
1667
+ top: "conv9_2_mbox_conf_flat"
1668
+ flatten_param {
1669
+ axis: 1
1670
+ }
1671
+ }
1672
+ layer {
1673
+ name: "conv9_2_mbox_priorbox"
1674
+ type: "PriorBox"
1675
+ bottom: "conv9_2_h"
1676
+ bottom: "data"
1677
+ top: "conv9_2_mbox_priorbox"
1678
+ prior_box_param {
1679
+ min_size: 264.0
1680
+ max_size: 315.0
1681
+ aspect_ratio: 2
1682
+ flip: true
1683
+ clip: false
1684
+ variance: 0.1
1685
+ variance: 0.1
1686
+ variance: 0.2
1687
+ variance: 0.2
1688
+ step: 300
1689
+ offset: 0.5
1690
+ }
1691
+ }
1692
+ layer {
1693
+ name: "mbox_loc"
1694
+ type: "Concat"
1695
+ bottom: "conv4_3_norm_mbox_loc_flat"
1696
+ bottom: "fc7_mbox_loc_flat"
1697
+ bottom: "conv6_2_mbox_loc_flat"
1698
+ bottom: "conv7_2_mbox_loc_flat"
1699
+ bottom: "conv8_2_mbox_loc_flat"
1700
+ bottom: "conv9_2_mbox_loc_flat"
1701
+ top: "mbox_loc"
1702
+ concat_param {
1703
+ axis: 1
1704
+ }
1705
+ }
1706
+ layer {
1707
+ name: "mbox_conf"
1708
+ type: "Concat"
1709
+ bottom: "conv4_3_norm_mbox_conf_flat"
1710
+ bottom: "fc7_mbox_conf_flat"
1711
+ bottom: "conv6_2_mbox_conf_flat"
1712
+ bottom: "conv7_2_mbox_conf_flat"
1713
+ bottom: "conv8_2_mbox_conf_flat"
1714
+ bottom: "conv9_2_mbox_conf_flat"
1715
+ top: "mbox_conf"
1716
+ concat_param {
1717
+ axis: 1
1718
+ }
1719
+ }
1720
+ layer {
1721
+ name: "mbox_priorbox"
1722
+ type: "Concat"
1723
+ bottom: "conv4_3_norm_mbox_priorbox"
1724
+ bottom: "fc7_mbox_priorbox"
1725
+ bottom: "conv6_2_mbox_priorbox"
1726
+ bottom: "conv7_2_mbox_priorbox"
1727
+ bottom: "conv8_2_mbox_priorbox"
1728
+ bottom: "conv9_2_mbox_priorbox"
1729
+ top: "mbox_priorbox"
1730
+ concat_param {
1731
+ axis: 2
1732
+ }
1733
+ }
1734
+
1735
+ layer {
1736
+ name: "mbox_conf_reshape"
1737
+ type: "Reshape"
1738
+ bottom: "mbox_conf"
1739
+ top: "mbox_conf_reshape"
1740
+ reshape_param {
1741
+ shape {
1742
+ dim: 0
1743
+ dim: -1
1744
+ dim: 2
1745
+ }
1746
+ }
1747
+ }
1748
+ layer {
1749
+ name: "mbox_conf_softmax"
1750
+ type: "Softmax"
1751
+ bottom: "mbox_conf_reshape"
1752
+ top: "mbox_conf_softmax"
1753
+ softmax_param {
1754
+ axis: 2
1755
+ }
1756
+ }
1757
+ layer {
1758
+ name: "mbox_conf_flatten"
1759
+ type: "Flatten"
1760
+ bottom: "mbox_conf_softmax"
1761
+ top: "mbox_conf_flatten"
1762
+ flatten_param {
1763
+ axis: 1
1764
+ }
1765
+ }
1766
+
1767
+ layer {
1768
+ name: "detection_out"
1769
+ type: "DetectionOutput"
1770
+ bottom: "mbox_loc"
1771
+ bottom: "mbox_conf_flatten"
1772
+ bottom: "mbox_priorbox"
1773
+ top: "detection_out"
1774
+ include {
1775
+ phase: TEST
1776
+ }
1777
+ detection_output_param {
1778
+ num_classes: 2
1779
+ share_location: true
1780
+ background_label_id: 0
1781
+ nms_param {
1782
+ nms_threshold: 0.3
1783
+ top_k: 400
1784
+ }
1785
+ code_type: CENTER_SIZE
1786
+ keep_top_k: 200
1787
+ confidence_threshold: 0.01
1788
+ }
1789
+ }
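
For reference (not part of this commit's diff): the prototxt above, paired with the res10_300x300_ssd_iter_140000_fp16.caffemodel weights added below, is the standard OpenCV SSD face detector. A minimal sketch of running it directly through cv2.dnn follows; the image path and the 0.5 confidence threshold are placeholder choices (the DetectionOutput layer itself only filters at confidence_threshold 0.01).

# Minimal sketch (illustration only): run the SSD face detector defined by
# deploy.prototxt + res10_300x300_ssd_iter_140000_fp16.caffemodel via OpenCV dnn.
# "sample.jpg" and the 0.5 threshold are placeholder assumptions.
import cv2

net = cv2.dnn.readNetFromCaffe(
    "deploy.prototxt", "res10_300x300_ssd_iter_140000_fp16.caffemodel"
)

image = cv2.imread("sample.jpg")
h, w = image.shape[:2]

# The network expects a 300x300 BGR input with the training-time mean subtracted.
blob = cv2.dnn.blobFromImage(
    cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0)
)
net.setInput(blob)
detections = net.forward()  # shape: (1, 1, N, 7); each row is [_, _, conf, x1, y1, x2, y2]

for i in range(detections.shape[2]):
    confidence = float(detections[0, 0, i, 2])
    if confidence < 0.5:
        continue
    x1, y1, x2, y2 = (detections[0, 0, i, 3:7] * [w, h, w, h]).astype(int)
    print(f"face {confidence:.2f}: ({x1}, {y1}) -> ({x2}, {y2})")
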
download_models.py ADDED
@@ -0,0 +1,56 @@
1
+ import os
2
+ import re
3
+ import cv2
4
+ import numpy as np
5
+ import io
6
+ import sys
7
+ import numpy as np
8
+ import timm
9
+ import pyiqa
10
+ import torch
11
+ from transformers import DonutProcessor, VisionEncoderDecoderModel
12
+
13
+
14
+ device = "cuda" if torch.cuda.is_available() else "cpu"
15
+
16
+ licence_model = torch.hub.load(
17
+ "ultralytics/yolov5", "custom", path="Licenseplate_model.pt", device="cpu", force_reload=True
18
+ )
19
+ licence_model.cpu()
20
+
21
+ detector = cv2.dnn.DetectionModel("res10_300x300_ssd_iter_140000_fp16.caffemodel", "deploy.prototxt")
22
+
23
+ processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
24
+ doc_qa_model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
25
+
26
+ device = "cuda" if torch.cuda.is_available() else "cpu"
27
+ doc_qa_model.to(device)
28
+
29
+ model = torch.hub.load(
30
+ "ultralytics/yolov5", "custom", path="best.pt", device="cpu", force_reload=True
31
+ )
32
+ model.cpu()
33
+
34
+ classes = [
35
+ "gas-distribution-meter",
36
+ "gas-distribution-piping",
37
+ "gas-distribution-regulator",
38
+ "gas-distribution-valve"
39
+ ]
40
+
41
+ class_to_idx = {'gas-distribution-meter': 0,
42
+ 'gas-distribution-piping': 1,
43
+ 'gas-distribution-regulator': 2,
44
+ 'gas-distribution-valve': 3}
45
+
46
+ idx_to_classes = {v:k for k,v in class_to_idx.items()}
47
+ modelname = "resnet50d"
48
+ model_weights = "best_classifer_model.pt"
49
+ num_classes = len(classes)
50
+
51
+ classifier_model = timm.create_model(
52
+ "resnet50d", pretrained=True, num_classes=num_classes, drop_path_rate=0.05
53
+ )
54
+ classifier_model.load_state_dict(torch.load(model_weights, map_location=torch.device('cpu'))["model_state_dict"])
55
+
56
+ musiq_metric = pyiqa.create_metric('musiq-koniq', device=torch.device('cpu'))
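
For reference (not part of the commit): download_models.py exists to pre-download and cache the model weights. Once it has run, the objects it builds can be exercised roughly as sketched below; this assumes the names defined in download_models.py are in scope, and the image path, question string, and printed fields are placeholder assumptions rather than code from this repository.

# Rough usage sketch (illustration only), assuming the objects created in
# download_models.py above (model, musiq_metric, processor, doc_qa_model, device)
# are already in scope. "example.jpg" and the question are placeholders.
from PIL import Image

# YOLOv5 gas-distribution detector
results = model("example.jpg")
print(results.pandas().xyxy[0])        # boxes, confidences, class names

# MUSIQ no-reference image quality score
print(float(musiq_metric("example.jpg")))

# Donut document VQA: the processor builds the task prompt and decodes the answer
image = Image.open("example.jpg").convert("RGB")
prompt = "<s_docvqa><s_question>What is the meter reading?</s_question><s_answer>"
pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)
decoder_input_ids = processor.tokenizer(
    prompt, add_special_tokens=False, return_tensors="pt"
).input_ids.to(device)
outputs = doc_qa_model.generate(
    pixel_values,
    decoder_input_ids=decoder_input_ids,
    max_length=doc_qa_model.decoder.config.max_position_embeddings,
)
print(processor.batch_decode(outputs)[0])
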
requirements.txt ADDED
@@ -0,0 +1,60 @@
1
+ # YOLOv5 requirements
2
+ # Usage: pip install -r requirements.txt
3
+
4
+ # Base ------------------------------------------------------------------------
5
+ gitpython
6
+ ipython # interactive notebook
7
+ matplotlib>=3.2.2
8
+ numpy>=1.18.5
9
+ opencv-python>=4.1.1
10
+ Pillow>=7.1.2
11
+ psutil # system resources
12
+ PyYAML>=5.3.1
13
+ requests>=2.23.0
14
+ scipy>=1.4.1
15
+ thop>=0.1.1 # FLOPs computation
16
+ torch>=1.7.0 # see https://pytorch.org/get-started/locally (recommended)
17
+ torchvision>=0.8.1
18
+ tqdm>=4.64.0
19
+ # protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012
20
+ fastapi
21
+ python-multipart
22
+ uvicorn
23
+ timm==0.5.4
24
+ pytorch-accelerated
25
+ tensorflow-hub
26
+ pyiqa
27
+ protobuf==3.20.*
28
+ transformers
29
+ sentencepiece
30
+ sentence-transformers
31
+ # Logging ---------------------------------------------------------------------
32
+ tensorboard>=2.4.1
33
+ # clearml>=1.2.0
34
+ # comet
35
+
36
+ # Plotting --------------------------------------------------------------------
37
+ pandas>=1.1.4
38
+ seaborn>=0.11.0
39
+ pinecone-client
40
+ python-dotenv
41
+ # Export ----------------------------------------------------------------------
42
+ coremltools>=6.0 # CoreML export
43
+ onnx>=1.12.0 # ONNX export
44
+ onnx-simplifier>=0.4.1 # ONNX simplifier
45
+ # nvidia-pyindex # TensorRT export
46
+ # nvidia-tensorrt # TensorRT export
47
+ scikit-learn<=1.1.2 # CoreML quantization
48
+ tensorflow>=2.4.1 # TF exports (-cpu, -aarch64, -macos)
49
+ # tensorflowjs>=3.9.0 # TF.js export
50
+ # openvino-dev # OpenVINO export
51
+
52
+ # Deploy ----------------------------------------------------------------------
53
+ # tritonclient[all]~=2.24.0
54
+
55
+ # Extras ----------------------------------------------------------------------
56
+ # mss # screenshots
57
+ albumentations>=1.0.3
58
+ pycocotools>=2.0.6 # COCO mAP
59
+ # roboflow
60
+ ultralytics # HUB https://hub.ultralytics.com
res10_300x300_ssd_iter_140000_fp16.caffemodel ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:510ffd2471bd81e3fcc88a5beb4eae4fb445ccf8333ebc54e7302b83f4158a76
3
+ size 5351047
run_cmds.txt ADDED
@@ -0,0 +1,5 @@
1
+ docker build -t abhi001vj/object_detection_backend .
2
+ sudo kill -9 $(sudo lsof -t -i:8000)
3
+ docker run -p 8000:8000 abhi001vj/object_detection_backend
4
+
5
+ uvicorn app:app --port 8000 --reload