lohitkavuru14 committed
Commit cb5c22d
1 Parent(s): 6f0cd6e

Update app.py

app.py CHANGED
@@ -0,0 +1,373 @@
import fileinput
import itertools
import os
import re
from copy import deepcopy
from operator import itemgetter
from pathlib import Path
from typing import Union

# `!pip` is notebook-only syntax; os.system keeps this runnable as a plain
# Python script (in practice these installs belong in requirements.txt).
os.system("pip install opencv-python")
os.system("pip install --quiet gradio==2.9.0b0")
import cv2  # type: ignore
import gradio as gr  # type: ignore
import numpy as np
import torch
from deep_sort_realtime.deepsort_tracker import DeepSort  # type: ignore
from paddleocr import PaddleOCR  # type: ignore

if not os.path.isfile("weights.pt"):
    weights_url = "https://archive.org/download/anpr_weights/weights.pt"
    os.system(f"wget {weights_url}")

if not os.path.isdir("examples"):
    examples_url = "https://archive.org/download/anpr_examples_202208/examples.tar.gz"
    os.system(f"wget {examples_url}")
    os.system("tar -xvf examples.tar.gz")
    os.system("rm -rf examples.tar.gz")


def prepend_text(filename: Union[str, Path], text: str):
    with fileinput.input(filename, inplace=True) as file:
        for line in file:
            if file.isfirstline():
                print(text)
            print(line, end="")


if not os.path.isdir("yolov7"):
    yolov7_repo_url = "https://github.com/WongKinYiu/yolov7"
    os.system(f"git clone {yolov7_repo_url}")
    # Fix import errors
    for file in [
        "yolov7/models/common.py",
        "yolov7/models/experimental.py",
        "yolov7/models/yolo.py",
        "yolov7/utils/datasets.py",
    ]:
        prepend_text(file, "import sys\nsys.path.insert(0, './yolov7')")

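# After the patch above, each listed yolov7 file starts with:
#   import sys
#   sys.path.insert(0, './yolov7')
# so the repo's internal `models`/`utils` imports resolve when the code is
# loaded from this app's working directory instead of as an installed package.
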
from yolov7.models.experimental import attempt_load  # type: ignore
from yolov7.utils.datasets import letterbox  # type: ignore
from yolov7.utils.general import check_img_size  # type: ignore
from yolov7.utils.general import non_max_suppression  # type: ignore
from yolov7.utils.general import scale_coords  # type: ignore
from yolov7.utils.plots import plot_one_box  # type: ignore
from yolov7.utils.torch_utils import TracedModel, select_device  # type: ignore

weights = "weights.pt"
device_id = "cpu"
image_size = 640
trace = True

# Initialize
device = select_device(device_id)
half = device.type != "cpu"  # half precision only supported on CUDA

# Load model
model = attempt_load(weights, map_location=device)  # load FP32 model
stride = int(model.stride.max())  # model stride
imgsz = check_img_size(image_size, s=stride)  # check img_size

if trace:
    model = TracedModel(model, device, image_size)

if half:
    model.half()  # to FP16

if device.type != "cpu":
    model(
        torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))
    )  # run once

model.eval()

# Load OCR
paddle = PaddleOCR(lang="en")


def detect_plate(source_image):
    # Padded resize
    img_size = 640
    stride = 32
    img = letterbox(source_image, img_size, stride=stride)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    with torch.no_grad():
        # Inference
        pred = model(img, augment=True)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.25, 0.45, classes=0, agnostic=True)

    plate_detections = []
    det_confidences = []

    # Process detections
    for i, det in enumerate(pred):  # detections per image
        if len(det):
            # Rescale boxes from img_size to source image size
            det[:, :4] = scale_coords(
                img.shape[2:], det[:, :4], source_image.shape
            ).round()

            # Return results
            for *xyxy, conf, cls in reversed(det):
                coords = [
                    int(position)
                    for position in (torch.tensor(xyxy).view(1, 4)).tolist()[0]
                ]
                plate_detections.append(coords)
                det_confidences.append(conf.item())

    return plate_detections, det_confidences

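# Minimal usage sketch (the image path is illustrative):
#   frame = cv2.imread("examples/test_image_1.jpg")
#   boxes, confs = detect_plate(frame)
# `boxes` holds [x1, y1, x2, y2] pixel coordinates in the original frame;
# `confs` holds the matching detection scores.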

def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=2.0, threshold=0):
    blurred = cv2.GaussianBlur(image, kernel_size, sigma)
    sharpened = float(amount + 1) * image - float(amount) * blurred
    sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))
    sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))
    sharpened = sharpened.round().astype(np.uint8)
    if threshold > 0:
        # Cast to a signed type first: direct uint8 subtraction would wrap around.
        diff = image.astype(np.int16) - blurred.astype(np.int16)
        low_contrast_mask = np.absolute(diff) < threshold
        np.copyto(sharpened, image, where=low_contrast_mask)
    return sharpened

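# Recap of the formula above: unsharp masking adds the image's high-frequency
# detail back onto itself, sharpened = image + amount * (image - blurred),
# which rearranges to the (amount + 1) * image - amount * blurred form used.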

def crop(image, coord):
    cropped_image = image[int(coord[1]) : int(coord[3]), int(coord[0]) : int(coord[2])]
    return cropped_image


def ocr_plate(plate_region):
    # Image pre-processing for more accurate OCR
    rescaled = cv2.resize(
        plate_region, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC
    )
    grayscale = cv2.cvtColor(rescaled, cv2.COLOR_BGR2GRAY)
    kernel = np.ones((1, 1), np.uint8)
    dilated = cv2.dilate(grayscale, kernel, iterations=1)
    eroded = cv2.erode(dilated, kernel, iterations=1)
    sharpened = unsharp_mask(eroded)

    # OCR the preprocessed image
    results = paddle.ocr(sharpened, det=False, cls=False)
    flattened = list(itertools.chain.from_iterable(results))
    plate_text, ocr_confidence = max(flattened, key=itemgetter(1), default=("", 0))

    # Filter out anything but uppercase letters, digits, hyphens and whitespace.
    plate_text = re.sub(r"[^-A-Z0-9 ]", r"", plate_text).strip()

    # PaddleOCR can report NaN confidence; treat that as zero.
    if ocr_confidence != ocr_confidence:  # NaN is never equal to itself
        ocr_confidence = 0

    return plate_text, ocr_confidence

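# With det=False, paddle.ocr returns one list of (text, confidence) pairs per
# input image (hence the flatten + max-by-confidence above); the exact result
# shape can vary across PaddleOCR versions.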

def get_plates_from_image(input):
    if input is None:
        return None
    plate_detections, det_confidences = detect_plate(input)
    plate_texts = []
    ocr_confidences = []
    detected_image = deepcopy(input)
    for coords in plate_detections:
        plate_region = crop(input, coords)
        plate_text, ocr_confidence = ocr_plate(plate_region)
        if ocr_confidence == 0:  # If OCR confidence is 0, skip this detection
            continue
        plate_texts.append(plate_text)
        ocr_confidences.append(ocr_confidence)
        plot_one_box(
            coords,
            detected_image,
            label=plate_text,
            color=[0, 150, 255],
            line_thickness=2,
        )
    return detected_image


def pascal_voc_to_coco(x1y1x2y2):
    x1, y1, x2, y2 = x1y1x2y2
    return [x1, y1, x2 - x1, y2 - y1]

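# Quick sanity check of the conversion (illustrative numbers): Pascal VOC
# boxes are [x1, y1, x2, y2], COCO boxes are [x, y, width, height], so
#   pascal_voc_to_coco([100, 50, 300, 200]) == [100, 50, 200, 150]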

def get_best_ocr(preds, rec_conf, ocr_res, track_id):
    # Keep the highest-confidence OCR reading seen so far for each track.
    for info in preds:
        # Check if this entry belongs to the current track id
        if info["track_id"] == track_id:
            # Keep whichever reading has the higher OCR confidence
            if info["ocr_conf"] < rec_conf:
                info["ocr_conf"] = rec_conf
                info["ocr_txt"] = ocr_res
            else:
                rec_conf = info["ocr_conf"]
                ocr_res = info["ocr_txt"]
            break
    return preds, rec_conf, ocr_res

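# Illustrative behaviour (values made up): if track "3" is stored with
# confidence 0.71 and a new frame reads the same plate at 0.85, the stored
# entry is updated; at 0.60, the stored text and confidence are returned
# instead, so the stronger reading is the one that gets plotted.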

def get_plates_from_video(source):
    if source is None:
        return None

    # Create a VideoCapture object
    video = cv2.VideoCapture(source)

    # The frame resolutions are read from the source video and converted
    # from float to integer.
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = video.get(cv2.CAP_PROP_FPS)

    # Define the codec and create a VideoWriter object.
    temp = f"{Path(source).stem}_temp{Path(source).suffix}"
    export = cv2.VideoWriter(
        temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
    )

    # Initializing the tracker
    tracker = DeepSort(embedder_gpu=False)

    # Initializing some helper variables.
    preds = []
    total_obj = 0

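    # deep_sort_realtime's update_tracks expects raw detections as
    # ([left, top, width, height], confidence, class) tuples, which is why
    # the Pascal VOC boxes are converted to COCO format in the loop below.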
    while True:
        ret, frame = video.read()
        if ret:
            # Run the ANPR algorithm
            bboxes, scores = detect_plate(frame)
            # Convert Pascal VOC detections to COCO
            bboxes = list(map(pascal_voc_to_coco, bboxes))

            if len(bboxes) > 0:
                # Storing all the required info in a list.
                detections = [
                    (bbox, score, "number_plate") for bbox, score in zip(bboxes, scores)
                ]

                # Applying the tracker.
                # The tracker flow: Kalman filter -> target association (using
                # the Hungarian algorithm) and appearance descriptor.
                tracks = tracker.update_tracks(detections, frame=frame)

                # Checking if tracks exist.
                for track in tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue

                    # Changing track bbox to top left, bottom right coordinates
                    bbox = [int(position) for position in list(track.to_tlbr())]

                    # Clamp negative coordinates to the frame boundary
                    for i in range(len(bbox)):
                        if bbox[i] < 0:
                            bbox[i] = 0

                    # Cropping the license plate and applying the OCR.
                    plate_region = crop(frame, bbox)
                    plate_text, ocr_confidence = ocr_plate(plate_region)

                    # Storing the OCR output for the corresponding track id.
                    output_frame = {
                        "track_id": track.track_id,
                        "ocr_txt": plate_text,
                        "ocr_conf": ocr_confidence,
                    }

                    # Append the track id only if it is new; otherwise update
                    # the stored entry with the highest-confidence reading.
                    if track.track_id not in set(
                        pred["track_id"] for pred in preds
                    ):
                        total_obj += 1
                        preds.append(output_frame)
                    else:
                        preds, ocr_confidence, plate_text = get_best_ocr(
                            preds, ocr_confidence, plate_text, track.track_id
                        )

                    # Plotting the prediction.
                    plot_one_box(
                        bbox,
                        frame,
                        label=f"{str(track.track_id)}. {plate_text}",
                        color=[255, 150, 0],
                        line_thickness=3,
                    )

            # Write the frame into the output file
            export.write(frame)
        else:
            break

    # When everything is done, release the video capture and video writer objects
    video.release()
    export.release()

    # Compressing the output video for smaller size and web compatibility.
    output = f"{Path(source).stem}_detected{Path(source).suffix}"
    os.system(
        f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && ffmpeg -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
    )
    os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")

    return output

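# Note on the compression step above: the first ffmpeg pass only gathers rate
# statistics (writing ffmpeg2pass-0.log while discarding output to /dev/null);
# the second pass uses those statistics to hit the target bitrate, and
# -movflags faststart moves the index to the front of the mp4 so playback can
# begin before the file finishes downloading.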

with gr.Blocks() as demo:
    gr.Markdown('### <h3 align="center">Automatic Number Plate Recognition</h3>')
    gr.Markdown(
        "This AI was trained to detect and recognize number plates on vehicles."
    )
    with gr.Tabs():
        with gr.TabItem("Image"):
            with gr.Row():
                image_input = gr.Image()
                image_output = gr.Image()
            image_input.change(
                get_plates_from_image, inputs=image_input, outputs=image_output
            )
            gr.Examples(
                [
                    ["examples/test_image_1.jpg"],
                    ["examples/test_image_2.jpg"],
                    ["examples/test_image_3.png"],
                    ["examples/test_image_4.jpeg"],
                ],
                [image_input],
                image_output,
                get_plates_from_image,
                cache_examples=True,
            )
        with gr.TabItem("Video"):
            with gr.Row():
                video_input = gr.Video(format="mp4")
                video_output = gr.Video(format="mp4")
            video_input.change(
                get_plates_from_video, inputs=video_input, outputs=video_output
            )
            gr.Examples(
                [["examples/test_video_1.mp4"]],
                [video_input],
                video_output,
                get_plates_from_video,
                cache_examples=True,
            )
    gr.Markdown("[@itsyoboieltr](https://github.com/itsyoboieltr)")

demo.launch()