Commit c6bbca4 · Abhishek Gola committed
1 Parent(s): 9708ebf

Added object detection yolox to opencv spaces
README.md
CHANGED
@@ -7,6 +7,12 @@ sdk: gradio
 sdk_version: 5.34.2
 app_file: app.py
 pinned: false
+short_description: Object detection with yolox using OpenCV
+tags:
+- opencv
+- object-detection
+- yolo
+- yolox
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,91 @@
import cv2 as cv
import numpy as np
import gradio as gr
from huggingface_hub import hf_hub_download
from yolox import YoloX

# Download the YOLOX ONNX model from the Hugging Face Hub
model_path = hf_hub_download(
    repo_id="opencv/object_detection_yolox",
    filename="object_detection_yolox_2022nov.onnx"
)

# Initialize the YOLOX wrapper on the OpenCV DNN backend (CPU)
model = YoloX(
    modelPath=model_path,
    confThreshold=0.5,
    nmsThreshold=0.5,
    objThreshold=0.5,
    backendId=cv.dnn.DNN_BACKEND_OPENCV,
    targetId=cv.dnn.DNN_TARGET_CPU
)

# The 80 COCO class names, indexed by the model's class ids
classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
           'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
           'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
           'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
           'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
           'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
           'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
           'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
           'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
           'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
           'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')

def letterbox(srcimg, target_size=(640, 640)):
    # Resize with preserved aspect ratio, padding the remainder with 114 (YOLOX convention)
    padded_img = np.ones((target_size[0], target_size[1], 3), dtype=np.float32) * 114.0
    ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
    resized_img = cv.resize(srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv.INTER_LINEAR).astype(np.float32)
    padded_img[:int(srcimg.shape[0] * ratio), :int(srcimg.shape[1] * ratio)] = resized_img
    return padded_img, ratio

def unletterbox(bbox, scale):
    # Map boxes from letterboxed coordinates back to the original image
    return bbox / scale

def visualize(dets, image, scale):
    res_img = image.copy()
    h, w = res_img.shape[:2]
    font_scale = max(0.5, min(w, h) / 640.0 * 0.5)
    thickness = max(1, int(font_scale * 2))

    for det in dets:
        box = unletterbox(det[:4], scale).astype(np.int32)
        score = det[-2]
        cls_id = int(det[-1])

        x0, y0, x1, y1 = box
        label = '{}:{:.1f}%'.format(classes[cls_id], score * 100)

        cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), thickness)
        (tw, th), _ = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        cv.rectangle(res_img, (x0, y0), (x0 + tw + 2, y0 + th + 4), (255, 255, 255), -1)
        cv.putText(res_img, label, (x0, y0 + th), cv.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)

    return res_img

def detect_objects(input_image):
    # Gradio delivers RGB; keep a BGR copy for OpenCV drawing
    bgr = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
    # The model expects an RGB image letterboxed to 640x640
    input_blob, scale = letterbox(cv.cvtColor(bgr, cv.COLOR_BGR2RGB))

    results = model.infer(input_blob)
    if results is None or len(results) == 0:
        return input_image

    vis_image = visualize(results, bgr, scale)
    return cv.cvtColor(vis_image, cv.COLOR_BGR2RGB)

# Gradio interface
demo = gr.Interface(
    fn=detect_objects,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=gr.Image(type="numpy", label="Detected Objects"),
    title="YOLOX Object Detection (OpenCV + ONNX)",
    description="Upload an image to detect objects using YOLOX ONNX model and OpenCV DNN.",
    allow_flagging="never"
)

if __name__ == "__main__":
    demo.launch()
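To sanity-check the pipeline without launching the Gradio server, `detect_objects` can be called directly. A minimal sketch, assuming a hypothetical local test image `sample.jpg` and that it is run from the Space's root directory:

import cv2 as cv
from app import detect_objects  # importing app.py downloads the model and builds the interface

img_bgr = cv.imread("sample.jpg")                 # OpenCV reads images as BGR
img_rgb = cv.cvtColor(img_bgr, cv.COLOR_BGR2RGB)  # detect_objects expects RGB, as Gradio supplies it
out_rgb = detect_objects(img_rgb)                 # annotated image, still RGB
cv.imwrite("detections.jpg", cv.cvtColor(out_rgb, cv.COLOR_RGB2BGR))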
requirements.txt
ADDED
@@ -0,0 +1,4 @@
opencv-python
gradio
numpy
huggingface_hub
yolox.py
ADDED
@@ -0,0 +1,85 @@
import numpy as np
import cv2

class YoloX:
    def __init__(self, modelPath, confThreshold=0.35, nmsThreshold=0.5, objThreshold=0.5, backendId=0, targetId=0):
        self.num_classes = 80
        self.net = cv2.dnn.readNet(modelPath)
        self.input_size = (640, 640)
        # ImageNet mean/std, kept from the original wrapper (not applied in this pipeline)
        self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)
        self.strides = [8, 16, 32]
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold
        self.backendId = backendId
        self.targetId = targetId
        self.net.setPreferableBackend(self.backendId)
        self.net.setPreferableTarget(self.targetId)

        self.generateAnchors()

    @property
    def name(self):
        return self.__class__.__name__

    def setBackendAndTarget(self, backendId, targetId):
        self.backendId = backendId
        self.targetId = targetId
        self.net.setPreferableBackend(self.backendId)
        self.net.setPreferableTarget(self.targetId)

    def preprocess(self, img):
        # HWC float image -> NCHW blob
        blob = np.transpose(img, (2, 0, 1))
        return blob[np.newaxis, :, :, :]

    def infer(self, srcimg):
        input_blob = self.preprocess(srcimg)

        self.net.setInput(input_blob)
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())

        predictions = self.postprocess(outs[0])
        return predictions

    def postprocess(self, outputs):
        dets = outputs[0]

        # Decode grid-relative centers and log-scale sizes into input pixels
        dets[:, :2] = (dets[:, :2] + self.grids) * self.expanded_strides
        dets[:, 2:4] = np.exp(dets[:, 2:4]) * self.expanded_strides

        # get boxes: convert center/size to corner (xyxy) format
        boxes = dets[:, :4]
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.

        # get scores (objectness * class probability) and class indices
        scores = dets[:, 4:5] * dets[:, 5:]
        max_scores = np.amax(scores, axis=1)
        max_scores_idx = np.argmax(scores, axis=1)

        # Class-aware non-maximum suppression
        keep = cv2.dnn.NMSBoxesBatched(boxes_xyxy.tolist(), max_scores.tolist(), max_scores_idx.tolist(), self.confThreshold, self.nmsThreshold)

        candidates = np.concatenate([boxes_xyxy, max_scores[:, None], max_scores_idx[:, None]], axis=1)
        if len(keep) == 0:
            return np.array([])
        return candidates[keep]

    def generateAnchors(self):
        # Precompute the grid offsets and per-cell strides for all three detection heads
        self.grids = []
        self.expanded_strides = []
        hsizes = [self.input_size[0] // stride for stride in self.strides]
        wsizes = [self.input_size[1] // stride for stride in self.strides]

        for hsize, wsize, stride in zip(hsizes, wsizes, self.strides):
            xv, yv = np.meshgrid(np.arange(hsize), np.arange(wsize))
            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
            self.grids.append(grid)
            shape = grid.shape[:2]
            self.expanded_strides.append(np.full((*shape, 1), stride))

        self.grids = np.concatenate(self.grids, 1)
        self.expanded_strides = np.concatenate(self.expanded_strides, 1)
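The `YoloX` wrapper can also be used standalone. A sketch under the assumption that the ONNX file has already been downloaded locally (the filename matches the one fetched via `hf_hub_download` in `app.py`) and that `sample.jpg` is an illustrative input:

import cv2 as cv
import numpy as np
from yolox import YoloX

model = YoloX(modelPath="object_detection_yolox_2022nov.onnx")

src = cv.cvtColor(cv.imread("sample.jpg"), cv.COLOR_BGR2RGB)
# Letterbox to the fixed 640x640 input, mirroring letterbox() in app.py
ratio = min(640 / src.shape[0], 640 / src.shape[1])
resized = cv.resize(src, (int(src.shape[1] * ratio), int(src.shape[0] * ratio)))
padded = np.full((640, 640, 3), 114.0, dtype=np.float32)
padded[:resized.shape[0], :resized.shape[1]] = resized

dets = model.infer(padded)  # each row: [x0, y0, x1, y1, score, class_id] in letterboxed pixels
for x0, y0, x1, y1, score, cls_id in dets:
    print(int(cls_id), round(float(score), 2), [v / ratio for v in (x0, y0, x1, y1)])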