Commit c6bbca4 · Abhishek Gola committed
1 Parent(s): 9708ebf

Added object detection yolox to opencv spaces
README.md
CHANGED
@@ -7,6 +7,12 @@ sdk: gradio
 sdk_version: 5.34.2
 app_file: app.py
 pinned: false
+short_description: Object detection with yolox using OpenCV
+tags:
+- opencv
+- object-detection
+- yolo
+- yolox
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,91 @@
import cv2 as cv
import numpy as np
import gradio as gr
from huggingface_hub import hf_hub_download
from yolox import YoloX

# Download the YOLOX ONNX model from the Hugging Face Hub
model_path = hf_hub_download(
    repo_id="opencv/object_detection_yolox",
    filename="object_detection_yolox_2022nov.onnx"
)

# Initialize the YOLOX wrapper on the OpenCV DNN backend (CPU)
model = YoloX(
    modelPath=model_path,
    confThreshold=0.5,
    nmsThreshold=0.5,
    objThreshold=0.5,
    backendId=cv.dnn.DNN_BACKEND_OPENCV,
    targetId=cv.dnn.DNN_TARGET_CPU
)

# The 80 COCO class names, indexed by the model's class ids
classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
           'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
           'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
           'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
           'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
           'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
           'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
           'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
           'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
           'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
           'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')

def letterbox(srcimg, target_size=(640, 640)):
    # Resize with preserved aspect ratio, padding the remainder with 114 (YOLOX convention)
    padded_img = np.ones((target_size[0], target_size[1], 3), dtype=np.float32) * 114.0
    ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
    resized_img = cv.resize(srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv.INTER_LINEAR).astype(np.float32)
    padded_img[:int(srcimg.shape[0] * ratio), :int(srcimg.shape[1] * ratio)] = resized_img
    return padded_img, ratio

def unletterbox(bbox, scale):
    # Map boxes from letterboxed coordinates back to the original image
    return bbox / scale

def visualize(dets, image, scale):
    res_img = image.copy()
    h, w = res_img.shape[:2]
    font_scale = max(0.5, min(w, h) / 640.0 * 0.5)
    thickness = max(1, int(font_scale * 2))

    for det in dets:
        box = unletterbox(det[:4], scale).astype(np.int32)
        score = det[-2]
        cls_id = int(det[-1])

        x0, y0, x1, y1 = box
        label = '{}:{:.1f}%'.format(classes[cls_id], score * 100)

        cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), thickness)
        (tw, th), _ = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        cv.rectangle(res_img, (x0, y0), (x0 + tw + 2, y0 + th + 4), (255, 255, 255), -1)
        cv.putText(res_img, label, (x0, y0 + th), cv.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)

    return res_img

def detect_objects(input_image):
    # Gradio delivers RGB; keep a BGR copy for OpenCV drawing
    bgr = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
    # The model expects an RGB image letterboxed to 640x640
    input_blob, scale = letterbox(cv.cvtColor(bgr, cv.COLOR_BGR2RGB))

    results = model.infer(input_blob)
    if results is None or len(results) == 0:
        return input_image

    vis_image = visualize(results, bgr, scale)
    return cv.cvtColor(vis_image, cv.COLOR_BGR2RGB)

# Gradio interface
demo = gr.Interface(
    fn=detect_objects,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=gr.Image(type="numpy", label="Detected Objects"),
    title="YOLOX Object Detection (OpenCV + ONNX)",
    description="Upload an image to detect objects using YOLOX ONNX model and OpenCV DNN.",
    allow_flagging="never"
)

if __name__ == "__main__":
    demo.launch()
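To sanity-check the pipeline without launching the Gradio server, `detect_objects` can be called directly. A minimal sketch, assuming a hypothetical local test image `sample.jpg` and that it is run from the Space's root directory:

import cv2 as cv
from app import detect_objects  # importing app.py downloads the model and builds the interface

img_bgr = cv.imread("sample.jpg")                 # OpenCV reads images as BGR
img_rgb = cv.cvtColor(img_bgr, cv.COLOR_BGR2RGB)  # detect_objects expects RGB, as Gradio supplies it
out_rgb = detect_objects(img_rgb)                 # annotated image, still RGB
cv.imwrite("detections.jpg", cv.cvtColor(out_rgb, cv.COLOR_RGB2BGR))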
requirements.txt
ADDED
@@ -0,0 +1,4 @@
opencv-python
gradio
numpy
huggingface_hub
yolox.py
ADDED
@@ -0,0 +1,85 @@
import numpy as np
import cv2

class YoloX:
    def __init__(self, modelPath, confThreshold=0.35, nmsThreshold=0.5, objThreshold=0.5, backendId=0, targetId=0):
        self.num_classes = 80
        self.net = cv2.dnn.readNet(modelPath)
        self.input_size = (640, 640)
        # ImageNet mean/std, kept from the original wrapper (not applied in this pipeline)
        self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)
        self.strides = [8, 16, 32]
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold
        self.backendId = backendId
        self.targetId = targetId
        self.net.setPreferableBackend(self.backendId)
        self.net.setPreferableTarget(self.targetId)

        self.generateAnchors()

    @property
    def name(self):
        return self.__class__.__name__

    def setBackendAndTarget(self, backendId, targetId):
        self.backendId = backendId
        self.targetId = targetId
        self.net.setPreferableBackend(self.backendId)
        self.net.setPreferableTarget(self.targetId)

    def preprocess(self, img):
        # HWC float image -> NCHW blob
        blob = np.transpose(img, (2, 0, 1))
        return blob[np.newaxis, :, :, :]

    def infer(self, srcimg):
        input_blob = self.preprocess(srcimg)

        self.net.setInput(input_blob)
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())

        predictions = self.postprocess(outs[0])
        return predictions

    def postprocess(self, outputs):
        dets = outputs[0]

        # Decode grid-relative centers and log-scale sizes into input pixels
        dets[:, :2] = (dets[:, :2] + self.grids) * self.expanded_strides
        dets[:, 2:4] = np.exp(dets[:, 2:4]) * self.expanded_strides

        # get boxes: convert center/size to corner (xyxy) format
        boxes = dets[:, :4]
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.

        # get scores (objectness * class probability) and class indices
        scores = dets[:, 4:5] * dets[:, 5:]
        max_scores = np.amax(scores, axis=1)
        max_scores_idx = np.argmax(scores, axis=1)

        # Class-aware non-maximum suppression
        keep = cv2.dnn.NMSBoxesBatched(boxes_xyxy.tolist(), max_scores.tolist(), max_scores_idx.tolist(), self.confThreshold, self.nmsThreshold)

        candidates = np.concatenate([boxes_xyxy, max_scores[:, None], max_scores_idx[:, None]], axis=1)
        if len(keep) == 0:
            return np.array([])
        return candidates[keep]

    def generateAnchors(self):
        # Precompute the grid offsets and per-cell strides for all three detection heads
        self.grids = []
        self.expanded_strides = []
        hsizes = [self.input_size[0] // stride for stride in self.strides]
        wsizes = [self.input_size[1] // stride for stride in self.strides]

        for hsize, wsize, stride in zip(hsizes, wsizes, self.strides):
            xv, yv = np.meshgrid(np.arange(hsize), np.arange(wsize))
            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
            self.grids.append(grid)
            shape = grid.shape[:2]
            self.expanded_strides.append(np.full((*shape, 1), stride))

        self.grids = np.concatenate(self.grids, 1)
        self.expanded_strides = np.concatenate(self.expanded_strides, 1)
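The `YoloX` wrapper can also be used standalone. A sketch under the assumption that the ONNX file has already been downloaded locally (the filename matches the one fetched via `hf_hub_download` in `app.py`) and that `sample.jpg` is an illustrative input:

import cv2 as cv
import numpy as np
from yolox import YoloX

model = YoloX(modelPath="object_detection_yolox_2022nov.onnx")

src = cv.cvtColor(cv.imread("sample.jpg"), cv.COLOR_BGR2RGB)
# Letterbox to the fixed 640x640 input, mirroring letterbox() in app.py
ratio = min(640 / src.shape[0], 640 / src.shape[1])
resized = cv.resize(src, (int(src.shape[1] * ratio), int(src.shape[0] * ratio)))
padded = np.full((640, 640, 3), 114.0, dtype=np.float32)
padded[:resized.shape[0], :resized.shape[1]] = resized

dets = model.infer(padded)  # each row: [x0, y0, x1, y1, score, class_id] in letterboxed pixels
for x0, y0, x1, y1, score, cls_id in dets:
    print(int(cls_id), round(float(score), 2), [v / ratio for v in (x0, y0, x1, y1)])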