import cv2
import numpy as np
import gradio as gr
import requests

# SSD MobileNet v3 (COCO) model definition and weights.
config_file = "ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"
frozen_model = "frozen_inference_graph.pb"

# Load the detection model and configure preprocessing to match the
# network's expected input: 320x320, pixel values scaled to [-1, 1],
# and BGR->RGB channel swap.
model = cv2.dnn.DetectionModel(frozen_model, config_file)
model.setInputSize(320, 320)
model.setInputScale(1.0 / 127.5)
model.setInputMean((127.5, 127.5, 127.5))
model.setInputSwapRB(True)
# NOTE(review): the CUDA backend requires an OpenCV build compiled with
# CUDA support; inference will fail on builds without it — confirm the
# deployment environment before shipping.
model.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
model.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# Load class labels, one label per line.
with open('labels.txt', 'rt') as f:
    classLabels = f.read().rstrip('\n').split('\n')


def detect_objects(frame, conf_threshold=0.55):
    """
    Detect objects in a single frame and return their coordinates and names.

    :param frame: Input image/frame as a BGR numpy array.
    :param conf_threshold: Minimum detection confidence to keep (default 0.55,
                           matching the original hard-coded value).
    :return: List of dicts, each with the detected object's "name" and its
             bounding-box "coordinates" (x, y, width, height as ints).
    """
    detections = []
    class_ids, confidences, boxes = model.detect(frame, confThreshold=conf_threshold)

    # model.detect returns empty results when nothing passes the threshold.
    if len(class_ids) == 0:
        return detections

    for class_id, conf, box in zip(class_ids.flatten(), confidences.flatten(), boxes):
        # Class IDs are 1-based. Bound-check against the actual label list
        # instead of the hard-coded COCO count of 80; this also rejects a
        # class ID of 0, which would otherwise index classLabels[-1].
        if not 1 <= class_id <= len(classLabels):
            continue
        x, y, w, h = box
        detections.append({
            "name": classLabels[class_id - 1],
            "coordinates": {
                "x": int(x),
                "y": int(y),
                "width": int(w),
                "height": int(h),
            },
        })
    return detections


def get_image_from_url(url):
    """
    Download the image at *url* and decode it into a BGR numpy array.

    :raises requests.HTTPError: if the server responds with an error status.
    :raises ValueError: if the response body cannot be decoded as an image.
    """
    # Time out rather than hanging forever on an unresponsive host, and
    # surface HTTP errors (404, 500, ...) instead of trying to decode them.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    buffer = np.asarray(bytearray(response.content), dtype="uint8")
    image = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    # imdecode returns None on undecodable data; fail with a clear message
    # instead of a confusing crash inside model.detect.
    if image is None:
        raise ValueError(f"URL did not contain a decodable image: {url}")
    return image


def detect_objects_in_image_url(url):
    """Fetch the image at *url* and return its detections as JSON-safe data."""
    frame = get_image_from_url(url)
    return detect_objects(frame)


# Gradio interface: image URL in (text box), detection results out (JSON).
iface = gr.Interface(
    fn=detect_objects_in_image_url,
    inputs="text",
    outputs=gr.JSON(),
    title="Object Detection",
    description="Enter an image URL to detect objects. The detected objects will be returned as JSON.",
)

# Guard the launch so importing this module does not start the web server.
if __name__ == "__main__":
    iface.launch()