import gradio as gr import cv2 import mediapipe as mp import numpy as np # Bounding Box def box_yolo(image, only_people): def get_output_layers(net): layer_names = net.getLayerNames() output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()] return output_layers def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h): label = str(classes[class_id]) color = COLORS[class_id] cv2.rectangle(img, (x,y), (x_plus_w,y_plus_h), color, 2) cv2.putText(img, label, (x-10,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) Width = image.shape[1] Height = image.shape[0] scale = 0.00392 classes = None with open('yolov3.txt', 'r') as f: classes = [line.strip() for line in f.readlines()] COLORS = np.random.uniform(0, 255, size=(len(classes), 3)) net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg') blob = cv2.dnn.blobFromImage(image, scale, (416,416), (0,0,0), True, crop=False) net.setInput(blob) outs = net.forward(get_output_layers(net)) class_ids = [] confidences = [] boxes = [] conf_threshold = 0.5 nms_threshold = 0.4 for out in outs: for detection in out: scores = detection[5:] class_id = np.argmax(scores) confidence = scores[class_id] if confidence > 0.5: center_x = int(detection[0] * Width) center_y = int(detection[1] * Height) w = int(detection[2] * Width) h = int(detection[3] * Height) x = center_x - w / 2 y = center_y - h / 2 class_ids.append(class_id) confidences.append(float(confidence)) boxes.append([x, y, w, h]) indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold) if only_people: for i in indices: if class_ids[i] == 0: box = boxes[i] x = box[0] y = box[1] w = box[2] h = box[3] draw_prediction(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h)) else: for i in indices: box = boxes[i] x = box[0] y = box[1] w = box[2] h = box[3] draw_prediction(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h)) return image # Pose Estimation def pose_mediapipe(image, segmentation): mp_drawing = mp.solutions.drawing_utils mp_drawing_styles = mp.solutions.drawing_styles mp_pose = mp.solutions.pose BG_COLOR = (192, 192, 192) # gray with mp_pose.Pose( static_image_mode=True, model_complexity=2, enable_segmentation=segmentation, min_detection_confidence=0.5) as pose: image_height, image_width, _ = image.shape # Convert the BGR image to RGB before processing. results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) annotated_image = image.copy() # Draw segmentation on the image. if segmentation: condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1 bg_image = np.zeros(image.shape, dtype=np.uint8) bg_image[:] = BG_COLOR annotated_image = np.where(condition, annotated_image, bg_image) # Draw pose landmarks on the image. mp_drawing.draw_landmarks( annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS, landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()) return annotated_image # Both def both(image_, only_people): def get_output_layers(net): layer_names = net.getLayerNames() output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()] return output_layers def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h): label = str(classes[class_id]) color = COLORS[class_id] cv2.rectangle(img, (x,y), (x_plus_w,y_plus_h), color, 2) cv2.putText(img, label, (x-10,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) image = image_.copy() Width = image.shape[1] Height = image.shape[0] scale = 0.00392 classes = None with open('yolov3.txt', 'r') as f: classes = [line.strip() for line in f.readlines()] COLORS = np.random.uniform(0, 255, size=(len(classes), 3)) net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg') blob = cv2.dnn.blobFromImage(image, scale, (416,416), (0,0,0), True, crop=False) net.setInput(blob) outs = net.forward(get_output_layers(net)) class_ids = [] confidences = [] boxes = [] conf_threshold = 0.5 nms_threshold = 0.4 for out in outs: for detection in out: scores = detection[5:] class_id = np.argmax(scores) confidence = scores[class_id] if confidence > 0.5: center_x = int(detection[0] * Width) center_y = int(detection[1] * Height) w = int(detection[2] * Width) h = int(detection[3] * Height) x = center_x - w / 2 y = center_y - h / 2 class_ids.append(class_id) confidences.append(float(confidence)) boxes.append([x, y, w, h]) indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold) if only_people: for i in indices: if class_ids[i] == 0: box = boxes[i] x = box[0] y = box[1] w = box[2] h = box[3] draw_prediction(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h)) else: for i in indices: box = boxes[i] x = box[0] y = box[1] w = box[2] h = box[3] draw_prediction(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h)) mp_drawing = mp.solutions.drawing_utils mp_drawing_styles = mp.solutions.drawing_styles mp_pose = mp.solutions.pose BG_COLOR = (192, 192, 192) # gray with mp_pose.Pose( static_image_mode=True, model_complexity=2, enable_segmentation=False, min_detection_confidence=0.5) as pose: image_height, image_width, _ = image_.shape # Convert the BGR image to RGB before processing. results = pose.process(cv2.cvtColor(image_, cv2.COLOR_BGR2RGB)) annotated_image = image.copy() # Draw pose landmarks on the image. mp_drawing.draw_landmarks( annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS, landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()) return annotated_image def model_picker(image, model, segmentation, only_people): if model == 0: result = box_yolo(image, only_people) elif model == 1: result = pose_mediapipe(image, segmentation) elif model == 2: result = both(image, only_people) return result image_in = gr.inputs.Image(label='Input Image') radio_in = gr.Radio(['Bounding Box', 'Pose Estimation', 'Both'], type='index', label='Model Type') checkbox_1 = gr.inputs.Checkbox(label='Enable Segmentation (For Pose Estimation)') checkbox_2 = gr.inputs.Checkbox(label='Bound Only People in a Box') iface = gr.Interface(fn=model_picker, inputs=[image_in, radio_in, checkbox_1, checkbox_2], outputs='image') # app.launch(share=True) iface.launch()