# yolo-mediapipe / app.py
import gradio as gr
import cv2
import mediapipe as mp
import numpy as np
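
# Note: 'yolov3.weights', 'yolov3.cfg', and 'yolov3.txt' (one class name per line)
# are loaded from the working directory below, so they must sit next to app.py
# for the detection functions to work.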

# Bounding Box
def box_yolo(image, only_people):
    def get_output_layers(net):
        layer_names = net.getLayerNames()
        output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
        return output_layers

    def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
        label = str(classes[class_id])
        color = COLORS[class_id]
        cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
        cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    Width = image.shape[1]
    Height = image.shape[0]
    scale = 0.00392  # ~1/255, scales pixel values to [0, 1] for the YOLO input blob
    with open('yolov3.txt', 'r') as f:
        classes = [line.strip() for line in f.readlines()]
    COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
    net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
    blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(get_output_layers(net))

    class_ids = []
    confidences = []
    boxes = []
    conf_threshold = 0.5
    nms_threshold = 0.4
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > conf_threshold:
                center_x = int(detection[0] * Width)
                center_y = int(detection[1] * Height)
                w = int(detection[2] * Width)
                h = int(detection[3] * Height)
                x = center_x - w / 2
                y = center_y - h / 2
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([x, y, w, h])

    # Non-maximum suppression drops overlapping boxes for the same object.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    for i in indices:
        # Class id 0 is 'person' in the class list read from yolov3.txt.
        if only_people and class_ids[i] != 0:
            continue
        x, y, w, h = boxes[i]
        draw_prediction(image, class_ids[i], confidences[i],
                        round(x), round(y), round(x + w), round(y + h))
    return image

# Pose Estimation
def pose_mediapipe(image, segmentation):
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    mp_pose = mp.solutions.pose
    BG_COLOR = (192, 192, 192)  # gray
    with mp_pose.Pose(
            static_image_mode=True,
            model_complexity=2,
            enable_segmentation=segmentation,
            min_detection_confidence=0.5) as pose:
        image_height, image_width, _ = image.shape
        # Convert the BGR image to RGB before processing.
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        annotated_image = image.copy()
        # Draw segmentation on the image: keep the person, replace the background with gray.
        if segmentation:
            condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
            bg_image = np.zeros(image.shape, dtype=np.uint8)
            bg_image[:] = BG_COLOR
            annotated_image = np.where(condition, annotated_image, bg_image)
        # Draw pose landmarks on the image.
        mp_drawing.draw_landmarks(
            annotated_image,
            results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    return annotated_image

# Both
def both(image_, only_people):
    # Draw YOLO bounding boxes on a copy of the input frame.
    image = box_yolo(image_.copy(), only_people)
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    mp_pose = mp.solutions.pose
    with mp_pose.Pose(
            static_image_mode=True,
            model_complexity=2,
            enable_segmentation=False,
            min_detection_confidence=0.5) as pose:
        # Run pose estimation on the original frame so the drawn boxes do not
        # interfere, then draw the landmarks on top of the boxed image.
        results = pose.process(cv2.cvtColor(image_, cv2.COLOR_BGR2RGB))
        annotated_image = image.copy()
        mp_drawing.draw_landmarks(
            annotated_image,
            results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    return annotated_image

def model_picker(image, model, segmentation, only_people):
    # 'model' is the index of the selected radio option:
    # 0 = Bounding Box, 1 = Pose Estimation, 2 = Both.
    if model == 0:
        result = box_yolo(image, only_people)
    elif model == 1:
        result = pose_mediapipe(image, segmentation)
    else:
        result = both(image, only_people)
    return result

image_in = gr.Image(label='Input Image')
radio_in = gr.Radio(['Bounding Box', 'Pose Estimation', 'Both'], type='index', label='Model Type')
checkbox_1 = gr.Checkbox(label='Enable Segmentation (For Pose Estimation)')
checkbox_2 = gr.Checkbox(label='Bound Only People in a Box')
iface = gr.Interface(fn=model_picker,
                     inputs=[image_in, radio_in, checkbox_1, checkbox_2],
                     outputs='image')
iface.launch()