Spaces:

Zengyf-CVer
/

Gradio_YOLOv5_Det_v3

Runtime error

App Files Files Community

Gradio_YOLOv5_Det_v3 / app.py

Zengyf-CVer

v03 update

65fff86 over 2 years ago

raw

history blame

13.4 kB

	# Gradio YOLOv5 Det v0.3
	# author: Zeng Yifu（曾逸夫）
	# creation time: 2022-05-09
	# email: zyfiy1314@163.com
	# project homepage: https://gitee.com/CV_Lab/gradio_yolov5_det

	# import os

	# os.system("pip install gradio==3.0.1")

	import argparse
	import csv
	import json
	import sys
	from collections import Counter
	from pathlib import Path
	import pandas as pd

	import gradio as gr
	import torch
	import yaml
	from PIL import Image, ImageDraw, ImageFont

	from util.fonts_opt import is_fonts
	from util.pdf_opt import pdf_generate

	ROOT_PATH = sys.path[0] # root directory: first sys.path entry (normally the folder of the launched script); fonts are looked up under it

	# model path: repository identifier handed to torch.hub.load() by model_loading()
	model_path = "ultralytics/yolov5"

	# Gradio YOLOv5 Det version string (passed to pdf_generate() by yolo_det())
	GYD_VERSION = "Gradio YOLOv5 Det v0.3"

	# model name temporary variable: remembered by yolo_det() to notice when a different model is requested
	model_name_tmp = ""

	# Device temporary variable: remembered by yolo_det() to notice when a different device is requested
	device_tmp = ""

	# File extensions recognised by yaml_csv(): index 0 is the CSV form, index 1 the YAML form
	suffix_list = [".csv", ".yaml"]

	# font size used when loading the label font with ImageFont.truetype()
	FONTSIZE = 25

	# object style: keys of the object-size statistics, ordered small / medium / large
	obj_style = ["Small Object", "Medium Object", "Large Object"]


	def parse_args(known=False):
	    """Parse the command-line options of the demo.

	    Args:
	        known: When true, unrecognised arguments are ignored
	            (``parse_known_args``); otherwise they are an error
	            (``parse_args``).

	    Returns:
	        argparse.Namespace with the attributes ``source``, ``img_tool``,
	        ``model_name``, ``model_cfg``, ``cls_name``, ``nms_conf``,
	        ``nms_iou``, ``device``, ``inference_size`` and ``max_detnum``.
	    """
	    cli = argparse.ArgumentParser(description="Gradio YOLOv5 Det v0.3")

	    # One entry per option, registered in this order: (flags, add_argument keywords).
	    option_table = (
	        (("--source", "-src"), {"default": "upload", "type": str, "help": "input source"}),
	        (("--img_tool", "-it"), {"default": "editor", "type": str, "help": "input image tool"}),
	        (("--model_name", "-mn"), {"default": "yolov5s", "type": str, "help": "model name"}),
	        (
	            ("--model_cfg", "-mc"),
	            {"default": "./model_config/model_name_p5_p6_all.yaml", "type": str, "help": "model config"},
	        ),
	        (
	            ("--cls_name", "-cls"),
	            {"default": "./cls_name/cls_name_en.yaml", "type": str, "help": "cls name"},
	        ),
	        (
	            ("--nms_conf", "-conf"),
	            {"default": 0.5, "type": float, "help": "model NMS confidence threshold"},
	        ),
	        (("--nms_iou", "-iou"), {"default": 0.45, "type": float, "help": "model NMS IoU threshold"}),
	        (("--device", "-dev"), {"default": "cpu", "type": str, "help": "cuda or cpu"}),
	        (("--inference_size", "-isz"), {"default": 640, "type": int, "help": "model inference size"}),
	        # Kept as a string: the value is shown in a text box and converted with int() later.
	        (("--max_detnum", "-mdn"), {"default": "50", "type": str, "help": "model max det num"}),
	    )
	    for flags, settings in option_table:
	        cli.add_argument(*flags, **settings)

	    if known:
	        namespace, _ignored = cli.parse_known_args()
	        return namespace
	    return cli.parse_args()


	# yaml file parsing
	def yaml_parse(file_path):
	    """Load a UTF-8 encoded YAML file and return the parsed document.

	    Args:
	        file_path: Path of the YAML file to read.

	    Returns:
	        Whatever ``yaml.safe_load`` produces for the file content.
	    """
	    # The context manager closes the handle even if parsing fails; the
	    # previous one-liner left the file open.
	    with open(file_path, encoding="utf-8") as stream:
	        return yaml.safe_load(stream)


	# yaml csv file parsing
	def yaml_csv(file_path, file_tag):
	    """Read a list of names from a CSV or YAML file, chosen by file extension.

	    Args:
	        file_path: Path of the file; its suffix is compared with ``suffix_list``.
	        file_tag: Key looked up in the parsed YAML document (unused for CSV).

	    Returns:
	        For CSV: the first column of every non-empty row.
	        For YAML: the value stored under ``file_tag`` (``None`` if the key
	        is absent, because ``dict.get`` is used).

	    Raises:
	        SystemExit: When the suffix is neither of the supported ones; a
	            message is printed before exiting.
	    """
	    file_suffix = Path(file_path).suffix
	    if file_suffix == suffix_list[0]:
	        # csv version: newline="" is what the csv module expects, and the
	        # with-block closes the handle that used to be leaked.
	        with open(file_path, newline="") as csv_file:
	            # Blank lines yield empty rows; skip them instead of failing on row[0].
	            file_names = [row[0] for row in csv.reader(csv_file) if row]
	    elif file_suffix == suffix_list[1]:
	        # yaml version
	        file_names = yaml_parse(file_path).get(file_tag)
	    else:
	        print(f"{file_path} is not in the correct format! Program exits!")
	        sys.exit()

	    return file_names


	# model loading
	def model_loading(model_name, device):
	    """Fetch a model through ``torch.hub.load`` from the ``model_path`` repository.

	    Args:
	        model_name: Entry-point name forwarded to ``torch.hub.load``.
	        device: Forwarded to the hub entry point as its ``device`` keyword.

	    Returns:
	        The object returned by ``torch.hub.load``.
	    """
	    # force_reload=True is passed on every call, exactly as before.
	    hub_options = {"force_reload": True, "device": device, "_verbose": False}
	    return torch.hub.load(model_path, model_name, **hub_options)


	# check information
	def export_json(results, model, img_size):
	    """Turn detection results into plain, JSON-serialisable records.

	    Args:
	        results: Detection output exposing ``xyxyn`` (one array of rows per
	            image; columns 0-3 are read as the box, 4 as the confidence and
	            5 as the class index) and ``t`` (``t[1]`` is used as a duration
	            in milliseconds for the FPS figure).
	        model: Not used by this function; kept for the existing call signature.
	        img_size: Indexable pair; element 0 is reported as ``width`` and
	            element 1 as ``height``.

	    Returns:
	        A list with one entry per image, each a list of dictionaries (one per
	        detection) with the keys ``id``, ``class``, ``class_name``,
	        ``normalized_box``, ``confidence``, ``fps``, ``width`` and ``height``.
	        Class names come from the module-level ``model_cls_name_cp``.
	    """
	    per_image = []
	    for detections in results.xyxyn:
	        records = []
	        for det_id, row in enumerate(detections):
	            cls_index = int(row[5])
	            box_x0, box_y0, box_x1, box_y1 = row[:4].tolist()
	            records.append(
	                {
	                    "id": det_id,
	                    "class": cls_index,
	                    "class_name": model_cls_name_cp[cls_index],
	                    "normalized_box": {
	                        "x0": round(box_x0, 6),
	                        "y0": round(box_y0, 6),
	                        "x1": round(box_x1, 6),
	                        "y1": round(box_y1, 6),
	                    },
	                    "confidence": round(float(row[4]), 2),
	                    # Evaluated per detection so nothing is read when there are none.
	                    "fps": round(1000 / float(results.t[1]), 2),
	                    "width": img_size[0],
	                    "height": img_size[1],
	                }
	            )
	        per_image.append(records)
	    return per_image


	# frame conversion
	def pil_draw(img, countdown_msg, textFont, xyxy, font_size, opt):
	    """Draw one detection (box and optional label) onto ``img`` in place.

	    Args:
	        img: PIL image to draw on; it is modified and also returned.
	        countdown_msg: Label text placed at the top-left corner of the box.
	        textFont: PIL font used to measure and render the label.
	        xyxy: Box corners as ``[x0, y0, x1, y1]`` in pixel coordinates.
	        font_size: Not used by this function; kept for the existing call signature.
	        opt: Collection of option names; the label is drawn only when it
	            contains ``"label"``.

	    Returns:
	        The same ``img`` object that was passed in.
	    """
	    canvas = ImageDraw.Draw(img)

	    canvas.rectangle(xyxy, fill=None, outline="green")  # bounding box

	    if "label" in opt:
	        # Label size. ImageFont.getsize() was removed in Pillow 10, so prefer
	        # getbbox(); its right/bottom values are what getsize() used to return.
	        if hasattr(textFont, "getbbox"):
	            _, _, text_w, text_h = textFont.getbbox(countdown_msg)
	        else:
	            text_w, text_h = textFont.getsize(countdown_msg)

	        canvas.rectangle(
	            (xyxy[0], xyxy[1], xyxy[0] + text_w, xyxy[1] + text_h),
	            fill="green",
	            outline="green",
	        )  # label background
	        canvas.multiline_text(
	            (xyxy[0], xyxy[1]),
	            countdown_msg,
	            fill=(205, 250, 255),
	            font=textFont,
	            align="center",
	        )

	    return img


	# YOLOv5 image detection function
	def yolo_det(img, device, model_name, inference_size, conf, iou, max_num, model_cls, opt):
	    """Run detection on one image and assemble every output of the interface.

	    Args:
	        img: PIL image to analyse; detections are drawn onto it in place.
	        device: Device string handed to ``model_loading``.
	        model_name: Model to use; the global model is reloaded when the name
	            or the device differs from the previous call.
	        inference_size: Passed to the model as ``size``.
	        conf: Assigned to ``model.conf`` (NMS confidence threshold).
	        iou: Assigned to ``model.iou`` (NMS IoU threshold).
	        max_num: Converted with ``int()`` and assigned to ``model.max_det``.
	        model_cls: Assigned to ``model.classes``.
	        opt: Collection of option names; ``"label"``, ``"pdf"`` and ``"json"``
	            are recognised.

	    Returns:
	        Tuple ``(det_img, objSize_dict, clsRatio_dict, det_json, report, dataframe)``:
	        the annotated image (the input image itself when nothing is detected),
	        the share of small/medium/large objects, the share of each detected
	        class, the detection records (``None`` unless ``"json"`` is in ``opt``),
	        the report path (``None`` unless ``"pdf"`` is in ``opt``) and the
	        rounded pandas table of detections.
	    """
	    global model, model_name_tmp, device_tmp

	    # Reload only when the requested model or device really changed. Both
	    # trackers are updated together (previously a model change left the device
	    # tracker stale, causing a second reload on the next call), and only after
	    # loading succeeded so a failed load is retried.
	    if model_name_tmp != model_name or device_tmp != device:
	        model = model_loading(model_name, device)
	        model_name_tmp, device_tmp = model_name, device

	    # -------------Model tuning -------------
	    model.conf = conf  # NMS confidence threshold
	    model.iou = iou  # NMS IoU threshold
	    model.max_det = int(max_num)  # Maximum number of detection frames
	    model.classes = model_cls  # model classes

	    img_size = img.size  # frame size

	    results = model(img, size=inference_size)  # detection

	    # Data Frame
	    dataframe = results.pandas().xyxy[0].round(2)

	    # ----------------Load fonts----------------
	    textFont = _label_font(cls_name)

	    det_img = img  # stays the untouched input when there is nothing to draw
	    area_obj_all = []  # pixel area of every drawn box
	    cls_det_stat = []  # class name of every detection

	    for result in results.xyxyn:
	        for det_id, row in enumerate(result):
	            obj_cls = model_cls_name_cp[int(row[5])]  # category
	            cls_det_stat.append(obj_cls)

	            # Normalised corners scaled to pixel coordinates of the input image.
	            nx0, ny0, nx1, ny1 = row[:4].tolist()
	            x0 = int(img_size[0] * nx0)
	            y0 = int(img_size[1] * ny0)
	            x1 = int(img_size[0] * nx1)
	            y1 = int(img_size[1] * ny1)

	            # Own name so the ``conf`` threshold argument is not overwritten.
	            score = float(row[4])

	            det_img = pil_draw(
	                img,
	                f"{det_id}-{obj_cls}:{score:.2f}",
	                textFont,
	                [x0, y0, x1, y1],
	                FONTSIZE,
	                opt,
	            )

	            # ----------add object size----------
	            area_obj_all.append((x1 - x0) * (y1 - y0))

	    det_json = export_json(results, model, img.size)[0]  # Detection information

	    # -------pdf-------
	    report = None
	    if "pdf" in opt:
	        report = "./Det_Report.pdf"
	        det_json_format = json.dumps(
	            det_json, sort_keys=False, indent=4, separators=(",", ":"), ensure_ascii=False
	        )  # JSON formatting
	        pdf_generate(f"{det_json_format}", report, GYD_VERSION)

	    # The records are needed for the report above, so drop them only afterwards.
	    if "json" not in opt:
	        det_json = None

	    # --------------object size compute--------------
	    objSize_dict = _object_size_ratio(area_obj_all)

	    # ------------cls stat------------
	    clsDet_dict = Counter(cls_det_stat)
	    clsDet_dict_sum = sum(clsDet_dict.values())
	    # Empty when nothing was detected.
	    # NOTE(review): confirm the Label output component accepts an empty dict.
	    clsRatio_dict = {name: count / clsDet_dict_sum for name, count in clsDet_dict.items()}

	    return det_img, objSize_dict, clsRatio_dict, det_json, report, dataframe


	def _label_font(cls_name_path):
	    """Load the label font that matches the language code ending the class-name file's stem."""
	    # "cls_name_en.yaml" -> "en". Using the stem also works for ".csv" class
	    # files, where searching for ".yaml" used to raise ValueError.
	    lang_code = Path(cls_name_path).stem[-2:]

	    if lang_code == "zh":
	        # Chinese
	        return ImageFont.truetype(str(f"{ROOT_PATH}/fonts/SimSun.ttf"), size=FONTSIZE)
	    if lang_code == "ko":
	        # Korean
	        return ImageFont.truetype(str(f"{ROOT_PATH}/fonts/malgun.ttf"), size=FONTSIZE)
	    # English, Russian, Spanish, Arabic; also the fallback for any other code,
	    # which previously left the font undefined.
	    return ImageFont.truetype(str(f"{ROOT_PATH}/fonts/TimesNewRoman.ttf"), size=FONTSIZE)


	def _object_size_ratio(areas):
	    """Return the share of small / medium / large boxes, keyed by ``obj_style``."""
	    small = sum(1 for area in areas if 0 < area <= 32 ** 2)
	    medium = sum(1 for area in areas if 32 ** 2 < area <= 96 ** 2)
	    large = sum(1 for area in areas if area > 96 ** 2)
	    total = small + medium + large

	    if total == 0:
	        # No detections (or only zero-area boxes): report zeros instead of dividing by zero.
	        return {style: 0.0 for style in obj_style}
	    return {style: count / total for style, count in zip(obj_style, (small, medium, large))}


	def main(args):
	    """Load the model and the name lists, build the Gradio interface and launch it.

	    Args:
	        args: Namespace as returned by ``parse_args``. The attributes read are
	            ``source``, ``img_tool``, ``nms_conf``, ``nms_iou``, ``model_name``,
	            ``model_cfg``, ``cls_name``, ``device``, ``inference_size`` and
	            ``max_detnum``.

	    Side effects:
	        Calls ``gr.close_all()``, sets the globals ``model``,
	        ``model_cls_name_cp`` and ``cls_name`` (all read by ``yolo_det``) and
	        launches the interface.
	    """
	    global model, model_cls_name_cp, cls_name

	    gr.close_all()

	    step = 0.05  # sliding step shared by both threshold sliders
	    cls_name = args.cls_name  # global: yolo_det picks the label font from this path

	    is_fonts(f"{ROOT_PATH}/fonts")  # Check font files

	    # model loading
	    model = model_loading(args.model_name, args.device)

	    model_names = yaml_csv(args.model_cfg, "model_names")  # model names
	    model_cls_name = yaml_csv(cls_name, "model_cls_name")  # class name
	    model_cls_name_cp = model_cls_name.copy()  # copy used for index -> name lookups

	    # ------------------- Input Components -------------------
	    # Listed in the positional order of yolo_det's parameters.
	    inputs = [
	        # input image
	        gr.Image(image_mode="RGB", source=args.source, tool=args.img_tool, type="pil", label="original image"),
	        # device
	        gr.Radio(choices=["cuda:0", "cpu"], value=args.device, label="device"),
	        # model
	        gr.Dropdown(choices=model_names, value=args.model_name, type="value", label="model"),
	        # inference size
	        gr.Radio(choices=[320, 640, 1280], value=args.inference_size, label="inference size"),
	        # confidence threshold
	        gr.Slider(0, 1, step=step, value=args.nms_conf, label="confidence threshold"),
	        # IoU threshold
	        gr.Slider(0, 1, step=step, value=args.nms_iou, label="IoU threshold"),
	        # maximum number of detections
	        gr.Textbox(
	            lines=1,
	            placeholder="Maximum number of detections",
	            value=args.max_detnum,
	            label="Maximum number of detections",
	        ),
	        # category (type="index": the selection is delivered as indices)
	        gr.CheckboxGroup(choices=model_cls_name, value=model_cls_name, type="index", label="category"),
	        # detect operations
	        gr.CheckboxGroup(
	            choices=["label", "pdf", "json"],
	            value=["label", "pdf"],
	            type="value",
	            label="operate",
	        ),
	    ]

	    # ------------------- Output Components -------------------
	    det_image = gr.Image(type="pil", label="Detection image")
	    det_info = gr.JSON(label="Detection information")
	    det_report = gr.File(label="Download test report")
	    det_table = gr.Dataframe(
	        max_rows=5,
	        overflow_row_behaviour="paginate",
	        type="pandas",
	        label="List of detection information",
	    )
	    size_stats = gr.Label(label="Object size ratio statistics")
	    cls_stats = gr.Label(label="Category detection proportion statistics")

	    # Must follow the order of yolo_det's return tuple.
	    outputs = [det_image, size_stats, cls_stats, det_info, det_report, det_table]

	    # title
	    title = "Gradio YOLOv5 Det v0.3"

	    # describe
	    description = "<div align='center'>Customizable target detection model, easy to install, easy to use</div>"

	    # example image rows; defined but currently not passed to the interface
	    examples = [
	        ["./img_example/bus.jpg", "cpu", "yolov5s", 640, 0.6, 0.5, 10, ["person", "bus"], ["label", "pdf"]],
	        ["./img_example/giraffe.jpg", "cpu", "yolov5l", 320, 0.5, 0.45, 12, ["giraffe"], ["label", "pdf"]],
	        ["./img_example/zidane.jpg", "cpu", "yolov5m", 640, 0.25, 0.5, 15, ["person", "tie"], ["pdf", "json"]],
	        [
	            "./img_example/Millenial-at-work.jpg",
	            "cpu",
	            "yolov5s6",
	            1280,
	            0.5,
	            0.5,
	            20,
	            ["person", "chair", "cup", "laptop"],
	            ["label", "pdf"],
	        ],
	    ]

	    # interface
	    demo = gr.Interface(
	        fn=yolo_det,
	        inputs=inputs,
	        outputs=outputs,
	        title=title,
	        description=description,
	        article="",
	        # examples=examples,
	        # theme="seafoam",
	        # flagging_dir="run", # output directory
	    )
	    demo.launch(
	        inbrowser=True,  # Automatically open default browser
	        show_tips=True,  # Automatically display the latest features of gradio
	    )


	# Script entry point: parse the command line, then start the interface.
	if __name__ == "__main__":
	    args = parse_args()  # strict parsing: unknown options are rejected
	    main(args)