# NOTE(review): the original paste began with stray artifact lines
# ("Spaces:", "Runtime error" x2) that are not Python; they are kept
# here as a comment so the file parses.
# importing modules
import cv2
import torch
from torchvision import transforms
from torchvision.models import detection

import numpy as np

# use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# class labels of the COCO detection dataset; 'N/A' entries are label ids
# that the dataset skips, kept so list index == model label id
classes = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
           'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
           'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
           'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
           'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
           'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
           'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
           'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
           'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# one random BGR color per class, fixed for the lifetime of the process
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Faster R-CNN with a ResNet-50 FPN v2 backbone, pretrained on COCO.
# `weights="DEFAULT"` replaces the deprecated `pretrained=True` /
# `pretrained_backbone=True` flags and loads the same COCO checkpoint.
model = detection.fasterrcnn_resnet50_fpn_v2(weights="DEFAULT", progress=True).to(device)
model.eval()  # inference mode: disables dropout / batch-norm updates
print(model)  # print out the architecture of the model
# function to carry out object detection on images. | |
def img_detect(img_path): | |
image = cv2.imread(img_path) # reads the model using OpenCV | |
image = cv2.resize(image, (640, 480)) | |
orig = image.copy() | |
# changing the colorspace from BGR to RGB (since Pytorch trains only RGB image) | |
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) | |
image = image.transpose((2, 0, 1)) # swapping the color channels from channels last to channels first | |
image = np.expand_dims(image, axis=0) # add batch dimension to the image | |
image = image / 255.0 # scaling image from (0,255) to (0,1) | |
image = torch.FloatTensor(image) # changes the numpy array to a tensor. | |
image = image.to(device) | |
detections = model(image)[0] # the image is passed to the model to get the bounding boxes | |
people = 0 | |
# loop to construct bounding boxes on image. | |
for i in range(0, len(detections["boxes"])): | |
confidence = detections["scores"][i] # get confidence score of each object in the image | |
idx = int(detections["labels"][i]) # identifying the id of each of the classes in the image | |
box = detections["boxes"][i].detach().cpu().numpy() # gets the coordinates for the bounding boxes | |
(X_1, Y_1, X_2, Y_2) = box.astype("int") | |
if confidence > 0.75 and idx == 1: | |
# matching the label index with its classes and its probability | |
label = f"{classes[idx]}, {idx}: {confidence* 100}%" | |
print(f"[INFO] {label}") | |
people += 1 | |
cv2.rectangle(orig, (X_1, Y_1), (X_2, Y_2), colors[idx], 2) # draw bounding boxes over each object | |
y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15 | |
# adds the label text to the image. | |
cv2.putText(orig, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2) | |
cv2.putText(orig, f"Number of People: {people}", (5, 19), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2) | |
return orig | |
# function to perform object detection in videos | |
def video_detection(video_path): | |
video = cv2.VideoCapture(video_path) | |
# frame_width = video.get(3) | |
# frame_height = video.get(4) | |
# out = cv2.VideoWriter(vid_out, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height)) | |
while video.isOpened(): | |
ret, frame = video.read() | |
vid = frame.copy() | |
if not ret: | |
break | |
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | |
frame = transforms.functional.to_tensor(frame) | |
frame = frame.to(device) | |
vid_detect = model([frame])[0] | |
for i in range(0, len(vid_detect["boxes"])): | |
confidence = vid_detect["scores"][i] | |
if confidence > 0.75: | |
idx = int(vid_detect["labels"][i]) | |
box = vid_detect["boxes"][i].detach().cpu().numpy() | |
(X_1, Y_1, X_2, Y_2) = box.astype("int") | |
label = f"{classes[idx]}, {idx}: {confidence* 100}%" | |
print(f"[INFO] {label}") | |
cv2.rectangle(vid, (X_1, Y_1), (X_2, Y_2), colors[idx], 2) | |
y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15 | |
cv2.putText(vid, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2) | |
return vid | |