Spaces:

sanjanatule
/

ERA_Assignment13

Sleeping

App Files Files Community

ERA_Assignment13 / app.py

sanjanatule

Update app.py

5f7693c over 2 years ago

raw

history blame contribute delete

8.12 kB


	import gradio as gr
	from torchvision import datasets, transforms
	import albumentations as Al
	from albumentations.pytorch import ToTensorV2
	from PIL import Image
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	from torch.optim.lr_scheduler import OneCycleLR
	from pytorch_lightning import LightningModule, Trainer, seed_everything
	from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
	from pytorch_lightning.callbacks.progress import TQDMProgressBar
	from pytorch_lightning.loggers import CSVLogger,TensorBoardLogger
	from tqdm import tqdm
	import torch
	import torch.optim as optim
	import matplotlib
	import cv2
	from pytorch_grad_cam import EigenCAM
	from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget
	from pytorch_grad_cam.utils.image import show_cam_on_image
	from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image


	# my files
	import utils
	import config
	from model import YOLOv3
	from utils import (
	mean_average_precision,
	cells_to_bboxes,
	get_evaluation_bboxes,
	save_checkpoint,
	load_checkpoint,
	check_class_accuracy,
	plot_couple_examples,
	accuracy_fn,
	get_loaders
	)
	from loss import YoloLoss
	import litmodelclass


	# gradio

	# Static text for the demo page.

	# Headline and one-line summary of the demo. Neither name is referenced
	# again in the visible part of this file.
	title = "Yolo3 trained on PASCAL_VOC with GradCAM"
	description = "Gradio interface to infer on Yolo3 model, and get GradCAM results"

	# Markdown block with the repository link and the reported validation
	# metrics; it is rendered at the bottom of the page via gr.Markdown.
	model_stats = """
	### YoloV3 Model Implementation & Training Details
	Github Link: https://github.com/santule/ERA/tree/main/S13
	#### Model Performance:
	1. Validation Loss: 6.05
	2. Class accuracy: 82.4%
	3. No obj accuracy: 98.05%
	4. Obj accuracy: 72.3%
	"""

	# Root Gradio layout. The Blocks context is bound to `demo`, which the
	# `__main__` guard at the bottom of the file launches.
	# NOTE(review): this listing has lost its indentation. The statements that
	# follow must be re-indented under this `with` for the file to parse;
	# confirm against the original file how far the context extends (the
	# gr.Row()/gr.Button layout further down presumably belongs inside it).
	with gr.Blocks() as demo:

	# Page heading plus the list of the 20 supported class names, rendered as
	# Markdown.
	gr.Markdown(
	"""
	# Yolo3 model trained on PASCAL_VOC dataset Demo!
	20 Classes supported - aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,cow,diningtable,dog,horse,motorbike,person,pottedplant,sheep,sofa,train,tvmonitor
	"""
	)


	# example images
	# One single-element row per sample image; gr.Examples receives this list.
	examples = [
	    [image_path]
	    for image_path in (
	        "example_images/009948.jpg",
	        "example_images/000041.jpg",
	        "example_images/000042.jpg",
	        "example_images/000043.jpg",
	        "example_images/000044.jpg",
	        "example_images/000045.jpg",
	    )
	]

	# Bounding-box colours: sample the "tab20b" colormap at evenly spaced
	# positions, one per class label, and keep a label -> hex string mapping
	# for the annotated-image component.
	class_labels = config.PASCAL_CLASSES
	cmap = plt.get_cmap("tab20b")
	colors = [cmap(position) for position in np.linspace(0, 1, len(class_labels))]
	colors_hex = {
	    label: matplotlib.colors.rgb2hex(rgba)
	    for label, rgba in zip(class_labels, colors)
	}

	# consolidate the output from the model for gradcam to work
	def yolov3_reshape_transform(x):
	    """Fuse the per-scale model outputs into one activation map for the CAM.

	    Args:
	        x: Sequence of 5-D tensors, one per detection scale. The original
	            author's notes give (1, 3, 13, 13, 25) for the first scale, i.e.
	            presumably (batch, anchors, rows, cols, features) -- confirm
	            against the model.

	    Returns:
	        A 4-D tensor. Every input is moved to channels-first layout with
	        dims 1 and 4 merged into one channel axis, made non-negative,
	        bilinearly resized to the spatial size of ``x[0]`` and concatenated
	        along the channel axis.
	    """
	    # Every scale is resized to the grid of the first output.
	    target_hw = x[0].shape[2:4]

	    resized_maps = []
	    for scale_out in x:
	        batch, n_anchor, rows, cols, n_feat = scale_out.shape
	        # (b, a, r, c, f) -> (b, a, f, r, c) -> (b, a * f, r, c)
	        channels_first = scale_out.permute(0, 1, 4, 2, 3).reshape(
	            batch, n_anchor * n_feat, rows, cols
	        )
	        resized_maps.append(
	            torch.nn.functional.interpolate(
	                channels_first.abs(), target_hw, mode='bilinear'
	            )
	        )

	    return torch.cat(resized_maps, dim=1)


	# main function of the app
	# Path of the Lightning checkpoint that holds the trained detector weights.
	_CHECKPOINT_PATH = "yolo3_improved_model.ckpt"
	# Side length, in pixels, of the square image the network is fed.
	_INPUT_SIZE = 416
	# Populated on first use by _get_inference_model() and reused afterwards.
	_cached_inference_model = None


	def _get_inference_model():
	    """Return the detector in eval mode, loading the checkpoint only once."""
	    global _cached_inference_model
	    if _cached_inference_model is None:
	        # load_from_checkpoint is a classmethod: call it on the class rather
	        # than on a throwaway instance, and keep the model it returns.
	        loaded = litmodelclass.LitYolo.load_from_checkpoint(_CHECKPOINT_PATH)
	        loaded.eval()
	        _cached_inference_model = loaded
	    return _cached_inference_model


	def _midpoint_box_to_pixel_corners(box, width, height):
	    """Convert a normalised (cx, cy, w, h) box to pixel corners clipped to the image."""
	    centre_x, centre_y, box_w, box_h = box
	    left = int((centre_x - box_w / 2) * width)
	    top = int((centre_y - box_h / 2) * height)
	    right = int((centre_x + box_w / 2) * width)
	    bottom = int((centre_y + box_h / 2) * height)
	    # Clip every edge independently so a box that overhangs the image is
	    # cut at the border instead of being shifted or given negative corners.
	    left = min(max(left, 0), width)
	    right = min(max(right, 0), width)
	    top = min(max(top, 0), height)
	    bottom = min(max(bottom, 0), height)
	    return (left, top, right, bottom)


	def yolo3_inference(input_img,gradcam=True,gradcam_opa=0.5,user_iou_threshold=0.6,user_threshold=0.5):
	    """Detect objects in an image and optionally overlay an EigenCAM heat map.

	    Args:
	        input_img: Image array as delivered by the ``gr.Image`` input
	            (presumably H x W x 3, RGB, values 0-255 -- confirm against the
	            Gradio version in use).
	        gradcam: When true, the returned image is the heat-map overlay;
	            otherwise it is the resized and padded input image.
	        gradcam_opa: Opacity of the heat map in [0, 1]; 0 shows only the
	            image, 1 only the heat map.
	        user_iou_threshold: Forwarded to ``utils.non_max_suppression`` as
	            ``iou_threshold``.
	        user_threshold: Forwarded to ``utils.non_max_suppression`` as
	            ``threshold``.

	    Returns:
	        A ``(image, sections)`` tuple where ``sections`` is a list of
	        ``((x1, y1, x2, y2), class_label)`` entries in pixel coordinates of
	        the 416 x 416 image, the shape ``gr.AnnotatedImage`` consumes.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    if input_img is None:
	        # The button can be pressed before an image is uploaded.
	        raise gr.Error("Please provide an input image.")

	    inference_model = _get_inference_model()

	    # Anchors scaled by the grid size of each of the three prediction scales.
	    anchors = (torch.tensor(config.ANCHORS) * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2))

	    # Resize the longest side to the network size, pad to a square and scale
	    # pixel values to [0, 1] (zero mean, unit std, max_pixel_value=255).
	    test_transforms = Al.Compose(
	        [
	            Al.LongestMaxSize(max_size=_INPUT_SIZE),
	            Al.PadIfNeeded(
	                min_height=_INPUT_SIZE, min_width=_INPUT_SIZE, border_mode=cv2.BORDER_CONSTANT
	            ),
	            Al.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
	        ]
	    )
	    input_img_copy = test_transforms(image=input_img)['image']
	    input_img_tensor = transforms.ToTensor()(input_img_copy).unsqueeze(0)

	    # Plain inference: no autograd graph is needed for the detections.
	    with torch.no_grad():
	        test_img_out = inference_model(input_img_tensor)

	    # Collect the candidate boxes of the single image from every scale.
	    detections = []
	    for scale_out, anchor in zip(test_img_out, anchors):
	        grid_size = scale_out.shape[2]
	        boxes_scale = utils.cells_to_bboxes(scale_out, anchor, S=grid_size, is_preds=True)
	        detections += boxes_scale[0]

	    nms_boxes = utils.non_max_suppression(
	        detections,
	        iou_threshold=user_iou_threshold,
	        threshold=user_threshold,
	        box_format="midpoint",
	    )

	    # Each kept box is [class, score, cx, cy, w, h] with normalised coordinates.
	    sections = [
	        (
	            _midpoint_box_to_pixel_corners(box[2:], _INPUT_SIZE, _INPUT_SIZE),
	            class_labels[int(box[0])],
	        )
	        for box in nms_boxes
	    ]

	    if not gradcam:
	        return (input_img_copy, sections)

	    # The CAM call takes a targets argument.
	    # NOTE(review): EigenCAM is gradient-free, so these targets are presumably
	    # never evaluated -- confirm against the installed pytorch-grad-cam.
	    objs = [box[1] for box in nms_boxes]
	    bbox_coord = [box[2:] for box in nms_boxes]
	    targets = [FasterRCNNBoxScoreTarget(objs, bbox_coord)]

	    target_layers = [inference_model.model]
	    # The context manager releases the forward hooks, which matters now that
	    # the model object is shared between requests.
	    with EigenCAM(inference_model, target_layers, reshape_transform=yolov3_reshape_transform) as cam:
	        grayscale_cam = cam(input_tensor=input_img_tensor, targets=targets)[0, :]

	    # The image is RGB, so request an RGB heat map. image_weight is the weight
	    # of the image, hence the complement of the requested heat-map opacity.
	    visualization = show_cam_on_image(
	        input_img_copy,
	        grayscale_cam,
	        use_rgb=True,
	        image_weight=1.0 - gradcam_opa,
	    )
	    return (visualization, sections)

	# app GUI
	# These layout statements belong to the gr.Blocks context opened by
	# `with gr.Blocks() as demo:` earlier in the file.

	# Top row: uploaded image on the left, annotated result on the right.
	with gr.Row():
	    img_input = gr.Image()
	    img_output = gr.AnnotatedImage().style(color_map = colors_hex)

	# Second row: inference options.
	with gr.Row():
	    gradcam_check = gr.Checkbox(label="Gradcam")
	    gradcam_opa = gr.Slider(0, 1, value = 0.5, label="Opacity of GradCAM")
	    iou_threshold = gr.Slider(0, 1, value = 0.6, label="IOU Threshold")
	    threshold = gr.Slider(0, 1, value = 0.5, label="Threshold")

	# Components in the positional order of yolo3_inference's parameters.
	inference_inputs = [img_input, gradcam_check, gradcam_opa, iou_threshold, threshold]

	section_btn = gr.Button("Identify Objects")
	section_btn.click(yolo3_inference, inputs=inference_inputs, outputs=[img_output])

	gr.Markdown("## Some Examples")
	gr.Examples(
	    examples=examples,
	    inputs=inference_inputs,
	    outputs=img_output,
	    fn=yolo3_inference,
	    cache_examples=False,
	)

	# Model statistics, wrapped in the same Row > Box > Row > Column > Box
	# nesting as before.
	with gr.Row(), gr.Box(), gr.Row(), gr.Column(), gr.Box():
	    gr.Markdown(model_stats)

	# Start the Gradio server only when this file is executed as a script,
	# not when it is imported.
	if __name__ == "__main__":
	    demo.launch()