# sanjanatule's picture
# Update app.py
# 5f7693c
import gradio as gr
from torchvision import datasets, transforms
import albumentations as Al
from albumentations.pytorch import ToTensorV2
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from torch.optim.lr_scheduler import OneCycleLR
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger,TensorBoardLogger
from tqdm import tqdm
import torch
import torch.optim as optim
import matplotlib
import cv2
from pytorch_grad_cam import EigenCAM
from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image
# my files
import utils
import config
from model import YOLOv3
from utils import (
mean_average_precision,
cells_to_bboxes,
get_evaluation_bboxes,
save_checkpoint,
load_checkpoint,
check_class_accuracy,
plot_couple_examples,
accuracy_fn,
get_loaders
)
from loss import YoloLoss
import litmodelclass
# gradio
# Markdown summary of the trained model; it is rendered by the final
# gr.Markdown(model_stats) call in the page layout.
model_stats = """
### YoloV3 Model Implementation & Training Details
Github Link: https://github.com/santule/ERA/tree/main/S13
#### Model Performance:
1. **Validation Loss: 6.05**
2. **Class accuracy: 82.4%**
3. **No obj accuracy: 98.05%**
4. **Obj accuracy: 72.3%**
"""
# NOTE(review): `title` and `description` are not referenced anywhere in the
# visible part of this file (gr.Blocks() is created without them) — confirm
# whether they are still needed.
title = "Yolo3 trained on PASCAL_VOC with GradCAM"
description = "Gradio interface to infer on Yolo3 model, and get GradCAM results"
# Create the Gradio Blocks app; `demo` is the object launched at the bottom of
# the file.
# NOTE(review): indentation was stripped from this copy of the file. The
# statements that follow are presumably the body of this `with` block —
# confirm the nesting against the original file before running.
with gr.Blocks() as demo:
# Page heading plus the list of class names the model can predict.
gr.Markdown(
"""
# Yolo3 model trained on PASCAL_VOC dataset Demo!
20 Classes supported - aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,cow,diningtable,dog,horse,motorbike,person,pottedplant,sheep,sofa,train,tvmonitor
"""
)
# Example image paths offered through gr.Examples further down; each row
# supplies a value for the image input only.
examples = [["example_images/009948.jpg"],["example_images/000041.jpg"],["example_images/000042.jpg"],["example_images/000043.jpg"],["example_images/000044.jpg"],["example_images/000045.jpg"]]
# One colour per class label, sampled evenly from matplotlib's "tab20b"
# colormap and converted to hex; `colors_hex` maps class name -> hex colour
# and is handed to the AnnotatedImage output as its color map.
cmap = plt.get_cmap("tab20b")
class_labels = config.PASCAL_CLASSES
colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
colors_hex = {class_labels[i]:matplotlib.colors.rgb2hex(colors[i]) for i in range(0,len(class_labels))}
# Consolidate the per-scale model outputs into one activation map so that the
# CAM code can treat them as a single feature tensor.
def yolov3_reshape_transform(x):
    """Merge a sequence of 5-D scale outputs into one 4-D activation tensor.

    Each item is permuted so its last axis moves in front of the two spatial
    axes, axes 1 and 2 of the result are folded together into a single channel
    axis, the absolute value is taken, and the map is bilinearly resized to
    the spatial size of the first item. The resized maps are concatenated
    along the channel axis.

    Args:
        x: Sequence of 5-D tensors. Per the original comments these are laid
            out as (batch, anchors, grid_h, grid_w, values); all items must
            share the batch size.

    Returns:
        A tensor of shape (batch, sum(anchors * values), H0, W0) where
        (H0, W0) are axes 2 and 3 of ``x[0]``. For three scales with 3 anchors
        and 25 values each this is (batch, 225, H0, W0).
    """
    # Every scale is resampled onto the grid of the first (coarsest) output.
    target_size = x[0].size()[2:4]

    activations = []
    for scale_out in x:
        # (B, A, H, W, C) -> (B, A, C, H, W)
        channels_first = scale_out.permute(0, 1, 4, 2, 3)
        batch, n_anchors, n_values = channels_first.shape[:3]
        # Fold the anchor axis into the channel axis: (B, A*C, H, W).
        merged = channels_first.reshape(
            batch, n_anchors * n_values, *channels_first.shape[3:]
        )
        activations.append(
            torch.nn.functional.interpolate(
                torch.abs(merged), target_size, mode="bilinear"
            )
        )

    return torch.cat(activations, dim=1)
# Helper for yolo3_inference: turn one normalised midpoint box into pixel corners.
def _box_to_pixel_corners(box, width, height):
    """Return ``(x1, y1, x2, y2)`` integer pixel corners for a midpoint box.

    Args:
        box: ``(x_center, y_center, box_width, box_height)`` as fractions of
            the image size.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        Tuple of four ints, each clamped to the image bounds. The original
        code noted that a negative coordinate makes the drawn box collapse,
        but clamped only the top edge and then derived the bottom edge from
        the clamped value, which shifted the box; here the corners are
        computed first and clamped afterwards, on both axes.
    """
    x_center, y_center, box_w, box_h = box
    left = int((x_center - box_w / 2) * width)
    top = int((y_center - box_h / 2) * height)
    right = int(left + box_w * width)
    bottom = int(top + box_h * height)

    def _clamp(value, upper):
        return max(0, min(value, upper))

    return (
        _clamp(left, width),
        _clamp(top, height),
        _clamp(right, width),
        _clamp(bottom, height),
    )


# main function of the app
def yolo3_inference(input_img, gradcam=True, gradcam_opa=0.5, user_iou_threshold=0.6, user_threshold=0.5):
    """Run YOLOv3 on one image and return it with box annotations.

    Args:
        input_img: Image array as delivered by the ``gr.Image`` input.
        gradcam: When true, return an EigenCAM overlay instead of the plain
            preprocessed image.
        gradcam_opa: Opacity of the CAM heat-map in the overlay, 0..1
            (0 shows only the image, 1 shows only the heat-map).
        user_iou_threshold: IoU threshold passed to non-max suppression.
        user_threshold: Score threshold passed to non-max suppression.

    Returns:
        ``(image, sections)`` where ``sections`` is a list of
        ``((x1, y1, x2, y2), class_name)`` tuples in the format consumed by
        ``gr.AnnotatedImage``. ``image`` is the padded, resized input (float,
        0..1) or, with ``gradcam``, the uint8 overlay from
        ``show_cam_on_image``.

    Raises:
        gr.Error: If no image was supplied.
    """
    if input_img is None:
        raise gr.Error("Please provide an input image.")

    # `load_from_checkpoint` is a classmethod: call it on the class rather than
    # on a throwaway instance (which built the network twice and is rejected
    # by newer Lightning releases). Everything below runs on CPU tensors, so
    # map the weights to CPU explicitly.
    # NOTE(review): the checkpoint is still loaded on every call; caching the
    # model would be faster but would share CAM hooks across requests.
    inference_model = litmodelclass.LitYolo.load_from_checkpoint(
        "yolo3_improved_model.ckpt", map_location="cpu"
    )
    inference_model.eval()

    # Scale the configured anchors by the grid size of each prediction scale.
    anchors = (
        torch.tensor(config.ANCHORS)
        * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
    )

    # Resize the longest side to 416, pad to 416x416 and scale pixels to 0..1.
    test_transforms = Al.Compose(
        [
            Al.LongestMaxSize(max_size=416),
            Al.PadIfNeeded(
                min_height=416, min_width=416, border_mode=cv2.BORDER_CONSTANT
            ),
            Al.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
        ]
    )
    input_img_copy = test_transforms(image=input_img)["image"]
    input_img_tensor = transforms.ToTensor()(input_img_copy).unsqueeze(0)

    # Plain inference needs no autograd graph.
    bboxes = []
    with torch.no_grad():
        test_img_out = inference_model(input_img_tensor)
        for scale_out, anchor in zip(test_img_out, anchors):
            grid_size = scale_out.shape[2]
            boxes_scale = utils.cells_to_bboxes(
                scale_out, anchor, S=grid_size, is_preds=True
            )
            # Batch size is 1, so only the first image's boxes exist.
            bboxes.extend(boxes_scale[0])

    nms_boxes = utils.non_max_suppression(
        bboxes,
        iou_threshold=user_iou_threshold,
        threshold=user_threshold,
        box_format="midpoint",
    )

    # Each NMS box is [class, score, x_center, y_center, width, height].
    height, width = input_img_copy.shape[:2]
    sections = [
        (_box_to_pixel_corners(box[2:], width, height), class_labels[int(box[0])])
        for box in nms_boxes
    ]

    if not gradcam:
        # Same pixel values the original rebuilt from the input tensor.
        return (input_img_copy, sections)

    scores = [box[1] for box in nms_boxes]
    box_coords = [box[2:] for box in nms_boxes]
    targets = [FasterRCNNBoxScoreTarget(scores, box_coords)]
    target_layers = [inference_model.model]
    cam = EigenCAM(
        inference_model,
        target_layers,
        use_cuda=False,
        reshape_transform=yolov3_reshape_transform,
    )
    grayscale_cam = cam(input_tensor=input_img_tensor, targets=targets)[0, :]

    # The Gradio image is RGB, so request an RGB heat-map. `image_weight` is
    # the weight of the *image*, hence 1 - opacity of the CAM.
    visualization = show_cam_on_image(
        input_img_copy,
        grayscale_cam,
        use_rgb=True,
        image_weight=1 - gradcam_opa,
    )
    return (visualization, sections)
# app GUI
# NOTE(review): indentation was stripped from this copy, so which widgets
# belong to which `with gr.Row()` cannot be determined here — confirm the
# nesting against the original file.
# Input image and annotated output image.
with gr.Row():
img_input = gr.Image()
# Box colours come from the class-name -> hex map `colors_hex`.
img_output = gr.AnnotatedImage().style(color_map = colors_hex)
# User controls: Grad-CAM toggle, overlay opacity, and the IoU / score
# thresholds that yolo3_inference forwards to non-max suppression.
with gr.Row():
gradcam_check = gr.Checkbox(label="Gradcam")
gradcam_opa = gr.Slider(0, 1, value = 0.5, label="Opacity of GradCAM")
iou_threshold = gr.Slider(0, 1, value = 0.6, label="IOU Threshold")
threshold = gr.Slider(0, 1, value = 0.5, label="Threshold")
# Clicking the button runs yolo3_inference; the five inputs map positionally
# onto its parameters and the result goes to the annotated image.
section_btn = gr.Button("Identify Objects")
section_btn.click(yolo3_inference, inputs=[img_input,gradcam_check,gradcam_opa,iou_threshold,threshold], outputs=[img_output])
gr.Markdown("## Some Examples")
# Example gallery: each entry in `examples` provides only an image path, and
# results are not pre-computed (cache_examples=False).
gr.Examples(examples=examples,
inputs =[img_input,gradcam_check,gradcam_opa,iou_threshold,threshold],
outputs=img_output,
fn=yolo3_inference, cache_examples=False)
# Model performance summary (`model_stats`) wrapped in nested layout containers.
with gr.Row():
with gr.Box():
with gr.Row():
with gr.Column():
with gr.Box():
gr.Markdown(model_stats)
# Start the Gradio server only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    demo.launch()