import gradio as gr
from torchvision import transforms
import albumentations as Al
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
import cv2
from pytorch_grad_cam import EigenCAM
from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
# my files
import utils
import config
import litmodelclass
# gradio
model_stats = """
### YOLOv3 Model Implementation & Training Details
Github Link: https://github.com/santule/ERA/tree/main/S13
#### Model Performance:
1. **Validation loss: 6.05**
2. **Class accuracy: 82.4%**
3. **No-object accuracy: 98.05%**
4. **Object accuracy: 72.3%**
"""
title = "YOLOv3 trained on PASCAL VOC with GradCAM"
description = "Gradio interface to run inference on a YOLOv3 model and visualise GradCAM results"
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # YOLOv3 model trained on the PASCAL VOC dataset - Demo!
        20 classes supported - aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor
        """
    )
    # example images
    examples = [
        ["example_images/009948.jpg"],
        ["example_images/000041.jpg"],
        ["example_images/000042.jpg"],
        ["example_images/000043.jpg"],
        ["example_images/000044.jpg"],
        ["example_images/000045.jpg"],
    ]
    # colours for the bounding boxes
    cmap = plt.get_cmap("tab20b")
    class_labels = config.PASCAL_CLASSES
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
    colors_hex = {class_labels[i]: matplotlib.colors.rgb2hex(colors[i]) for i in range(len(class_labels))}
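    # colors_hex maps each class name to a hex colour string so that
    # gr.AnnotatedImage draws every class in a stable, distinct colour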
    # consolidate the multi-scale output of the model so GradCAM can work on a single tensor
    def yolov3_reshape_transform(x):
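        # YOLOv3 predicts at three scales; each element of x has shape
        # (batch, anchors, S, S, 5 + num_classes) = (1, 3, S, S, 25) for PASCAL VOC.
        # Fold the anchor and prediction dimensions into the channel dimension,
        # upsample every scale to the coarsest grid, then concatenate, so the CAM
        # sees one (1, 225, 13, 13) activation map.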
        activations = []
        size = x[0].size()[2:4]  # grid of the coarsest scale, 13 x 13
        for x_item in x:
            x_permute = x_item.permute(0, 1, 4, 2, 3)  # (1, 3, 25, S, S)
            x_permute = x_permute.reshape((x_permute.shape[0],
                                           x_permute.shape[1] * x_permute.shape[2],
                                           *x_permute.shape[3:]))  # (1, 75, S, S)
            activations.append(torch.nn.functional.interpolate(torch.abs(x_permute), size, mode='bilinear'))
        activations = torch.cat(activations, dim=1)  # (1, 225, 13, 13)
        return activations
    # main function of the app
    def yolo3_inference(input_img, gradcam=True, gradcam_opa=0.5, user_iou_threshold=0.6, user_threshold=0.5):
        # load the trained model; load_from_checkpoint is a classmethod, so no
        # throwaway instance is needed
        inference_model = litmodelclass.LitYolo.load_from_checkpoint("yolo3_improved_model.ckpt")
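        # LitYolo is assumed to be the LightningModule wrapper around the YOLOv3
        # network defined in litmodelclass.py, with the network exposed as .model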
        # bboxes, gradcam
        anchors = (torch.tensor(config.ANCHORS) * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2))
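        # config.ANCHORS is assumed to hold anchors normalised to [0, 1];
        # multiplying by the grid sizes in config.S (13, 26, 52) rescales them to
        # grid-cell units, which is what cells_to_bboxes expects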
        bboxes = [[]]  # one list of candidate boxes per image (batch size is 1 here)
        sections = []  # (box, label) pairs returned to gr.AnnotatedImage
        nms_boxes_output = []
        # image transformation: letterbox the input to 416 x 416 and scale pixels to [0, 1]
        test_transforms = Al.Compose(
            [
                Al.LongestMaxSize(max_size=416),
                Al.PadIfNeeded(
                    min_height=416, min_width=416, border_mode=cv2.BORDER_CONSTANT
                ),
                Al.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
            ]
        )
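        # with mean 0, std 1 and max_pixel_value 255, Normalize simply divides by
        # 255, keeping the image a float array in [0, 1], exactly the format
        # show_cam_on_image expects later; the tensor conversion is done separately
        # with torchvision's ToTensor below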
        input_img_copy = test_transforms(image=input_img)['image']
        transform = transforms.ToTensor()
        input_img_tensor = transform(input_img_copy).unsqueeze(0)
        # infer the image
        inference_model.eval()
        test_img_out = inference_model(input_img_tensor)
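        # the model returns one prediction tensor per scale, each shaped
        # (batch, anchors, S, S, 5 + num_classes) for S in 13, 26, 52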
        # process the outputs to create bounding boxes
        for i in range(3):
            batch_size, A, S, _, _ = test_img_out[i].shape  # 1, anchors = 3, S = 13/26/52
            anchor = anchors[i]
            boxes_scale_i = utils.cells_to_bboxes(test_img_out[i], anchor, S=S, is_preds=True)
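            # cells_to_bboxes decodes the grid predictions into one list per image
            # of [class, objectness score, x, y, w, h] boxes, with midpoint
            # coordinates normalised to [0, 1] (format inferred from how the
            # boxes are consumed below)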
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box
        # nms across the candidates pooled from all three scales
        nms_boxes = utils.non_max_suppression(bboxes[0], iou_threshold=user_iou_threshold, threshold=user_threshold, box_format="midpoint")
        nms_boxes_output.append(nms_boxes)
        # use gradio image annotations
        height, width = 416, 416
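        # convert each surviving box from normalised midpoint format to the pixel
        # corner format (x1, y1, x2, y2) that gr.AnnotatedImage expects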
        for box in nms_boxes:
            class_pred = box[0]
            box = box[2:]  # keep only (x, y, w, h)
            upper_left_x = int((box[0] - box[2] / 2) * width)
            upper_left_y = max(int((box[1] - box[3] / 2) * height), 0)  # clamp at 0, else the box collapses
            lower_right_x = int(upper_left_x + (box[2] * width))
            lower_right_y = int(upper_left_y + (box[3] * height))
            sections.append(((upper_left_x, upper_left_y, lower_right_x, lower_right_y), class_labels[int(class_pred)]))
        # for gradcam
        if gradcam:
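            # EigenCAM is gradient-free: it takes the first principal component of
            # the reshaped activations, so the box-score targets below are never
            # actually evaluated; they are passed only so pytorch_grad_cam skips
            # its default classifier-target path, which cannot handle YOLO's
            # multi-scale output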
            objs = [int(b[0]) for b in nms_boxes_output[0]]    # predicted class indices
            bbox_coord = [b[2:] for b in nms_boxes_output[0]]  # (x, y, w, h) per box
            targets = [FasterRCNNBoxScoreTarget(objs, bbox_coord)]
            target_layers = [inference_model.model]
            cam = EigenCAM(inference_model, target_layers, use_cuda=False, reshape_transform=yolov3_reshape_transform)
            grayscale_cam = cam(input_tensor=input_img_tensor, targets=targets)
            grayscale_cam = grayscale_cam[0, :]
            # show_cam_on_image weights the *image* by image_weight, so invert the
            # slider value to make it behave as GradCAM opacity
            visualization = show_cam_on_image(input_img_copy, grayscale_cam, use_rgb=False, image_weight=1 - gradcam_opa)
            return (visualization, sections)
        else:
            return (np.array(input_img_tensor.squeeze(0).permute(1, 2, 0)), sections)
    # app GUI
    with gr.Row():
        img_input = gr.Image()
        img_output = gr.AnnotatedImage().style(color_map=colors_hex)
    with gr.Row():
        gradcam_check = gr.Checkbox(label="GradCAM")
        gradcam_opa = gr.Slider(0, 1, value=0.5, label="Opacity of GradCAM")
        iou_threshold = gr.Slider(0, 1, value=0.6, label="IOU Threshold")
        threshold = gr.Slider(0, 1, value=0.5, label="Confidence Threshold")
    section_btn = gr.Button("Identify Objects")
    section_btn.click(yolo3_inference, inputs=[img_input, gradcam_check, gradcam_opa, iou_threshold, threshold], outputs=[img_output])
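    # yolo3_inference returns an (image, sections) tuple, which is exactly the
    # value format gr.AnnotatedImage accepts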
| gr.Markdown("## Some Examples") | |
| gr.Examples(examples=examples, | |
| inputs =[img_input,gradcam_check,gradcam_opa,iou_threshold,threshold], | |
| outputs=img_output, | |
| fn=yolo3_inference, cache_examples=False) | |
    with gr.Row():
        with gr.Box():
            with gr.Row():
                with gr.Column():
                    with gr.Box():
                        gr.Markdown(model_stats)
if __name__ == "__main__":
    demo.launch()