# ERA_S12 / app.py
# Gosula's picture
# Update app.py
# 689ec47
# raw
# history blame
# No virus
# 4.27 kB
import torch
from torchvision import transforms
import numpy as np
import gradio as gr
from PIL import Image
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from custom_resnet import *
#from resnet import ResNet18 # Assuming you have a custom ResNet18 implementation
def load_custom_state_dict(model, state_dict):
    """Load the entries of ``state_dict`` whose keys exist in ``model``.

    Keys in ``state_dict`` that the model does not have are ignored, and
    model entries that are absent from ``state_dict`` keep their current
    values.  Both situations are summarised in a single warning so that a
    checkpoint whose key names do not line up with the model is noticed
    instead of silently loading nothing.

    Args:
        model: Object exposing ``state_dict()`` and ``load_state_dict()``.
        state_dict: Mapping from parameter/buffer name to value.

    Raises:
        Whatever ``model.load_state_dict`` raises for the merged mapping.
    """
    import warnings  # local import keeps this block self-contained

    model_state_dict = model.state_dict()

    # Keep only the checkpoint entries the model actually knows about.
    matched = {k: v for k, v in state_dict.items() if k in model_state_dict}
    skipped = [k for k in state_dict if k not in model_state_dict]
    missing = [k for k in model_state_dict if k not in state_dict]

    if skipped or missing:
        warnings.warn(
            f"load_custom_state_dict: loaded {len(matched)} of "
            f"{len(model_state_dict)} model entries; "
            f"{len(skipped)} checkpoint key(s) ignored, "
            f"{len(missing)} model key(s) left unchanged.",
            stacklevel=2,
        )

    # Overlay the matching entries on the model's own state and load the
    # merged mapping back, so every key the model expects is present.
    model_state_dict.update(matched)
    model.load_state_dict(model_state_dict)
# Build the network and restore its weights on the CPU.
model = CustomResNet()  # Replace this with your CustomResNet if necessary
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load("model_pth.ckpt", map_location=torch.device('cpu'))
# The weights are stored under the checkpoint's 'state_dict' entry.
load_custom_state_dict(model, state_dict['state_dict'])
# This app only runs inference: switch to eval mode so that any
# BatchNorm/Dropout layers in the network behave deterministically and
# no running statistics are updated by user requests.
model.eval()
# Per-channel mean/std pairs from which the inverse of a
# Normalize(mean, std) transform is derived:
#   x = (y - (-mean / std)) / (1 / std)  ==  y * std + mean
# NOTE(review): 0.494 may be a typo for 0.4914 — confirm against the
# transform used in training before changing it.
_NORM_MEAN = (0.494, 0.4822, 0.4465)
_NORM_STD = (0.2470, 0.2435, 0.2616)

inv_normalize = transforms.Normalize(
    mean=[-m / s for m, s in zip(_NORM_MEAN, _NORM_STD)],
    std=[1 / s for s in _NORM_STD],
)

# Class names, indexed by the model's output position.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
def inference(input_img, transparency=0.5, target_layer_number=-1, num_images=1, num_top_classes=3):
    """Classify an image and overlay a GradCAM heat-map on it.

    Args:
        input_img: Image to classify, as accepted by
            ``transforms.ToTensor`` (array or PIL image).  Pixel values
            are assumed to be in 0-255, since the overlay divides by 255.
        transparency: Weight of the original image in the overlay
            (passed as ``image_weight`` to ``show_cam_on_image``).
        target_layer_number: Index into ``model.layer_2`` selecting the
            layer GradCAM is computed on.
        num_images: Accepted for interface compatibility; currently unused.
        num_top_classes: How many of the highest-probability classes to
            return; clamped to the range 1..len(classes).

    Returns:
        A tuple ``(top_classes, visualization)`` where ``top_classes``
        maps class name to probability (highest first) and
        ``visualization`` is the image returned by ``show_cam_on_image``.

    Raises:
        ValueError: If no image was supplied.
    """
    if input_img is None:
        raise ValueError("No input image was provided.")

    # UI widgets may deliver floats; indexing and slicing require ints.
    layer_index = int(target_layer_number)
    top_k = max(1, min(int(num_top_classes), len(classes)))

    # NOTE(review): the tensor is fed to the model without a Normalize
    # step — confirm this matches the preprocessing used in training.
    batch = transforms.ToTensor()(input_img).unsqueeze(0)

    # Classification pass: probabilities only, so no autograd graph needed.
    with torch.no_grad():
        probabilities = torch.softmax(model(batch), dim=1)[0]
    confidences = {name: float(p) for name, p in zip(classes, probabilities)}

    # GradCAM runs its own forward/backward pass on the chosen layer;
    # targets=None lets the library pick the target category itself.
    cam = GradCAM(model=model, target_layers=[model.layer_2[layer_index]], use_cuda=False)
    grayscale_cam = cam(input_tensor=batch, targets=None)[0, :]

    # The overlay helper expects a float image scaled to [0, 1].
    base_img = np.asarray(input_img, dtype=np.float32) / 255
    visualization = show_cam_on_image(base_img, grayscale_cam, use_rgb=True, image_weight=transparency)

    ranked = sorted(confidences.items(), key=lambda item: item[1], reverse=True)
    top_classes = dict(ranked[:top_k])
    return top_classes, visualization
title = "CIFAR10 trained on ResNet18 Model with GradCAM"
description = "A simple Gradio interface to infer on ResNet model, and get GradCAM results"

# Each example row supplies: image path, GradCAM opacity, target layer index.
examples = [
    [f"{subject}_{idx}.jpg", 0.5, -1]
    for subject in ("car", "cat", "dog", "frog", "horse")
    for idx in (1, 2)
]

# Input components are listed in the same order as inference()'s parameters.
demo = gr.Interface(
    inference,
    inputs=[
        gr.Image(shape=(32, 32), label="Input Image"),
        gr.Slider(0, 1, value=0.5, label="Opacity of GradCAM"),
        gr.Slider(-2, -1, value=-2, step=1, label="Which Layer?"),
        # `value=` (not the legacy `default=`) so the initial value is
        # applied, consistent with the sliders in this list.
        gr.Number(value=1, label="Number of GradCAM Images to Show"),
        gr.Slider(1, 10, value=3, step=1, label="Number of Top Classes to Show"),
    ],
    outputs=[
        # Allow up to 10 rows so the label can show as many classes as the
        # "Number of Top Classes to Show" slider permits.
        gr.Label(num_top_classes=10),
        gr.Image(shape=(32, 32), label="Output").style(width=128, height=128),
    ],
    title=title,
    description=description,
    examples=examples,
)
demo.launch()