Spaces:
Sleeping
Sleeping
import torch.nn as nn | |
import torch.nn.functional as F | |
import torch | |
from torchvision import transforms | |
import cv2 | |
import numpy as np | |
from pytorch_grad_cam import GradCAM | |
from pytorch_grad_cam import GradCAM | |
from pytorch_grad_cam.utils.image import show_cam_on_image | |
def apply_normalization(chennels):
    """Return a fresh 2-D batch-normalization layer.

    Args:
        chennels: Number of feature channels the layer normalizes. The
            spelling is kept as-is because it is part of the keyword
            interface.

    Returns:
        A new ``nn.BatchNorm2d`` instance over ``chennels`` features.
    """
    norm_layer = nn.BatchNorm2d(num_features=chennels)
    return norm_layer
class CustomResnet(nn.Module):
    """Small ResNet-style classifier for 3-channel images with 10 outputs.

    Layout (all convolutions are 3x3, stride 1, padding 1, no bias):

    * ``preplayer``  : Conv(3 -> 64) > Norm > ReLU
    * ``convlayer1`` : Conv(64 -> 128) > MaxPool(2) > Norm > ReLU
    * ``reslayer1``  : two Conv(128 -> 128) > Norm > ReLU stages, added
      back onto the output of ``convlayer1``
    * ``convlayer2`` : Conv(128 -> 256) > MaxPool(2) > Norm > ReLU
    * ``convlayer3`` : Conv(256 -> 512) > MaxPool(2) > Norm > ReLU
    * ``reslayer2``  : two Conv(512 -> 512) > Norm > ReLU stages, added
      back onto the output of ``convlayer3``
    * ``maxpool3``   : MaxPool(kernel 4, stride 2)
    * ``linear1``    : Linear(512 -> 10)

    The three 2x2 poolings followed by the 4x4 pooling reduce a 32x32 input
    to a single 1x1x512 feature vector, which is what ``linear1`` expects.

    The submodule names and the order of layers inside each ``nn.Sequential``
    are part of the interface: they define the ``state_dict`` keys and are
    indexed from outside the class (e.g. ``model.convlayer3[-2]``).
    """

    def __init__(self):
        super().__init__()

        # PrepLayer: Conv 3x3 (s1, p1) >> Norm >> ReLU  [64 channels]
        self.preplayer = nn.Sequential(
            nn.Conv2d(3, 64, (3, 3), padding=1, stride=1, bias=False),
            apply_normalization(64),
            nn.ReLU(),
        )

        # Layer 1: X = Conv 3x3 (s1, p1) >> MaxPool2d >> Norm >> ReLU  [128 channels]
        self.convlayer1 = nn.Sequential(
            nn.Conv2d(64, 128, (3, 3), padding=1, stride=1, bias=False),
            nn.MaxPool2d(2, 2),
            apply_normalization(128),
            nn.ReLU(),
        )
        # R1 = ResBlock(Conv-Norm-ReLU-Conv-Norm-ReLU)(X)  [128 channels]
        self.reslayer1 = nn.Sequential(
            nn.Conv2d(128, 128, (3, 3), padding=1, stride=1, bias=False),
            apply_normalization(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, (3, 3), padding=1, stride=1, bias=False),
            apply_normalization(128),
            nn.ReLU(),
        )

        # Layer 2: Conv 3x3 (s1, p1) >> MaxPool2d >> Norm >> ReLU  [256 channels]
        self.convlayer2 = nn.Sequential(
            nn.Conv2d(128, 256, (3, 3), padding=1, stride=1, bias=False),
            nn.MaxPool2d(2, 2),
            apply_normalization(256),
            nn.ReLU(),
        )

        # Layer 3: X = Conv 3x3 (s1, p1) >> MaxPool2d >> Norm >> ReLU  [512 channels]
        self.convlayer3 = nn.Sequential(
            nn.Conv2d(256, 512, (3, 3), padding=1, stride=1, bias=False),
            nn.MaxPool2d(2, 2),
            apply_normalization(512),
            nn.ReLU(),
        )
        # R2 = ResBlock(Conv-Norm-ReLU-Conv-Norm-ReLU)(X)  [512 channels]
        self.reslayer2 = nn.Sequential(
            nn.Conv2d(512, 512, (3, 3), padding=1, stride=1, bias=False),
            apply_normalization(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, (3, 3), padding=1, stride=1, bias=False),
            apply_normalization(512),
            nn.ReLU(),
        )

        # Final pooling (kernel 4, stride 2) and the 512 -> 10 classifier head.
        self.maxpool3 = nn.MaxPool2d(4, 2)
        self.linear1 = nn.Linear(512, 10)

    def forward(self, x):
        """Compute per-class log-probabilities.

        Args:
            x: Batch of images with 3 channels, laid out as (N, 3, H, W).

        Returns:
            Tensor of shape (N, 10) holding log-softmax scores.

        Raises:
            RuntimeError: If the pooled feature map is not 1x1 (for example
                H = W = 48), because the flattened features no longer match
                the 512 inputs of ``linear1``.
        """
        x = self.preplayer(x)

        x = self.convlayer1(x)
        x = x + self.reslayer1(x)  # first residual connection

        x = self.convlayer2(x)

        x = self.convlayer3(x)
        x = x + self.reslayer2(x)  # second residual connection

        x = self.maxpool3(x)
        # Flatten per sample. The previous ``view(-1, 512)`` silently folded
        # leftover spatial positions into the batch dimension whenever the
        # pooled map was larger than 1x1, returning more rows than images.
        x = x.view(x.size(0), -1)
        x = self.linear1(x)
        return F.log_softmax(x, dim=-1)
# Grad-CAM visualization for a single input image.
def get_gradcam(model, input_img, opacity):
    """Overlay a Grad-CAM heatmap on the input image.

    Args:
        model: Network exposing a ``convlayer3`` sequence; its second-to-last
            entry is used as the Grad-CAM target layer.
        input_img: Image in any form accepted by ``transforms.ToTensor``.
        opacity: Passed to ``show_cam_on_image`` as ``image_weight``.

    Returns:
        The blended image produced by ``show_cam_on_image`` (``use_rgb=True``).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Single-image batch on the selected device.
    input_tensor = transforms.ToTensor()(input_img).to(device).unsqueeze(0)

    # ``targets=None``: per the pytorch-grad-cam documentation the library
    # then explains the highest-scoring class itself, so no separate forward
    # pass is needed here to find the prediction.
    targets = None
    target_layers = [model.convlayer3[-2]]

    # Only request CUDA when it is actually available; a hard-coded
    # ``use_cuda=True`` breaks on CPU-only hosts.
    # NOTE(review): newer pytorch-grad-cam releases appear to have dropped the
    # ``use_cuda`` argument -- confirm against the pinned version.
    cam = GradCAM(
        model=model,
        target_layers=target_layers,
        use_cuda=(device.type == 'cuda'),
    )
    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]

    # Maps each channel value v to v * 0.23 + 0.50.
    # NOTE(review): the tensor above is never normalized in this function, so
    # this "inverse" only compresses the [0, 1] range -- confirm it is intended.
    inv_normalize = transforms.Normalize(
        mean=[-0.50/0.23, -0.50/0.23, -0.50/0.23],
        std=[1/0.23, 1/0.23, 1/0.23]
    )
    img = inv_normalize(input_tensor.squeeze(0).detach().cpu())

    # CHW -> HWC for show_cam_on_image.
    rgb_img = img.permute(1, 2, 0).numpy()
    return show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True, image_weight=opacity)
def get_misclassified_images(show_misclassified, num):
    """Load one of the stored misclassified-example images on request.

    Args:
        show_misclassified: When falsy, nothing is loaded and ``None`` is
            returned.
        num: Example number; converted with ``int`` to build the file name.

    Returns:
        The result of ``cv2.imread`` for
        ``missclassified_images_examples/<num>_missclassified.png``, or
        ``None`` when ``show_misclassified`` is falsy.
    """
    if not show_misclassified:
        return None

    image_path = f"missclassified_images_examples/{int(num)}_missclassified.png"
    # NOTE(review): cv2.imread yields BGR channel order -- confirm the caller
    # expects that.
    return cv2.imread(image_path)
def main_inference(num_of_output_classes, classes, model, input_img):
    """Run the model on one image and report class confidences.

    Args:
        num_of_output_classes: How many entries to report. Indices
            ``0 .. num_of_output_classes - 1`` are used, i.e. the first
            entries of ``classes``, not the most probable ones.
        classes: Index-addressable collection of class names.
        model: Callable network; its output for the single-image batch is
            flattened and passed through a softmax.
        input_img: Image in any form accepted by ``transforms.ToTensor``.

    Returns:
        Dict mapping ``classes[i]`` to its softmax probability as a float.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Single-image batch on the selected device.
    batch = transforms.ToTensor()(input_img).to(device).unsqueeze(0)

    # One forward pass only (the original ran the model twice and discarded
    # the second result); no autograd graph is needed for inference.
    with torch.no_grad():
        outputs = model(batch)

    # The batch holds one image, so flattening leaves one score per class.
    probabilities = torch.softmax(outputs.flatten(), dim=0).tolist()

    return {classes[i]: probabilities[i] for i in range(num_of_output_classes)}
# def run_inference(input_img, num_of_output_classes,transparency): | |
# transform = transforms.ToTensor() | |
# input_img = transform(input_img) | |
# input_img = input_img.to(device) | |
# input_img = input_img.unsqueeze(0) | |
# softmax = torch.nn.Softmax(dim=0) | |
# outputs = model(input_img) | |
# out = softmax(outputs.flatten()) | |
# _, prediction = torch.max(outputs, 1) | |
# confidences = {classes[i]:float(out[i]) for i in range(num_of_output_classes)} | |
# target_layers = [model.convlayer3[-2]] | |
# cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True) | |
# grayscale_cam = cam(input_tensor=input_img, targets=targets) | |
# grayscale_cam = grayscale_cam[0, :] | |
# img = input_img.squeeze(0).to('cpu') | |
# img = inv_normalize(img) | |
# rgb_img = np.transpose(img, (1, 2, 0)) | |
# rgb_img = rgb_img.numpy() | |
# visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency) | |
# return confidences, rgb_img, transparency,grayscale_cam | |