# Spaces: wgetdd / CIFAR_Custom_Resnet (Sleeping)
# File size: 6,857 Bytes

import torch.nn as nn
import torch.nn.functional as F
import torch 
from torchvision import transforms
import cv2
import numpy as np
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
import matplotlib.pyplot as plt
import textwrap
import io

def apply_normalization(chennels):
    """Create a 2-D batch-normalization layer for the given channel count.

    Args:
        chennels: Number of feature channels the layer normalizes. The
            spelling of the parameter name is kept so keyword callers
            continue to work.

    Returns:
        A new ``nn.BatchNorm2d`` instance with ``chennels`` features.
    """
    norm_layer = nn.BatchNorm2d(num_features=chennels)
    return norm_layer
  
class CustomResnet(nn.Module):
    """Small residual CNN mapping 3-channel images to 10 log-probabilities.

    Layout (channel widths in brackets):

        preplayer   Conv3x3 >> Norm >> ReLU                  [64]
        convlayer1  Conv3x3 >> MaxPool(2) >> Norm >> ReLU    [128]
        reslayer1   (Conv3x3 >> Norm >> ReLU) x 2, added to its input [128]
        convlayer2  Conv3x3 >> MaxPool(2) >> Norm >> ReLU    [256]
        convlayer3  Conv3x3 >> MaxPool(2) >> Norm >> ReLU    [512]
        reslayer2   (Conv3x3 >> Norm >> ReLU) x 2, added to its input [512]
        maxpool3    MaxPool(kernel 4, stride 2)
        linear1     Linear(512 -> 10), followed by log-softmax

    The attribute names and the order of the modules inside every
    ``nn.Sequential`` are part of the checkpoint format (``state_dict`` keys)
    and are also accessed by name from outside the class, so they must not
    be changed.
    """

    def __init__(self):
        super().__init__()
        self.preplayer = self._conv_block(3, 64)
        self.convlayer1 = self._conv_block(64, 128, pool=True)
        self.reslayer1 = self._res_block(128)
        self.convlayer2 = self._conv_block(128, 256, pool=True)
        self.convlayer3 = self._conv_block(256, 512, pool=True)
        self.reslayer2 = self._res_block(512)
        self.maxpool3 = nn.MaxPool2d(4, 2)
        self.linear1 = nn.Linear(512, 10)

    @staticmethod
    def _conv3x3(in_channels, out_channels):
        """Return a bias-free 3x3 convolution that keeps the spatial size."""
        return nn.Conv2d(
            in_channels, out_channels, (3, 3), padding=1, stride=1, bias=False
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        """Return Conv3x3 >> [MaxPool(2)] >> Norm >> ReLU as one Sequential."""
        layers = [CustomResnet._conv3x3(in_channels, out_channels)]
        if pool:
            layers.append(nn.MaxPool2d(2, 2))
        layers.append(apply_normalization(out_channels))
        layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    @staticmethod
    def _res_block(channels):
        """Return two stacked Conv3x3 >> Norm >> ReLU stages (residual branch)."""
        layers = []
        for _ in range(2):
            layers.append(CustomResnet._conv3x3(channels, channels))
            layers.append(apply_normalization(channels))
            layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    def forward(self, x):
        """Compute class log-probabilities for a batch of images.

        Args:
            x: Tensor with 3 channels in dimension 1 (required by the first
                convolution).

        Returns:
            Tensor of log-softmax scores with 10 entries in the last
            dimension.
        """
        x = self.preplayer(x)

        x = self.convlayer1(x)
        x = x + self.reslayer1(x)  # first skip connection

        x = self.convlayer2(x)
        x = self.convlayer3(x)
        x = x + self.reslayer2(x)  # second skip connection

        x = self.maxpool3(x)
        # The reshape only preserves the batch dimension when the pooled
        # feature map is 1x1 (e.g. 32x32 inputs); for larger maps the extra
        # spatial positions are folded into the leading dimension.
        x = x.view(-1, 512)
        x = self.linear1(x)
        return F.log_softmax(x, dim=-1)


def resize_image(image, target_size=(200, 200)):
    """Resize ``image`` with ``cv2.resize``.

    Args:
        image: Image array accepted by ``cv2.resize``.
        target_size: Passed to ``cv2.resize`` as its output size
            (OpenCV reads it as ``(width, height)``). Defaults to
            ``(200, 200)``.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    resized = cv2.resize(image, target_size)
    return resized

def wrap_text(text, width=20):
    """Wrap ``text`` so that no line is longer than ``width`` characters.

    Args:
        text: The string to wrap.
        width: Maximum line length; defaults to 20.

    Returns:
        The wrapped text as a single string with lines joined by newlines.
    """
    wrapped_lines = textwrap.wrap(text, width)
    return "\n".join(wrapped_lines)
    
import io
# define a function which returns an image as numpy array from figure
def get_img_from_fig(fig, dpi=180):
    """Rasterize a figure to an RGB image array.

    The figure is saved as PNG into an in-memory buffer, decoded with OpenCV
    and converted from OpenCV's BGR channel order to RGB.

    Args:
        fig: Object exposing ``savefig(buffer, format=..., dpi=...)``.
        dpi: Resolution handed to ``savefig``; defaults to 180.

    Returns:
        The decoded image after ``cv2.COLOR_BGR2RGB`` conversion.
    """
    with io.BytesIO() as png_stream:
        fig.savefig(png_stream, format="png", dpi=dpi)
        png_stream.seek(0)
        # getvalue() copies the bytes, so the array outlives the buffer.
        encoded = np.frombuffer(png_stream.getvalue(), dtype=np.uint8)

    bgr_image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    
def save_plot_as_image(images, texts, output_path):
    """Render images as a titled grid and return the grid as an image array.

    The images are laid out in a grid of at most 4 columns; unused cells are
    left blank. Despite the name, nothing is written to disk.

    Args:
        images: Non-empty sequence of images accepted by ``Axes.imshow``.
        texts: Optional sequence of titles. ``texts[i]`` is wrapped and used
            as the title of ``images[i]``; may be ``None`` or shorter than
            ``images``.
        output_path: Unused. Kept so existing callers keep working.

    Returns:
        The rendered grid as returned by ``get_img_from_fig``.

    Raises:
        ValueError: If ``images`` is empty.
    """
    num_images = len(images)
    if num_images == 0:
        raise ValueError("images must contain at least one image")

    num_cols = min(4, num_images)  # at most 4 columns
    num_rows = (num_images - 1) // num_cols + 1

    # squeeze=False always yields a 2-D array of Axes, so a single image
    # (1x1 grid) works with the ``axes.flat`` iteration below.
    fig, axes = plt.subplots(
        num_rows,
        num_cols,
        figsize=(3 * num_cols, 3 * num_rows),
        squeeze=False,
    )
    try:
        fig.subplots_adjust(hspace=0.9 / num_rows)
        for i, ax in enumerate(axes.flat):
            if i < num_images:
                ax.imshow(images[i], cmap='gray')
                if texts is not None and i < len(texts):
                    ax.set_title(wrap_text(texts[i]), fontsize=12, pad=5)
            ax.axis('off')
        fig.tight_layout()
        return get_img_from_fig(fig)
    finally:
        # Release the figure; otherwise every call leaves one more open
        # figure registered with pyplot.
        plt.close(fig)


# Build a grid of Grad-CAM overlays for every non-ReLU layer of one model block
def get_gradcam(model, input_img, opacity, layer):
    """Build a grid of Grad-CAM overlays for the layers of one model block.

    One overlay is produced per non-ReLU module of the selected block; each
    overlay is titled with the module's string representation.

    Args:
        model: Network exposing ``convlayer1``, ``convlayer2``,
            ``convlayer3``, ``reslayer1`` and ``reslayer2`` as iterable
            blocks of modules.
        input_img: Image accepted by ``transforms.ToTensor``.
        opacity: Passed to ``show_cam_on_image`` as ``image_weight``.
        layer: One of ``"convblock1"``, ``"convblock2"``, ``"convblock3"``,
            ``"resblock1"`` or ``"resblock2"``.

    Returns:
        The rendered grid as returned by ``save_plot_as_image``.

    Raises:
        ValueError: If ``layer`` is not one of the supported names.
    """
    block_attr_by_name = {
        "convblock1": "convlayer1",
        "convblock2": "convlayer2",
        "convblock3": "convlayer3",
        "resblock1": "reslayer1",
        "resblock2": "reslayer2",
    }
    if layer not in block_attr_by_name:
        raise ValueError(
            f"Unknown layer {layer!r}; expected one of {sorted(block_attr_by_name)}"
        )
    target_block = getattr(model, block_attr_by_name[layer])
    # Activation modules are skipped; every other module gets its own CAM.
    cam_layers = [m for m in target_block if not isinstance(m, nn.ReLU)]

    # The input is moved to this device; the model itself is not moved here,
    # so the caller is expected to have placed it on the same device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    input_tensor = transforms.ToTensor()(input_img).to(device).unsqueeze(0)

    # Background image for the overlays. It is identical for every layer,
    # so it is computed once instead of once per loop iteration.
    # NOTE(review): no Normalize is applied to the input in this function,
    # yet this "inverse" (x * 0.23 + 0.5) is still applied, which compresses
    # the background's value range - confirm whether that is intended.
    inv_normalize = transforms.Normalize(
        mean=[-0.50/0.23, -0.50/0.23, -0.50/0.23],
        std=[1/0.23, 1/0.23, 1/0.23]
    )
    background = inv_normalize(input_tensor.squeeze(0).to('cpu'))
    rgb_img = background.permute(1, 2, 0).numpy()  # CHW -> HWC

    targets = None  # handed to the CAM call unchanged
    overlays, titles = [], []
    for cam_layer in cam_layers:
        # NOTE(review): newer pytorch-grad-cam releases reportedly dropped
        # the `use_cuda` argument - confirm against the pinned version.
        cam = GradCAM(model=model, target_layers=[cam_layer], use_cuda=False)
        grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]
        overlay = show_cam_on_image(
            rgb_img, grayscale_cam, use_rgb=True, image_weight=opacity
        )
        overlays.append(resize_image(overlay))
        titles.append(str(cam_layer))

    return save_plot_as_image(overlays, titles, "plot.png")
    
def get_misclassified_images(show_misclassified, num):
    """Load one of the stored misclassified-example images on request.

    Args:
        show_misclassified: When falsy, nothing is loaded.
        num: Value converted with ``int`` to select
            ``missclassified_images_examples/<num>.png``.

    Returns:
        ``None`` when ``show_misclassified`` is falsy; otherwise whatever
        ``cv2.imread`` returns for the selected file.
    """
    if not show_misclassified:
        return None

    image_path = f"missclassified_images_examples/{int(num)}.png"
    # NOTE(review): cv2.imread yields BGR channel order - confirm the caller
    # expects that rather than RGB.
    return cv2.imread(image_path)


def main_inference(num_of_output_classes, classes, model, input_img):
    """Classify a single image and return per-class confidences.

    Args:
        num_of_output_classes: How many leading classes to report.
        classes: Sequence of class labels; ``classes[i]`` labels output ``i``.
        model: Callable network taking a ``(1, C, H, W)`` tensor. It is
            expected to already live on the device selected below.
        input_img: Image accepted by ``transforms.ToTensor``.

    Returns:
        Dict mapping ``classes[i]`` to the softmax probability of output
        ``i`` for ``i`` in ``range(num_of_output_classes)``.

    Raises:
        IndexError: If ``num_of_output_classes`` exceeds the number of model
            outputs or the length of ``classes``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch = transforms.ToTensor()(input_img).to(device).unsqueeze(0)

    # A single forward pass is enough, and inference needs no autograd graph.
    with torch.no_grad():
        outputs = model(batch)

    # The batch holds one image, so flattening yields the per-class scores.
    # Softmax is applied regardless of whether the model emits logits or
    # log-probabilities; both give the same probabilities.
    probabilities = torch.softmax(outputs.flatten(), dim=0).tolist()
    return {
        classes[i]: probabilities[i] for i in range(num_of_output_classes)
    }