File size: 4,929 Bytes
ae11e16
 
 
 
 
 
 
a756789
ae11e16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9986d5d
ae11e16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8364d93
ae11e16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import os
from typing import List

import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F
from model import DavidPageNet
from PIL import Image
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from torchvision import transforms


# Per-channel normalization statistics (the ImageNet mean and std).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Parameters for a second Normalize that undoes the first one:
# normalizing y with (-mean/std, 1/std) yields y * std + mean.
inv_std = [1 / sigma for sigma in std]
inv_mean = [-mu / sigma for mu, sigma in zip(mean, std)]

# Preprocessing pipeline: convert the image to a tensor, then standardize
# each channel with `mean` / `std`.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

# Inverse of the Normalize step above (built from `inv_mean` / `inv_std`),
# used to turn a preprocessed tensor back into a displayable image.
inv_normalize = transforms.Normalize(mean=inv_mean, std=inv_std)

# Human-readable labels, indexed by the model's output class id.
# The order is significant: positions are used both to name predictions
# and to look up a class id from a label.
classes = [
    "plane", "car", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]


class Gradio:
    """Inference and Grad-CAM backend for the Gradio demo.

    Builds a ``DavidPageNet`` once, optionally loads a checkpoint into it,
    and exposes :meth:`inference`, which returns a Grad-CAM overlay together
    with the top-N class probabilities for one input image.
    """

    def __init__(self, model_path: str, temperature: float = 2):
        """Pick the compute device and load the model.

        Args:
            model_path: Path to a ``state_dict`` checkpoint file.
            temperature: Divisor applied to the logits before the softmax
                when computing the reported probabilities.
        """
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.model = self.load_model(model_path)
        self.temperature = temperature

    def load_model(self, model_path: str):
        """Create the network on ``self.device`` and load weights if present.

        A missing checkpoint is not fatal: the freshly constructed model is
        returned as-is, but a ``RuntimeWarning`` is emitted so the situation
        does not go unnoticed.

        Args:
            model_path: Path to a ``state_dict`` checkpoint file.

        Returns:
            The ``DavidPageNet`` instance, moved to ``self.device``.
        """
        # Function-scope import keeps this block self-contained.
        import warnings

        model = DavidPageNet().to(self.device)

        if os.path.isfile(model_path):
            # NOTE: torch.load unpickles the file; only use trusted checkpoints.
            state_dict = torch.load(model_path, map_location=self.device)
            # strict=False tolerates missing / unexpected keys in the checkpoint.
            model.load_state_dict(state_dict, strict=False)
        else:
            warnings.warn(
                f"Checkpoint not found at {model_path!r}; "
                "the model is running without loaded weights.",
                RuntimeWarning,
                stacklevel=2,
            )

        return model

    def cam(
        self,
        input_tensor: torch.Tensor,
        target_class_id: int,
        layer_nums: List[int],
        transparency: float = 0.7,
    ):
        """Render a Grad-CAM heat map for one class over the input image.

        Args:
            input_tensor: Preprocessed (normalized) image batch; only the
                first item of the batch is visualized.
            target_class_id: Class index the activation map is computed for.
            layer_nums: 1-based layer numbers; number ``n`` selects the model
                attribute ``block{n-1}`` as a Grad-CAM target layer.
            transparency: Passed to ``show_cam_on_image`` as ``image_weight``
                (weight of the original image in the blend).

        Returns:
            The blended visualization produced by ``show_cam_on_image``.

        Raises:
            ValueError: If ``layer_nums`` is empty.
        """
        # Fail early with a clear message instead of deep inside GradCAM.
        if not layer_nums:
            raise ValueError("layer_nums must contain at least one layer number")

        targets = [ClassifierOutputTarget(target_class_id)]
        target_layers = [
            getattr(self.model, f"block{layer - 1}") for layer in layer_nums
        ]

        with GradCAM(
            model=self.model,
            target_layers=target_layers,
            use_cuda=self.device.type == "cuda",
        ) as grad_cam:
            grayscale_cam = grad_cam(input_tensor=input_tensor, targets=targets)
            grayscale_cam = grayscale_cam[0, :]

        # Undo the normalization to recover the displayable image. The
        # normalize/de-normalize round trip can overshoot [0, 1] by float
        # round-off, and show_cam_on_image rejects images whose max exceeds 1,
        # so clamp before handing it over.
        img = inv_normalize(input_tensor).clamp(0.0, 1.0)
        rgb_img = img[0].permute(1, 2, 0).cpu().numpy()

        return show_cam_on_image(
            rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency
        )

    def _rank_predictions(self, logits: torch.Tensor):
        """Return ``(class_id, probability)`` pairs, most probable first.

        Probabilities are a softmax over ``logits / self.temperature``.
        """
        probabilities = F.softmax(logits / self.temperature, dim=-1)
        scores, class_ids = torch.sort(probabilities, descending=True)
        return list(zip(class_ids.tolist(), scores.tolist()))

    def inference(
        self,
        input_img: np.ndarray,
        transparency: float,
        ntop_classes: int,
        layer_nums: List[int],
        cam_for_class: str,
    ):
        """Classify one image and build its Grad-CAM visualization.

        Args:
            input_img: Input image as accepted by the module-level
                ``transform`` pipeline.
            transparency: Image weight for the Grad-CAM overlay.
            ntop_classes: How many of the highest-scoring classes to report.
            layer_nums: 1-based layer numbers used as Grad-CAM targets.
            cam_for_class: Label from ``classes`` to explain, or ``"default"``,
                ``""`` or ``None`` to explain the top predicted class.

        Returns:
            A ``(visualization, scores)`` tuple where ``scores`` maps class
            label to probability rounded to two decimals.
        """
        self.model.eval()
        batch = transform(input_img).to(self.device).unsqueeze(0)

        with torch.no_grad():
            logits = self.model(batch).squeeze(0)
        prediction = self._rank_predictions(logits)

        # A cleared dropdown arrives as None; treat it like "default".
        if cam_for_class in (None, "", "default"):
            class_id = prediction[0][0]
        else:
            class_id = classes.index(cam_for_class)

        visualization = self.cam(
            input_tensor=batch,
            target_class_id=class_id,
            layer_nums=layer_nums,
            transparency=transparency,
        )

        # Sliders may deliver floats; slicing needs an int.
        top_n = int(ntop_classes)
        top_nclass_result = {
            classes[ranked_id]: round(score, 2)
            for ranked_id, score in prediction[:top_n]
        }
        return visualization, top_nclass_result


# Build the backend once at start-up so every request reuses the same model.
method = Gradio(model_path="./checkpoint/model.pt")

# Input widgets, listed in the positional order of Gradio.inference's
# parameters: image, transparency, top-N, layer numbers, CAM class.
demo_inputs = [
    gr.Image(shape=(32, 32), label="Input Image", value="./samples/dog_cat.jpeg"),
    gr.Slider(
        label="Transparency",
        info="Transparency of the CAM-Attention Output",
        minimum=0,
        maximum=1,
        value=0.5,
    ),
    gr.Slider(
        label="Top Classes",
        info="Number of Top Predicted Classes",
        minimum=1,
        maximum=10,
        step=1,
        value=2,
    ),
    gr.CheckboxGroup(
        label="Network Layers",
        info="Network Layers for CAM-Attention Extraction",
        choices=[1, 2, 3, 4],
        value=[3, 4],
    ),
    gr.Dropdown(
        label="Class Activation Map (CAM) Focus Visualization",
        info="This section showcases the specific region of interest within the input image that the Class Activation Map (CAM) algorithm emphasizes to make predictions based on the selected class from the dropdown menu. The 'default' value serves as the default choice, representing the top class predicted by the model.",
        choices=["default"] + classes,
        multiselect=False,
        value="default",
    ),
]

# Output widgets: the CAM overlay image and the label/probability panel.
demo_outputs = [
    gr.Image(shape=(32, 32)).style(width=128, height=128),
    gr.Label(label="Top Classes"),
]

demo = gr.Interface(fn=method.inference, inputs=demo_inputs, outputs=demo_outputs)


demo.launch()