emikes-classifier / gradcam.py
ongkn's picture
Update gradcam.py
91a8c95
from transformers import ViTFeatureExtractor, ViTForImageClassification
import warnings
from torchvision import transforms
from datasets import load_dataset
from pytorch_grad_cam import run_dff_on_image, GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from PIL import Image
import numpy as np
import cv2 as cv
import torch
from typing import List, Callable, Optional
import logging
from face_grab import FaceGrabber
# original borrowed from https://github.com/jacobgil/pytorch-grad-cam/blob/master/tutorials/HuggingFace.ipynb
# thanks @jacobgil
# further mods beyond this commit by @simonSlamka
# Install a catch-all "ignore" filter: no category/module is given, so every
# warning issued through the `warnings` module is suppressed from here on.
warnings.filterwarnings("ignore")
# Configure the root logger at import time so INFO-and-above records are emitted.
# NOTE: logging.basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
class HuggingfaceToTensorModelWrapper(torch.nn.Module):
    """Adapter that makes a model return its ``logits`` directly.

    The wrapped model is expected to return an object exposing a ``logits``
    attribute; this wrapper forwards the input unchanged and hands back only
    that attribute, so callers receive a plain value instead of the full
    output object.
    """

    def __init__(self, model):
        """Store ``model`` as the wrapped callable."""
        super().__init__()
        self.model = model

    def forward(self, x):
        """Run the wrapped model on ``x`` and return the output's ``logits``."""
        outputs = self.model(x)
        return outputs.logits
class GradCam():
    """Grad-CAM helpers for Hugging Face image classifiers.

    None of the methods read instance state; the class only groups them.
    """

    def __init__(self):
        pass

    def category_name_to_index(self, model, category_name):
        """Return the class index whose label equals ``category_name``.

        The lookup inverts ``model.config.id2label``. If several indices share
        the same label, the last one encountered wins.

        Raises:
            KeyError: if no label in ``id2label`` equals ``category_name``.
        """
        name_to_index = {label: index
                         for index, label in model.config.id2label.items()}
        return name_to_index[category_name]

    def run_grad_cam_on_image(self, model: torch.nn.Module,
                              target_layer: torch.nn.Module,
                              targets_for_gradcam: List[Callable],
                              reshape_transform: Optional[Callable],
                              input_tensor: torch.Tensor,
                              input_image: Image.Image,
                              method: Callable = GradCAM,
                              threshold: float = 0.5):
        """Render one CAM overlay per target and return them side by side.

        Args:
            model: classifier whose output exposes ``.logits``; it is wrapped
                in ``HuggingfaceToTensorModelWrapper`` before being handed to
                ``method``.
            target_layer: layer passed to ``method`` as the single entry of
                ``target_layers``.
            targets_for_gradcam: one target per overlay to produce; must not
                be empty.
            reshape_transform: forwarded unchanged to ``method`` (may be None).
            input_tensor: a single, un-batched image tensor. A batch axis is
                added and the tensor is repeated once per target.
                NOTE(review): presumably shaped (C, H, W) -- confirm with caller.
            input_image: image the heat map is drawn over; it is converted
                with ``np.float32(...) / 255``, so 8-bit pixel values are
                assumed -- TODO confirm.
            method: CAM class used as a context manager; defaults to GradCAM.
            threshold: CAM values strictly below this are set to 0 before the
                overlay is drawn.

        Returns:
            The overlays, each resized to half width and half height, stacked
            horizontally with ``np.hstack``.

        Raises:
            ValueError: if ``targets_for_gradcam`` is empty.
        """
        if not targets_for_gradcam:
            # Fail early with a clear message instead of deep inside the CAM
            # library or in np.hstack([]).
            raise ValueError("targets_for_gradcam must contain at least one target")

        wrapped_model = HuggingfaceToTensorModelWrapper(model)
        with method(model=wrapped_model,
                    target_layers=[target_layer],
                    reshape_transform=reshape_transform) as cam:
            # Replicate the tensor for each of the categories we want to
            # create Grad-CAM for.
            repeated_tensor = input_tensor.unsqueeze(0).repeat(
                len(targets_for_gradcam), 1, 1, 1)
            batch_results = cam(input_tensor=repeated_tensor,
                                targets=targets_for_gradcam)

            # The background image is the same for every overlay: convert once.
            image_float = np.float32(input_image) / 255

            results = []
            for grayscale_cam in batch_results:
                # Suppress weak activations so only salient regions are tinted.
                grayscale_cam[grayscale_cam < threshold] = 0
                visualization = show_cam_on_image(image_float,
                                                  grayscale_cam,
                                                  use_rgb=True)
                # Halve the resolution to keep the rendered output light.
                height, width = visualization.shape[:2]
                visualization = cv.resize(visualization,
                                          (width // 2, height // 2))
                results.append(visualization)
            return np.hstack(results)

    def get_top_category(self, model, img_tensor, top_k=5):
        """Return the highest-scoring class for a single image.

        Args:
            model: classifier whose output exposes ``.logits`` and whose
                ``config.id2label`` maps class indices to label strings.
            img_tensor: un-batched image tensor; a batch axis is added here.
            top_k: accepted for interface compatibility but not used -- only
                the single best class is returned.

        Returns:
            A one-element list ``[{"label": <str>, "score": <float>}]`` where
            ``score`` is the softmax probability of the top class.
        """
        # Only Python scalars leave this method, so no autograd graph is needed.
        with torch.no_grad():
            logits = model(img_tensor.unsqueeze(0)).logits
            probabilities = torch.nn.functional.softmax(logits, dim=1)
            # Plain int so it can be used directly as an id2label key.
            top_index = int(torch.argmax(logits[0]))
        top_class = model.config.id2label[top_index]
        top_score = probabilities[0][top_index].item()
        return [{"label": top_class, "score": top_score}]

    def reshape_transform_vit_huggingface(self, x):
        """Turn a (batch, tokens, channels) tensor into (batch, channels, side, side).

        The first token along axis 1 is dropped (presumably the [CLS] token --
        confirm against the model) and the remaining tokens are laid out on a
        square grid whose side is derived from the token count, e.g. 196
        tokens become a 14x14 grid.

        NOTE(review): a square patch grid is assumed; a non-square grid whose
        token count happens to be a perfect square would be mis-arranged.

        Raises:
            ValueError: if the number of remaining tokens is not a perfect
                square.
        """
        activations = x[:, 1:, :]
        batch_size, num_patches, channels = activations.shape
        side = int(round(num_patches ** 0.5))
        if side * side != num_patches:
            raise ValueError(
                f"cannot arrange {num_patches} patch tokens into a square grid")
        activations = activations.reshape(batch_size, side, side, channels)
        # (batch, side, side, channels) -> (batch, channels, side, side)
        return activations.permute(0, 3, 1, 2)