# cs-mixer / get_grad_cam.py
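"""Render Grad-CAM visualizations for selected ImageNet-1k validation images.

The --idx list is split in half: the first half is assumed to be correctly
classified samples, the second half misclassified ones. For misclassified
samples the CAM is rendered twice, once for the ground-truth class and once
for the predicted class.
"""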
import os
import ast
from tqdm import tqdm
import argparse
import cv2
import numpy as np
from torchvision import transforms
from datasets import Dataset, concatenate_datasets
from pytorch_grad_cam import GradCAM
from timm.models import create_model, load_checkpoint
from timm.data import create_transform
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
os.makedirs('results/grad_cam/correct', exist_ok=True)
os.makedirs('results/grad_cam/incorrect', exist_ok=True)
def get_args():
parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='tpmlp_s', type=str, metavar='MODEL',
                        help='model architecture name (default: tpmlp_s)')
    parser.add_argument('--checkpoint', default='/home/daa5724/tpmlp-s-300-ema/last.pth.tar', type=str, metavar='PATH',
                        help='path to the model checkpoint to load')
parser.add_argument('--idx', default='[1374, 27826, 14327, 1828, 31787, 21083, 38902, 7912, 10089, 16915, 20986, 35716, 15233, 20648, 30566, 20150, 45538, 42359, 39683, 20329, 20868, 48557, 10569, 37167, 11163, 6688, 21910, 44528, 10660, 13919, 10098, 46981, 36560, 14231, 45372, 6262, 23684, 16895, 17036, 15670, 35393, 26758, 18572, 48064, 29773, 25437, 5494, 12825, 25737, 45244, 16877, 29958, 38519, 5338, 46210, 15154, 15040, 15783, 13640, 14420, 26836, 38155, 45094, 33282, 13362, 42975, 38779, 24298, 20632, 48373, 28662, 21869, 37940, 25953, 29360, 9428, 22352, 6498, 2014, 9666, 30364, 21129, 43259, 16148, 31559, 4508, 42773, 8180, 17194, 46614, 23580, 3039, 36980, 35809, 860, 35940, 9670, 33552, 35731, 23777, 15272, 47792, 20589, 12044, 24154, 24852, 2090, 16158, 12333, 4109, 7612, 22611, 12808, 38787, 41688, 23714, 17498, 29326, 12237, 28137, 38521, 24060, 31545, 46094, 34674, 18182, 28380, 34046]', type=str, metavar='IDX',
                        help='list of indices to use (default: [...])')
parser.add_argument('--use-cuda', action='store_true', default=False,
help='Use NVIDIA GPU acceleration')
parser.add_argument('--aug_smooth', action='store_true',
help='Apply test time augmentation to smooth the CAM')
parser.add_argument(
'--eigen_smooth',
action='store_true',
        help='Reduce noise by taking the first principal component '
             'of cam_weights*activations')
args = parser.parse_args()
    # GPU is forced on: the script below moves tensors to CUDA unconditionally.
    args.use_cuda = True
if args.use_cuda:
print('Using GPU for acceleration')
else:
print('Using CPU for computation')
return args
if __name__ == '__main__':
args = get_args()
model = create_model(
args.model,
num_classes=1000,
in_chans=3,
)
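    # The third positional argument is timm's `use_ema`, so the EMA weights
    # stored in the checkpoint are the ones loaded.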
load_checkpoint(model, args.checkpoint, True)
# Choose the target layer you want to compute the visualization for.
# Usually this will be the last convolutional layer in the model.
# Some common choices can be:
# Resnet18 and 50: model.layer4
# VGG, densenet161: model.features[-1]
# mnasnet1_0: model.layers[-1]
    # You can print the model to help choose the layer.
# You can pass a list with several target layers,
# in that case the CAMs will be computed per layer and then aggregated.
    # You can also try selecting all layers of a certain type, e.g.:
# from pytorch_grad_cam.utils.find_layers import find_layer_types_recursive
# find_layer_types_recursive(model, [torch.nn.ReLU])
target_layers = [model.layers[3]]
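    # Here the final stage of the model (model.layers[3]) is used as the CAM
    # target; see the notes above for choosing a layer in other architectures.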
    dataset = concatenate_datasets([Dataset.from_file(f"../../imagenet-1k/imagenet-1k-validation-{i:05d}-of-00013.arrow") for i in range(13)])
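    # Assumes the ImageNet-1k validation split is available locally as 13
    # Arrow shards following the naming pattern above.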
# loader = create_loader(
# dataset,
# input_size=(3, 224, 224),
# batch_size=1,
# use_prefetcher=False,
# interpolation='bicubic',
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225],
# num_workers=4,
# crop_pct=.9,
# crop_mode='center',
# pin_memory=False,
# device="cuda",
# tf_preprocessing=False
# )
augs = create_transform(
input_size=(3, 224, 224),
is_training=False,
use_prefetcher=False,
crop_pct=0.9,
)
resize = transforms.Compose(augs.transforms[:-1])
normalize = augs.transforms[-1]
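    # `create_transform` ends with Normalize; splitting it off keeps an
    # un-normalized [0, 1] tensor around for overlaying the CAM, while the
    # normalized tensor is what the model consumes.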
    def transform(img):
        # Returns (un-normalized CHW tensor for display, normalized 1xCxHxW batch).
        img = resize(img.convert("RGB"))
        tensor = normalize(img)
        return img, tensor[None]
# assert rgb_img.min() > -1e-5 and rgb_img.max() < 1 + 1e-5
# rgb_img = cv2.imread(args.image_path, 1)[:, :, ::-1]
# rgb_img = np.float32(rgb_img) / 255
# input_tensor = preprocess_image(rgb_img,
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225])
    idx = ast.literal_eval(args.idx)  # safer than eval() for parsing the list literal
    correct_idx = idx[:len(idx) // 2]
    incorrect_idx = idx[len(idx) // 2:]
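    # Convention: the first half of --idx holds correctly classified samples,
    # the second half misclassified ones; each half gets its own output directory.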
for idx in tqdm(correct_idx):
data = dataset[int(idx)]
image, label = data['image'], data['label']
rgb_img, input_tensor = transform(image)
rgb_img = rgb_img.permute(1, 2, 0)
input_tensor = input_tensor.cuda()
targets = [ClassifierOutputTarget(label)]
with GradCAM(model=model,
target_layers=target_layers,
use_cuda=True) as cam:
grayscale_cam, pred = cam(input_tensor=input_tensor,
targets=targets,
aug_smooth=args.aug_smooth,
eigen_smooth=args.eigen_smooth)
            # NB: unpacking (cam, pred) above relies on a GradCAM variant that
            # also returns predictions; upstream pytorch_grad_cam returns only the CAM.
            if pred[0] != label:
                print(f"`pred != gdth` in correct_idx: {pred[0]} != {label}. Skipping idx {idx}.")
                continue
# Here grayscale_cam has only one image in the batch
grayscale_cam = grayscale_cam[0, :]
cam_image = show_cam_on_image(rgb_img.detach().cpu().numpy(), grayscale_cam, use_rgb=True, image_weight=0.5, colormap=cv2.COLORMAP_TWILIGHT_SHIFTED)
# cam_image is RGB encoded whereas "cv2.imwrite" requires BGR encoding.
cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)
        rgb_image = cv2.cvtColor((rgb_img * 255).numpy().astype(np.uint8), cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'results/grad_cam/correct/grad_cam_{idx}.png', cam_image)
        cv2.imwrite(f'results/grad_cam/correct/image_{idx}[{label}].png', rgb_image)
for idx in tqdm(incorrect_idx):
data = dataset[int(idx)]
image, label = data['image'], data['label']
rgb_img, input_tensor = transform(image)
rgb_img = rgb_img.permute(1, 2, 0)
input_tensor = input_tensor.cuda()
targets = [ClassifierOutputTarget(label)]
with GradCAM(model=model,
target_layers=target_layers,
use_cuda=True) as cam:
grayscale_cam, pred = cam(input_tensor=input_tensor,
targets=targets,
aug_smooth=args.aug_smooth,
eigen_smooth=args.eigen_smooth)
            if pred[0] == label:
                print(f"`pred == gdth` in incorrect_idx: {pred[0]} == {label}. Skipping idx {idx}.")
                continue
# Here grayscale_cam has only one image in the batch
grayscale_cam = grayscale_cam[0, :]
cam_image = show_cam_on_image(rgb_img.detach().cpu().numpy(), grayscale_cam, use_rgb=True, image_weight=0.5, colormap=cv2.COLORMAP_TWILIGHT_SHIFTED)
# cam_image is RGB encoded whereas "cv2.imwrite" requires BGR encoding.
cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)
        rgb_image = cv2.cvtColor((rgb_img * 255).numpy().astype(np.uint8), cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'results/grad_cam/incorrect/grad_cam_gdth_{idx}.png', cam_image)
        cv2.imwrite(f'results/grad_cam/incorrect/image_{idx}[{label}].png', rgb_image)
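        # Second pass for misclassified samples: with targets=None,
        # pytorch_grad_cam explains the model's highest-scoring (i.e.
        # predicted) class instead of the ground-truth label.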
with GradCAM(model=model,
target_layers=target_layers,
use_cuda=True) as cam:
grayscale_cam, pred = cam(input_tensor=input_tensor,
targets=None,
aug_smooth=args.aug_smooth,
eigen_smooth=args.eigen_smooth)
# Here grayscale_cam has only one image in the batch
grayscale_cam = grayscale_cam[0, :]
cam_image = show_cam_on_image(rgb_img.detach().cpu().numpy(), grayscale_cam, use_rgb=True, image_weight=0.5, colormap=cv2.COLORMAP_TWILIGHT_SHIFTED)
# cam_image is RGB encoded whereas "cv2.imwrite" requires BGR encoding.
cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)
cv2.imwrite(f'results/grad_cam/incorrect/grad_cam_pred_{idx}[{pred[0]}].png', cam_image)