Spaces:

crrrr30
/

cs-mixer

Runtime error

File size: 9,046 Bytes

da716ed

import os
from tqdm import tqdm
import argparse
import cv2
import numpy as np
from torchvision import transforms
from datasets import Dataset, concatenate_datasets
from pytorch_grad_cam import GradCAM

from timm.models import create_model, load_checkpoint
from timm.data import create_transform
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget


# Create the output directories for the rendered images.
# os.makedirs (unlike os.mkdir) also creates the missing parent directories
# 'results/' and 'results/grad_cam/', and exist_ok=True makes re-runs a no-op.
for _out_dir in ('results/grad_cam/correct', 'results/grad_cam/incorrect'):
    os.makedirs(_out_dir, exist_ok=True)

def get_args():
    """Parse the command-line options of the Grad-CAM script.

    Returns:
        argparse.Namespace with the attributes ``model``, ``checkpoint``,
        ``idx`` (a string holding a Python list literal), ``use_cuda``,
        ``aug_smooth`` and ``eigen_smooth``.

    Note:
        ``use_cuda`` is always forced to ``True`` after parsing, so the
        ``--use-cuda`` flag currently has no effect on the returned value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='tpmlp_s', type=str, metavar='MODEL',
                        help='name of the model to create (default: tpmlp_s)')
    parser.add_argument('--checkpoint', default='/home/daa5724/tpmlp-s-300-ema/last.pth.tar', type=str, metavar='PATH',
                        help='path to the checkpoint to load')
    parser.add_argument('--idx', default='[1374, 27826, 14327, 1828, 31787, 21083, 38902, 7912, 10089, 16915, 20986, 35716, 15233, 20648, 30566, 20150, 45538, 42359, 39683, 20329, 20868, 48557, 10569, 37167, 11163, 6688, 21910, 44528, 10660, 13919, 10098, 46981, 36560, 14231, 45372, 6262, 23684, 16895, 17036, 15670, 35393, 26758, 18572, 48064, 29773, 25437, 5494, 12825, 25737, 45244, 16877, 29958, 38519, 5338, 46210, 15154, 15040, 15783, 13640, 14420, 26836, 38155, 45094, 33282, 13362, 42975, 38779, 24298, 20632, 48373, 28662, 21869, 37940, 25953, 29360, 9428, 22352, 6498, 2014, 9666, 30364, 21129, 43259, 16148, 31559, 4508, 42773, 8180, 17194, 46614, 23580, 3039, 36980, 35809, 860, 35940, 9670, 33552, 35731, 23777, 15272, 47792, 20589, 12044, 24154, 24852, 2090, 16158, 12333, 4109, 7612, 22611, 12808, 38787, 41688, 23714, 17498, 29326, 12237, 28137, 38521, 24060, 31545, 46094, 34674, 18182, 28380, 34046]', type=str, metavar='IDX',
                        help='list of indices to use (default: [...])')
    parser.add_argument('--use-cuda', action='store_true', default=False,
                        help='Use NVIDIA GPU acceleration')
    parser.add_argument('--aug_smooth', action='store_true',
                        help='Apply test time augmentation to smooth the CAM')
    parser.add_argument(
        '--eigen_smooth',
        action='store_true',
        help='Reduce noise by taking the first principal component '
             'of cam_weights*activations')

    args = parser.parse_args()
    # Deliberate override kept from the original script: the parsed value of
    # --use-cuda is discarded and GPU execution is always requested.
    args.use_cuda = True
    if args.use_cuda:
        print('Using GPU for acceleration')
    else:
        print('Using CPU for computation')

    return args


if __name__ == '__main__':
    # Script entry point: renders Grad-CAM overlays for a list of dataset
    # indices. The first half of the list is treated as "expected correct"
    # samples and the second half as "expected incorrect" samples; images are
    # written below results/grad_cam/correct and results/grad_cam/incorrect.
    import ast  # local import: only needed here to parse --idx

    args = get_args()

    model = create_model(
        args.model,
        num_classes=1000,
        in_chans=3,
    )
    # NOTE(review): the third positional argument is presumably timm's
    # `use_ema` flag -- confirm against the installed timm version.
    load_checkpoint(model, args.checkpoint, True)

    # Choose the target layer you want to compute the visualization for.
    # Usually this will be the last convolutional layer in the model.
    # Some common choices can be:
    # Resnet18 and 50: model.layer4
    # VGG, densenet161: model.features[-1]
    # mnasnet1_0: model.layers[-1]
    # You can print the model to help choose the layer.
    # You can pass a list with several target layers,
    # in that case the CAMs will be computed per layer and then aggregated.
    # You can also try selecting all layers of a certain type, with e.g:
    # from pytorch_grad_cam.utils.find_layers import find_layer_types_recursive
    # find_layer_types_recursive(model, [torch.nn.ReLU])
    target_layers = [model.layers[3]]

    # The validation split is stored as 13 Arrow shards; load and join them.
    dataset = concatenate_datasets([
        Dataset.from_file(f"../../imagenet-1k/imagenet-1k-validation-{i:05d}-of-00013.arrow")
        for i in range(13)
    ])

    augs = create_transform(
        input_size=(3, 224, 224),
        is_training=False,
        use_prefetcher=False,
        crop_pct=0.9,
    )
    # Split the pipeline: every step except the last produces the image used
    # for display, the last step produces the model input.
    # NOTE(review): assumes the last transform is the normalization and the
    # preceding ones end in a to-tensor conversion (the code below calls
    # `.permute` on the intermediate result) -- confirm for your timm version.
    resize = transforms.Compose(augs.transforms[:-1])
    normalize = augs.transforms[-1]

    def transform(img):
        """Return (un-normalized image, normalized input with a batch axis)."""
        img = resize(img.convert("RGB"))
        tensor = normalize(img)
        return img, tensor[None]

    def load_sample(sample_idx):
        """Fetch one dataset item; return (HWC display image, model input, label)."""
        data = dataset[int(sample_idx)]
        image, label = data['image'], data['label']
        rgb_img, input_tensor = transform(image)
        # Channels-last layout for the overlay / OpenCV calls below.
        rgb_img = rgb_img.permute(1, 2, 0)
        if args.use_cuda:
            input_tensor = input_tensor.cuda()
        return rgb_img, input_tensor, label

    def compute_cam(input_tensor, targets):
        """Run Grad-CAM once; return (heat-map of the single batch item, pred).

        NOTE(review): unpacking two values from `cam(...)` assumes a
        GradCAM variant that also returns the predictions -- confirm.
        """
        with GradCAM(model=model,
                     target_layers=target_layers,
                     use_cuda=args.use_cuda) as cam:
            grayscale_cam, pred = cam(input_tensor=input_tensor,
                                      targets=targets,
                                      aug_smooth=args.aug_smooth,
                                      eigen_smooth=args.eigen_smooth)
        # Here grayscale_cam has only one image in the batch.
        return grayscale_cam[0, :], pred

    def save_overlay(rgb_img, grayscale_cam, path):
        """Blend the heat-map over the image and write it to `path`."""
        cam_image = show_cam_on_image(rgb_img.detach().cpu().numpy(), grayscale_cam, use_rgb=True, image_weight=0.5, colormap=cv2.COLORMAP_TWILIGHT_SHIFTED)
        # cam_image is RGB encoded whereas "cv2.imwrite" requires BGR encoding.
        cv2.imwrite(path, cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR))

    def save_input(rgb_img, path):
        """Write the un-normalized input image (scaled by 255) to `path`."""
        bgr_image = cv2.cvtColor((rgb_img * 255).numpy().astype(np.uint8), cv2.COLOR_RGB2BGR)
        cv2.imwrite(path, bgr_image)

    # SECURITY: the original called eval() on this command-line string, which
    # executes arbitrary code. literal_eval only accepts Python literals such
    # as the default "[1, 2, 3]" list; expressions like "range(10)" are
    # intentionally no longer accepted.
    indices = ast.literal_eval(args.idx)
    half = len(indices) // 2
    correct_idx = indices[:half]
    incorrect_idx = indices[half:]

    for sample_idx in tqdm(correct_idx):
        rgb_img, input_tensor, label = load_sample(sample_idx)
        grayscale_cam, pred = compute_cam(input_tensor, [ClassifierOutputTarget(label)])

        if pred[0] != label:
            print(f"`pred != gdth` in correct_idx: {pred[0]} != {label}. Skipping idx {sample_idx}.")
            # Actually skip, as the message says (the original fell through).
            continue

        save_overlay(rgb_img, grayscale_cam, f'results/grad_cam/correct/grad_cam_{sample_idx}.png')
        save_input(rgb_img, f'results/grad_cam/correct/image_{sample_idx}[{label}].png')

    for sample_idx in tqdm(incorrect_idx):
        rgb_img, input_tensor, label = load_sample(sample_idx)

        # CAM with respect to the ground-truth class.
        grayscale_cam, pred = compute_cam(input_tensor, [ClassifierOutputTarget(label)])

        if pred[0] == label:
            print(f"`pred == gdth` in incorrect_idx: {pred[0]} == {label}. Skipping idx {sample_idx}.")
            # Actually skip, as the message says (the original fell through).
            continue

        save_overlay(rgb_img, grayscale_cam, f'results/grad_cam/incorrect/grad_cam_gdth_{sample_idx}.png')
        save_input(rgb_img, f'results/grad_cam/incorrect/image_{sample_idx}[{label}].png')

        # Second pass with targets=None, i.e. without forcing the ground-truth
        # class; the file name records the returned prediction.
        grayscale_cam, pred = compute_cam(input_tensor, None)
        save_overlay(rgb_img, grayscale_cam, f'results/grad_cam/incorrect/grad_cam_pred_{sample_idx}[{pred[0]}].png')