diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..25061cb1e11a8d10cb5b09a4c9cfe31f94b2bd5d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,25 @@ +# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker +# you will also find guides on how best to write your Dockerfile + +FROM pytorch/pytorch + +WORKDIR /code + +COPY ./requirements.txt /code/requirements.txt + +RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y + +RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt + +RUN mkdir ./.cache + +RUN mkdir ./models + +RUN chmod -R 777 ./models + +RUN chmod -R 777 ./.cache + + +COPY . . + +CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app"] \ No newline at end of file diff --git a/api_link.py b/api_link.py new file mode 100644 index 0000000000000000000000000000000000000000..48aec592559e1c212b4ed41f05a8ce92ed635b63 --- /dev/null +++ b/api_link.py @@ -0,0 +1,201 @@ +from flask import Flask +from flask_cors import CORS, cross_origin +from flask import request +import os +import cv2 +import json +import urllib.request +import time + +from flask import request + +# Initialize the Flask backend server +app = Flask(__name__) + +# Apply Flask CORS +CORS(app) +app.config['CORS_HEADERS'] = 'Content-Type' +app.config['UPLOAD_FOLDER'] = 'static' + +# yolov6_model = my_yolov6.my_yolov6("weights/yolov6s.pt", 'cpu', 'data/coco.yaml', 640, True) + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import torch +from torch import nn, optim +import torch.nn.functional as F +import torchvision +from torchvision import datasets, transforms, models +from torch.autograd import Variable +from torch.utils.data.sampler import SubsetRandomSampler + +import warnings +warnings.filterwarnings('ignore') +from pytorch_grad_cam import GradCAM, EigenCAM, LayerCAM, XGradCAM +from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget +from pytorch_grad_cam.utils.image import show_cam_on_image, \ + deprocess_image, \ + preprocess_image +from PIL import Image + +import copy + +# Load GoogleNet model +model = models.googlenet(pretrained=True) + +model.fc = nn.Linear(1024, 4) +model.load_state_dict(torch.load('./model_transfer_batch_2_epoch50.pt', map_location=torch.device('cpu'))) + + +data_transforms ={ + "train_transforms": transforms.Compose([transforms.RandomRotation(30), + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225])]), + "valid_transforms": transforms.Compose([transforms.Resize(225), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225])]), + "test_transforms": transforms.Compose([transforms.Resize(225), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225])]) +} + +transform = transforms.Compose([transforms.Resize(225), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225])]) + +use_cuda = torch.cuda.is_available() +classes = ['BrownSpot', 'Healthy', 'Hispa', 'LeafBlast'] + +def yolo_format(x, y, w, h, image_size): + x_center_norm = (x+w/2)/image_size[1] + y_center_norm = (y+h/2)/image_size[0] + w_norm = w/image_size[1] + h_norm = h/image_size[0] + return (x_center_norm, y_center_norm, w_norm, h_norm) + +def predict_image(image_url): + + img = np.array(Image.open(image_url)) + + img_cp = np.copy(img) + img_cp =
cv2.resize(img_cp, (224, 224)) + img_cp = np.float32(img_cp) / 255 + input_tensor = preprocess_image(img_cp, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + input_tensor = torch.Tensor(input_tensor) + # input_tensor.cuda()  # the model stays on the CPU, so keep the input tensor on the CPU too + + output = model(input_tensor) + # print(torch.max(output, 1)) + _, preds_tensor = torch.max(output, 1) + preds = np.squeeze(preds_tensor.numpy()) if not use_cuda else np.squeeze(preds_tensor.cpu().numpy()) + print(preds) + + class_name = classes[preds] + if preds == 1: + grad_bounding_box = (0,0,0,0) + else: + img = np.array(Image.open(image_url)) + img = cv2.resize(img, (224, 224)) + img = np.float32(img) / 255 + input_tensor = torch.Tensor(input_tensor) + # input_tensor.cuda()  # kept on the CPU to match the model + + input_tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + targets = [ClassifierOutputTarget(0)] + target_layers = [model.inception5b.branch4[1].conv] + + with EigenCAM(model=model, target_layers=target_layers) as cam: + grayscale_cams = cam(input_tensor=input_tensor, targets=targets) + cam_image = show_cam_on_image(img, grayscale_cams[0, :], use_rgb=True) + cam = np.uint8(255*grayscale_cams[0, :]) + img = np.uint8(255*img) + ret, thresh1 = cv2.threshold(cam, 120, 255, cv2.THRESH_BINARY + + cv2.THRESH_OTSU) + img_otsu = cam < thresh1 + img_bin = np.multiply(img_otsu, 1) + img_bin = np.array(img_bin, np.uint8) + contours, _ = cv2.findContours(img_bin,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + cnt = contours[0] + x,y,w,h = cv2.boundingRect(cnt) + # grad_bounding_box = (x,y,x+w, y+h) + grad_bounding_box = yolo_format(x, y, w, h, (224, 224)) + + # print(grad_bounding_box) + + return class_name, grad_bounding_box + +def yolo2bbox(x, y, w, h, img_size=(224, 224)): + x = x * img_size[1] + y = y * img_size[0] + w = w * img_size[1] + h = h * img_size[0] + x1, y1 = x-w/2, y-h/2 + x2, y2 = x+w/2, y+h/2 + return int(x1), int(y1), int(x2), int(y2) + +def bb_intersection_over_union(boxA, boxB): + xA = max(boxA[0], boxB[0]) + yA = max(boxA[1], boxB[1]) + xB = min(boxA[2], boxB[2]) + yB = min(boxA[3], boxB[3]) + interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1) + boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1) + boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1) + iou = interArea / float(boxAArea + boxBArea - interArea) + return iou + +def read_annot_file(label_file): + with open(os.path.join(label_file), "r") as file1: + # Reading from a file + t = file1.read() + box = t[t.find(" ")+1:] + box = list(box.split(" ")) + # list(map(float, box)) + for i in range(len(box)): + box[i] = float(box[i]) + return box + + +@app.route('/', methods=['POST']) +@cross_origin(origin='*') +def predict_leaf(): + # image = request.files['file'] + img_url = request.form['url'] + print(img_url) + image = Image.open(urllib.request.urlopen(img_url)) + date = time.time() + filename = str(date) + '.jpg' + + if image: + # Save the file + #save image + path_to_save = os.path.join(app.config['UPLOAD_FOLDER'], filename) + image.save(path_to_save) + + # print("Save= ", path_to_save) + + + predicted_class, grad_bounding_box = predict_image(path_to_save) + # print(predicted_class) + # print(grad_bounding_box) + result_dict = {'class': predicted_class, 'bounding_box': grad_bounding_box} + json_object = json.dumps(result_dict) + print(json_object) + return json_object + return 'Upload file to detect: ' + + + +# Start Backend +if __name__ == '__main__': + app.run(host='0.0.0.0', port=6868) \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644
index 0000000000000000000000000000000000000000..5f0dcb147ca58ef896bf7f17ddc7c6614ef7370d --- /dev/null +++ b/main.py @@ -0,0 +1,192 @@ +from flask import Flask +from flask_cors import CORS, cross_origin +from flask import request +import os +import cv2 +import json + + +from flask import request + +# Initialize the Flask backend server +app = Flask(__name__) + +# Apply Flask CORS +CORS(app) +app.config['CORS_HEADERS'] = 'Content-Type' +app.config['UPLOAD_FOLDER'] = 'static' + +# yolov6_model = my_yolov6.my_yolov6("weights/yolov6s.pt", 'cpu', 'data/coco.yaml', 640, True) + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import torch +from torch import nn, optim +import torch.nn.functional as F +import torchvision +from torchvision import datasets, transforms, models +from torch.autograd import Variable +from torch.utils.data.sampler import SubsetRandomSampler + +import warnings +warnings.filterwarnings('ignore') +from pytorch_grad_cam import GradCAM, EigenCAM, LayerCAM, XGradCAM +from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget +from pytorch_grad_cam.utils.image import show_cam_on_image, \ + deprocess_image, \ + preprocess_image +from PIL import Image + +import copy + +# Load GoogleNet model +# os.makedirs('./model', exist_ok=True, mode=0o777) +os.environ['TORCH_HOME'] = './models' +model = models.googlenet(pretrained=True) +model.fc = nn.Linear(1024, 4) +model.load_state_dict(torch.load('./model_transfer_batch_2_epoch50.pt', map_location=torch.device('cpu'))) + + +data_transforms ={ + "train_transforms": transforms.Compose([transforms.RandomRotation(30), + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225])]), + "valid_transforms": transforms.Compose([transforms.Resize(225), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225])]), + "test_transforms": transforms.Compose([transforms.Resize(225), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225])]) +} + +transform = transforms.Compose([transforms.Resize(225), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225])]) + +use_cuda = torch.cuda.is_available() +classes = ['BrownSpot', 'Healthy', 'Hispa', 'LeafBlast'] + +def yolo_format(x, y, w, h, image_size): + x_center_norm = (x+w/2)/image_size[1] + y_center_norm = (y+h/2)/image_size[0] + w_norm = w/image_size[1] + h_norm = h/image_size[0] + return (x_center_norm, y_center_norm, w_norm, h_norm) + +def predict_image(image_url): + + img = np.array(Image.open(image_url)) + + img_cp = np.copy(img) + img_cp = cv2.resize(img_cp, (224, 224)) + img_cp = np.float32(img_cp) / 255 + input_tensor = preprocess_image(img_cp, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + input_tensor = torch.Tensor(input_tensor) + # input_tensor.cuda()  # the model stays on the CPU, so keep the input tensor on the CPU too + + output = model(input_tensor) + # print(torch.max(output, 1)) + _, preds_tensor = torch.max(output, 1) + preds = np.squeeze(preds_tensor.numpy()) if not use_cuda else np.squeeze(preds_tensor.cpu().numpy()) + print(preds) + + class_name = classes[preds] + if preds == 1: + grad_bounding_box = (0,0,0,0) + else: + img = np.array(Image.open(image_url)) + img = cv2.resize(img, (224, 224)) + img = np.float32(img) / 255 + input_tensor = torch.Tensor(input_tensor) + # input_tensor.cuda()  # kept on the CPU to match the model + +
input_tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + targets = [ClassifierOutputTarget(0)] + target_layers = [model.inception5b.branch4[1].conv] + + with EigenCAM(model=model, target_layers=target_layers) as cam: + grayscale_cams = cam(input_tensor=input_tensor, targets=targets) + cam_image = show_cam_on_image(img, grayscale_cams[0, :], use_rgb=True) + cam = np.uint8(255*grayscale_cams[0, :]) + img = np.uint8(255*img) + ret, thresh1 = cv2.threshold(cam, 120, 255, cv2.THRESH_BINARY + + cv2.THRESH_OTSU) + img_otsu = cam < thresh1 + img_bin = np.multiply(img_otsu, 1) + img_bin = np.array(img_bin, np.uint8) + contours, _ = cv2.findContours(img_bin,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + cnt = contours[0] + x,y,w,h = cv2.boundingRect(cnt) + # grad_bounding_box = (x,y,x+w, y+h) + grad_bounding_box = yolo_format(x, y, w, h, (224, 224)) + + # print(grad_bounding_box) + + return class_name, grad_bounding_box + +def yolo2bbox(x, y, w, h, img_size=(224, 224)): + x = x * img_size[1] + y = y * img_size[0] + w = w * img_size[1] + h = h * img_size[0] + x1, y1 = x-w/2, y-h/2 + x2, y2 = x+w/2, y+h/2 + return int(x1), int(y1), int(x2), int(y2) + +def bb_intersection_over_union(boxA, boxB): + xA = max(boxA[0], boxB[0]) + yA = max(boxA[1], boxB[1]) + xB = min(boxA[2], boxB[2]) + yB = min(boxA[3], boxB[3]) + interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1) + boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1) + boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1) + iou = interArea / float(boxAArea + boxBArea - interArea) + return iou + +def read_annot_file(label_file): + with open(os.path.join(label_file), "r") as file1: + # Reading from a file + t = file1.read() + box = t[t.find(" ")+1:] + box = list(box.split(" ")) + # list(map(float, box)) + for i in range(len(box)): + box[i] = float(box[i]) + return box + + +@app.route('/', methods=['POST']) +@cross_origin(origin='*') +def predict_leaf(): + image = request.files['file'] + if image: + # Save the uploaded file + path_to_save = os.path.join(app.config['UPLOAD_FOLDER'], image.filename) + # print("Save= ", path_to_save) + image.save(path_to_save) + + predicted_class, grad_bounding_box = predict_image(path_to_save) + # print(predicted_class) + # print(grad_bounding_box) + result_dict = {'class': predicted_class, 'bounding_box': grad_bounding_box} + json_object = json.dumps(result_dict) + print(json_object) + return json_object + return 'Upload file to detect: ' + + + +# Start Backend +if __name__ == '__main__': + app.run(host='0.0.0.0', port=6868) \ No newline at end of file diff --git a/model_transfer_batch_2_epoch50.pt b/model_transfer_batch_2_epoch50.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5410d5ad353f3a6de80fe099da8d892df19837b --- /dev/null +++ b/model_transfer_batch_2_epoch50.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7eb8ac31ecff2afe28dbd36a08aac278ab5deb032d394a2f530365af697eeed +size 22596295 diff --git a/pytorch_grad_cam/__init__.py b/pytorch_grad_cam/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4d6e8f3e952cd024a6af7895ddce8aa241dab026 --- /dev/null +++ b/pytorch_grad_cam/__init__.py @@ -0,0 +1,20 @@ +from pytorch_grad_cam.grad_cam import GradCAM +from pytorch_grad_cam.hirescam import HiResCAM +from pytorch_grad_cam.grad_cam_elementwise import GradCAMElementWise +from pytorch_grad_cam.ablation_layer import AblationLayer, AblationLayerVit, AblationLayerFasterRCNN +from pytorch_grad_cam.ablation_cam
import AblationCAM +from pytorch_grad_cam.xgrad_cam import XGradCAM +from pytorch_grad_cam.grad_cam_plusplus import GradCAMPlusPlus +from pytorch_grad_cam.score_cam import ScoreCAM +from pytorch_grad_cam.layer_cam import LayerCAM +from pytorch_grad_cam.eigen_cam import EigenCAM +from pytorch_grad_cam.eigen_grad_cam import EigenGradCAM +from pytorch_grad_cam.random_cam import RandomCAM +from pytorch_grad_cam.fullgrad_cam import FullGrad +from pytorch_grad_cam.guided_backprop import GuidedBackpropReLUModel +from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients +from pytorch_grad_cam.feature_factorization.deep_feature_factorization import DeepFeatureFactorization, run_dff_on_image +import pytorch_grad_cam.utils.model_targets +import pytorch_grad_cam.utils.reshape_transforms +import pytorch_grad_cam.metrics.cam_mult_image +import pytorch_grad_cam.metrics.road diff --git a/pytorch_grad_cam/__pycache__/__init__.cpython-310.pyc b/pytorch_grad_cam/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b15067cab0273f51e3fe6f42c7c1ff4e263bb7d Binary files /dev/null and b/pytorch_grad_cam/__pycache__/__init__.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/__init__.cpython-39.pyc b/pytorch_grad_cam/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac5ce6a3697f3c160f0eb40104b958b1c01b46f3 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/__init__.cpython-39.pyc differ diff --git a/pytorch_grad_cam/__pycache__/ablation_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/ablation_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3672d56f8a01bbbf5af7e359f65cf0f9316b5c6b Binary files /dev/null and b/pytorch_grad_cam/__pycache__/ablation_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/ablation_layer.cpython-310.pyc b/pytorch_grad_cam/__pycache__/ablation_layer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ecd3665497f8ae4777a990e6c9bd0743066f0180 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/ablation_layer.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/activations_and_gradients.cpython-310.pyc b/pytorch_grad_cam/__pycache__/activations_and_gradients.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e9449f37c0e1efd548ba2221fd9817c1063518a Binary files /dev/null and b/pytorch_grad_cam/__pycache__/activations_and_gradients.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/base_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/base_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d42b53d5dc44a26e4d4f527c26a3387a98ba44f8 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/base_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/base_cam.cpython-39.pyc b/pytorch_grad_cam/__pycache__/base_cam.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd4fe119f07eaafc72ac485753b8bb9d1fce030f Binary files /dev/null and b/pytorch_grad_cam/__pycache__/base_cam.cpython-39.pyc differ diff --git a/pytorch_grad_cam/__pycache__/eigen_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/eigen_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ffd0315fc72604e26dc9ea3a67e0e7c628a1919 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/eigen_cam.cpython-310.pyc differ 
diff --git a/pytorch_grad_cam/__pycache__/eigen_grad_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/eigen_grad_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..57cd850dcd1ed27d2d79e9b14017d198547bf65f Binary files /dev/null and b/pytorch_grad_cam/__pycache__/eigen_grad_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/fullgrad_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/fullgrad_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81ce2e37a0d8e9aa91b9f38a22a6dbecfd382141 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/fullgrad_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/grad_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/grad_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a839c4544734ab475a6c1b2f9a83c850ff6e6c1 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/grad_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/grad_cam.cpython-39.pyc b/pytorch_grad_cam/__pycache__/grad_cam.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f70dc5edd23d9be2abc7f02d9f673ae1f5002ea Binary files /dev/null and b/pytorch_grad_cam/__pycache__/grad_cam.cpython-39.pyc differ diff --git a/pytorch_grad_cam/__pycache__/grad_cam_elementwise.cpython-310.pyc b/pytorch_grad_cam/__pycache__/grad_cam_elementwise.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..490a731422201c3cae426b6f2808e202322d1610 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/grad_cam_elementwise.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/grad_cam_plusplus.cpython-310.pyc b/pytorch_grad_cam/__pycache__/grad_cam_plusplus.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c86996edd6c4ec27b17d24b9fe9d35b394ad56f9 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/grad_cam_plusplus.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/guided_backprop.cpython-310.pyc b/pytorch_grad_cam/__pycache__/guided_backprop.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e23f90d29a26435c85658dd9a28a5e2e62fe42b4 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/guided_backprop.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/hirescam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/hirescam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..741c3ea1412237978b6f4bba8f0c23263c45c6ff Binary files /dev/null and b/pytorch_grad_cam/__pycache__/hirescam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/layer_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/layer_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2bf5a0847054e4a8cd58864fa857b019deb7983 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/layer_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/random_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/random_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f5b5f2be3b5588cffb546d9f0286445e9ab79574 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/random_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/score_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/score_cam.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..8c5ef3b4a79c4bdcf1e6daaddbf0cc86271e6eba Binary files /dev/null and b/pytorch_grad_cam/__pycache__/score_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/__pycache__/xgrad_cam.cpython-310.pyc b/pytorch_grad_cam/__pycache__/xgrad_cam.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4810b61e45ef830b23338a5ba78e82bb14967098 Binary files /dev/null and b/pytorch_grad_cam/__pycache__/xgrad_cam.cpython-310.pyc differ diff --git a/pytorch_grad_cam/ablation_cam.py b/pytorch_grad_cam/ablation_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..77e65fc78960f4869068ca92ccf51299159978df --- /dev/null +++ b/pytorch_grad_cam/ablation_cam.py @@ -0,0 +1,148 @@ +import numpy as np +import torch +import tqdm +from typing import Callable, List +from pytorch_grad_cam.base_cam import BaseCAM +from pytorch_grad_cam.utils.find_layers import replace_layer_recursive +from pytorch_grad_cam.ablation_layer import AblationLayer + + +""" Implementation of AblationCAM +https://openaccess.thecvf.com/content_WACV_2020/papers/Desai_Ablation-CAM_Visual_Explanations_for_Deep_Convolutional_Network_via_Gradient-free_Localization_WACV_2020_paper.pdf + +Ablate individual activations, and then measure the drop in the target score. + +In the current implementation, the target layer activations is cached, so it won't be re-computed. +However layers before it, if any, will not be cached. +This means that if the target layer is a large block, for example model.featuers (in vgg), there will +be a large save in run time. + +Since we have to go over many channels and ablate them, and every channel ablation requires a forward pass, +it would be nice if we could avoid doing that for channels that won't contribute anwyay, making it much faster. +The parameter ratio_channels_to_ablate controls how many channels should be ablated, using an experimental method +(to be improved). The default 1.0 value means that all channels will be ablated. 
+""" + + +class AblationCAM(BaseCAM): + def __init__(self, + model: torch.nn.Module, + target_layers: List[torch.nn.Module], + use_cuda: bool = False, + reshape_transform: Callable = None, + ablation_layer: torch.nn.Module = AblationLayer(), + batch_size: int = 32, + ratio_channels_to_ablate: float = 1.0) -> None: + + super(AblationCAM, self).__init__(model, + target_layers, + use_cuda, + reshape_transform, + uses_gradients=False) + self.batch_size = batch_size + self.ablation_layer = ablation_layer + self.ratio_channels_to_ablate = ratio_channels_to_ablate + + def save_activation(self, module, input, output) -> None: + """ Helper function to save the raw activations from the target layer """ + self.activations = output + + def assemble_ablation_scores(self, + new_scores: list, + original_score: float, + ablated_channels: np.ndarray, + number_of_channels: int) -> np.ndarray: + """ Take the value from the channels that were ablated, + and just set the original score for the channels that were skipped """ + + index = 0 + result = [] + sorted_indices = np.argsort(ablated_channels) + ablated_channels = ablated_channels[sorted_indices] + new_scores = np.float32(new_scores)[sorted_indices] + + for i in range(number_of_channels): + if index < len(ablated_channels) and ablated_channels[index] == i: + weight = new_scores[index] + index = index + 1 + else: + weight = original_score + result.append(weight) + + return result + + def get_cam_weights(self, + input_tensor: torch.Tensor, + target_layer: torch.nn.Module, + targets: List[Callable], + activations: torch.Tensor, + grads: torch.Tensor) -> np.ndarray: + + # Do a forward pass, compute the target scores, and cache the + # activations + handle = target_layer.register_forward_hook(self.save_activation) + with torch.no_grad(): + outputs = self.model(input_tensor) + handle.remove() + original_scores = np.float32( + [target(output).cpu().item() for target, output in zip(targets, outputs)]) + + # Replace the layer with the ablation layer. + # When we finish, we will replace it back, so the original model is + # unchanged. + ablation_layer = self.ablation_layer + replace_layer_recursive(self.model, target_layer, ablation_layer) + + number_of_channels = activations.shape[1] + weights = [] + # This is a "gradient free" method, so we don't need gradients here. + with torch.no_grad(): + # Loop over each of the batch images and ablate activations for it. + for batch_index, (target, tensor) in enumerate( + zip(targets, input_tensor)): + new_scores = [] + batch_tensor = tensor.repeat(self.batch_size, 1, 1, 1) + + # Check which channels should be ablated. Normally this will be all channels, + # But we can also try to speed this up by using a low + # ratio_channels_to_ablate. + channels_to_ablate = ablation_layer.activations_to_be_ablated( + activations[batch_index, :], self.ratio_channels_to_ablate) + number_channels_to_ablate = len(channels_to_ablate) + + for i in tqdm.tqdm( + range( + 0, + number_channels_to_ablate, + self.batch_size)): + if i + self.batch_size > number_channels_to_ablate: + batch_tensor = batch_tensor[:( + number_channels_to_ablate - i)] + + # Change the state of the ablation layer so it ablates the next channels. + # TBD: Move this into the ablation layer forward pass. 
+ ablation_layer.set_next_batch( + input_batch_index=batch_index, + activations=self.activations, + num_channels_to_ablate=batch_tensor.size(0)) + score = [target(o).cpu().item() + for o in self.model(batch_tensor)] + new_scores.extend(score) + ablation_layer.indices = ablation_layer.indices[batch_tensor.size( + 0):] + + new_scores = self.assemble_ablation_scores( + new_scores, + original_scores[batch_index], + channels_to_ablate, + number_of_channels) + weights.extend(new_scores) + + weights = np.float32(weights) + weights = weights.reshape(activations.shape[:2]) + original_scores = original_scores[:, None] + weights = (original_scores - weights) / original_scores + + # Replace the model back to the original state + replace_layer_recursive(self.model, ablation_layer, target_layer) + return weights diff --git a/pytorch_grad_cam/ablation_cam_multilayer.py b/pytorch_grad_cam/ablation_cam_multilayer.py new file mode 100644 index 0000000000000000000000000000000000000000..9b9dc806d845422e594bd0082dc718b947c593f2 --- /dev/null +++ b/pytorch_grad_cam/ablation_cam_multilayer.py @@ -0,0 +1,136 @@ +import cv2 +import numpy as np +import torch +import tqdm +from pytorch_grad_cam.base_cam import BaseCAM + + +class AblationLayer(torch.nn.Module): + def __init__(self, layer, reshape_transform, indices): + super(AblationLayer, self).__init__() + + self.layer = layer + self.reshape_transform = reshape_transform + # The channels to zero out: + self.indices = indices + + def forward(self, x): + self.__call__(x) + + def __call__(self, x): + output = self.layer(x) + + # Hack to work with ViT, + # Since the activation channels are last and not first like in CNNs + # Probably should remove it? + if self.reshape_transform is not None: + output = output.transpose(1, 2) + + for i in range(output.size(0)): + + # Commonly the minimum activation will be 0, + # And then it makes sense to zero it out. + # However depending on the architecture, + # If the values can be negative, we use very negative values + # to perform the ablation, deviating from the paper. + if torch.min(output) == 0: + output[i, self.indices[i], :] = 0 + else: + ABLATION_VALUE = 1e5 + output[i, self.indices[i], :] = torch.min( + output) - ABLATION_VALUE + + if self.reshape_transform is not None: + output = output.transpose(2, 1) + + return output + + +def replace_layer_recursive(model, old_layer, new_layer): + for name, layer in model._modules.items(): + if layer == old_layer: + model._modules[name] = new_layer + return True + elif replace_layer_recursive(layer, old_layer, new_layer): + return True + return False + + +class AblationCAM(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + super(AblationCAM, self).__init__(model, target_layers, use_cuda, + reshape_transform) + + if len(target_layers) > 1: + print( + "Warning. You are usign Ablation CAM with more than 1 layers. 
" + "This is supported only if all layers have the same output shape") + + def set_ablation_layers(self): + self.ablation_layers = [] + for target_layer in self.target_layers: + ablation_layer = AblationLayer(target_layer, + self.reshape_transform, indices=[]) + self.ablation_layers.append(ablation_layer) + replace_layer_recursive(self.model, target_layer, ablation_layer) + + def unset_ablation_layers(self): + # replace the model back to the original state + for ablation_layer, target_layer in zip( + self.ablation_layers, self.target_layers): + replace_layer_recursive(self.model, ablation_layer, target_layer) + + def set_ablation_layer_batch_indices(self, indices): + for ablation_layer in self.ablation_layers: + ablation_layer.indices = indices + + def trim_ablation_layer_batch_indices(self, keep): + for ablation_layer in self.ablation_layers: + ablation_layer.indices = ablation_layer.indices[:keep] + + def get_cam_weights(self, + input_tensor, + target_category, + activations, + grads): + with torch.no_grad(): + outputs = self.model(input_tensor).cpu().numpy() + original_scores = [] + for i in range(input_tensor.size(0)): + original_scores.append(outputs[i, target_category[i]]) + original_scores = np.float32(original_scores) + + self.set_ablation_layers() + + if hasattr(self, "batch_size"): + BATCH_SIZE = self.batch_size + else: + BATCH_SIZE = 32 + + number_of_channels = activations.shape[1] + weights = [] + + with torch.no_grad(): + # Iterate over the input batch + for tensor, category in zip(input_tensor, target_category): + batch_tensor = tensor.repeat(BATCH_SIZE, 1, 1, 1) + for i in tqdm.tqdm(range(0, number_of_channels, BATCH_SIZE)): + self.set_ablation_layer_batch_indices( + list(range(i, i + BATCH_SIZE))) + + if i + BATCH_SIZE > number_of_channels: + keep = number_of_channels - i + batch_tensor = batch_tensor[:keep] + self.trim_ablation_layer_batch_indices(self, keep) + score = self.model(batch_tensor)[:, category].cpu().numpy() + weights.extend(score) + + weights = np.float32(weights) + weights = weights.reshape(activations.shape[:2]) + original_scores = original_scores[:, None] + weights = (original_scores - weights) / original_scores + + # replace the model back to the original state + self.unset_ablation_layers() + return weights diff --git a/pytorch_grad_cam/ablation_layer.py b/pytorch_grad_cam/ablation_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..b404f3be6390690235f7dc4f1b615fa4770c56d4 --- /dev/null +++ b/pytorch_grad_cam/ablation_layer.py @@ -0,0 +1,155 @@ +import torch +from collections import OrderedDict +import numpy as np +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection + + +class AblationLayer(torch.nn.Module): + def __init__(self): + super(AblationLayer, self).__init__() + + def objectiveness_mask_from_svd(self, activations, threshold=0.01): + """ Experimental method to get a binary mask to compare if the activation is worth ablating. + The idea is to apply the EigenCAM method by doing PCA on the activations. + Then we create a binary mask by comparing to a low threshold. + Areas that are masked out, are probably not interesting anyway. 
+ """ + + projection = get_2d_projection(activations[None, :])[0, :] + projection = np.abs(projection) + projection = projection - projection.min() + projection = projection / projection.max() + projection = projection > threshold + return projection + + def activations_to_be_ablated( + self, + activations, + ratio_channels_to_ablate=1.0): + """ Experimental method to get a binary mask to compare if the activation is worth ablating. + Create a binary CAM mask with objectiveness_mask_from_svd. + Score each Activation channel, by seeing how much of its values are inside the mask. + Then keep the top channels. + + """ + if ratio_channels_to_ablate == 1.0: + self.indices = np.int32(range(activations.shape[0])) + return self.indices + + projection = self.objectiveness_mask_from_svd(activations) + + scores = [] + for channel in activations: + normalized = np.abs(channel) + normalized = normalized - normalized.min() + normalized = normalized / np.max(normalized) + score = (projection * normalized).sum() / normalized.sum() + scores.append(score) + scores = np.float32(scores) + + indices = list(np.argsort(scores)) + high_score_indices = indices[::- + 1][: int(len(indices) * + ratio_channels_to_ablate)] + low_score_indices = indices[: int( + len(indices) * ratio_channels_to_ablate)] + self.indices = np.int32(high_score_indices + low_score_indices) + return self.indices + + def set_next_batch( + self, + input_batch_index, + activations, + num_channels_to_ablate): + """ This creates the next batch of activations from the layer. + Just take corresponding batch member from activations, and repeat it num_channels_to_ablate times. + """ + self.activations = activations[input_batch_index, :, :, :].clone( + ).unsqueeze(0).repeat(num_channels_to_ablate, 1, 1, 1) + + def __call__(self, x): + output = self.activations + for i in range(output.size(0)): + # Commonly the minimum activation will be 0, + # And then it makes sense to zero it out. + # However depending on the architecture, + # If the values can be negative, we use very negative values + # to perform the ablation, deviating from the paper. + if torch.min(output) == 0: + output[i, self.indices[i], :] = 0 + else: + ABLATION_VALUE = 1e7 + output[i, self.indices[i], :] = torch.min( + output) - ABLATION_VALUE + + return output + + +class AblationLayerVit(AblationLayer): + def __init__(self): + super(AblationLayerVit, self).__init__() + + def __call__(self, x): + output = self.activations + output = output.transpose(1, len(output.shape) - 1) + for i in range(output.size(0)): + + # Commonly the minimum activation will be 0, + # And then it makes sense to zero it out. + # However depending on the architecture, + # If the values can be negative, we use very negative values + # to perform the ablation, deviating from the paper. + if torch.min(output) == 0: + output[i, self.indices[i], :] = 0 + else: + ABLATION_VALUE = 1e7 + output[i, self.indices[i], :] = torch.min( + output) - ABLATION_VALUE + + output = output.transpose(len(output.shape) - 1, 1) + + return output + + def set_next_batch( + self, + input_batch_index, + activations, + num_channels_to_ablate): + """ This creates the next batch of activations from the layer. + Just take corresponding batch member from activations, and repeat it num_channels_to_ablate times. 
+ """ + repeat_params = [num_channels_to_ablate] + \ + len(activations.shape[:-1]) * [1] + self.activations = activations[input_batch_index, :, :].clone( + ).unsqueeze(0).repeat(*repeat_params) + + +class AblationLayerFasterRCNN(AblationLayer): + def __init__(self): + super(AblationLayerFasterRCNN, self).__init__() + + def set_next_batch( + self, + input_batch_index, + activations, + num_channels_to_ablate): + """ Extract the next batch member from activations, + and repeat it num_channels_to_ablate times. + """ + self.activations = OrderedDict() + for key, value in activations.items(): + fpn_activation = value[input_batch_index, + :, :, :].clone().unsqueeze(0) + self.activations[key] = fpn_activation.repeat( + num_channels_to_ablate, 1, 1, 1) + + def __call__(self, x): + result = self.activations + layers = {0: '0', 1: '1', 2: '2', 3: '3', 4: 'pool'} + num_channels_to_ablate = result['pool'].size(0) + for i in range(num_channels_to_ablate): + pyramid_layer = int(self.indices[i] / 256) + index_in_pyramid_layer = int(self.indices[i] % 256) + result[layers[pyramid_layer]][i, + index_in_pyramid_layer, :, :] = -1000 + return result diff --git a/pytorch_grad_cam/activations_and_gradients.py b/pytorch_grad_cam/activations_and_gradients.py new file mode 100644 index 0000000000000000000000000000000000000000..0c2071e59165e96d3ddd9b7cdef678a252720e63 --- /dev/null +++ b/pytorch_grad_cam/activations_and_gradients.py @@ -0,0 +1,46 @@ +class ActivationsAndGradients: + """ Class for extracting activations and + registering gradients from targetted intermediate layers """ + + def __init__(self, model, target_layers, reshape_transform): + self.model = model + self.gradients = [] + self.activations = [] + self.reshape_transform = reshape_transform + self.handles = [] + for target_layer in target_layers: + self.handles.append( + target_layer.register_forward_hook(self.save_activation)) + # Because of https://github.com/pytorch/pytorch/issues/61519, + # we don't use backward hook to record gradients. + self.handles.append( + target_layer.register_forward_hook(self.save_gradient)) + + def save_activation(self, module, input, output): + activation = output + + if self.reshape_transform is not None: + activation = self.reshape_transform(activation) + self.activations.append(activation.cpu().detach()) + + def save_gradient(self, module, input, output): + if not hasattr(output, "requires_grad") or not output.requires_grad: + # You can only register hooks on tensor requires grad. 
+ return + + # Gradients are computed in reverse order + def _store_grad(grad): + if self.reshape_transform is not None: + grad = self.reshape_transform(grad) + self.gradients = [grad.cpu().detach()] + self.gradients + + output.register_hook(_store_grad) + + def __call__(self, x): + self.gradients = [] + self.activations = [] + return self.model(x) + + def release(self): + for handle in self.handles: + handle.remove() diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..7ee1929712013c8201ddade8988f31d4be4d6346 --- /dev/null +++ b/pytorch_grad_cam/base_cam.py @@ -0,0 +1,203 @@ +import numpy as np +import torch +import ttach as tta +from typing import Callable, List, Tuple +from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection +from pytorch_grad_cam.utils.image import scale_cam_image +from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget + + +class BaseCAM: + def __init__(self, + model: torch.nn.Module, + target_layers: List[torch.nn.Module], + use_cuda: bool = False, + reshape_transform: Callable = None, + compute_input_gradient: bool = False, + uses_gradients: bool = True) -> None: + self.model = model.eval() + self.target_layers = target_layers + self.cuda = use_cuda + if self.cuda: + self.model = model.cuda() + self.reshape_transform = reshape_transform + self.compute_input_gradient = compute_input_gradient + self.uses_gradients = uses_gradients + self.activations_and_grads = ActivationsAndGradients( + self.model, target_layers, reshape_transform) + + """ Get a vector of weights for every channel in the target layer. + Methods that return weights channels, + will typically need to only implement this function. """ + + def get_cam_weights(self, + input_tensor: torch.Tensor, + target_layers: List[torch.nn.Module], + targets: List[torch.nn.Module], + activations: torch.Tensor, + grads: torch.Tensor) -> np.ndarray: + raise Exception("Not Implemented") + + def get_cam_image(self, + input_tensor: torch.Tensor, + target_layer: torch.nn.Module, + targets: List[torch.nn.Module], + activations: torch.Tensor, + grads: torch.Tensor, + eigen_smooth: bool = False) -> np.ndarray: + + weights = self.get_cam_weights(input_tensor, + target_layer, + targets, + activations, + grads) + weighted_activations = weights[:, :, None, None] * activations + if eigen_smooth: + cam = get_2d_projection(weighted_activations) + else: + cam = weighted_activations.sum(axis=1) + return cam + + def forward(self, + input_tensor: torch.Tensor, + targets: List[torch.nn.Module], + eigen_smooth: bool = False) -> np.ndarray: + + if self.cuda: + input_tensor = input_tensor.cuda() + + if self.compute_input_gradient: + input_tensor = torch.autograd.Variable(input_tensor, + requires_grad=True) + + outputs = self.activations_and_grads(input_tensor) + if targets is None: + target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1) + targets = [ClassifierOutputTarget( + category) for category in target_categories] + + if self.uses_gradients: + self.model.zero_grad() + loss = sum([target(output) + for target, output in zip(targets, outputs)]) + loss.backward(retain_graph=True) + + # In most of the saliency attribution papers, the saliency is + # computed with a single target layer. + # Commonly it is the last convolutional layer. + # Here we support passing a list with multiple target layers. 
+ # It will compute the saliency image for every image, + # and then aggregate them (with a default mean aggregation). + # This gives you more flexibility in case you just want to + # use all conv layers for example, all Batchnorm layers, + # or something else. + cam_per_layer = self.compute_cam_per_layer(input_tensor, + targets, + eigen_smooth) + return self.aggregate_multi_layers(cam_per_layer) + + def get_target_width_height(self, + input_tensor: torch.Tensor) -> Tuple[int, int]: + width, height = input_tensor.size(-1), input_tensor.size(-2) + return width, height + + def compute_cam_per_layer( + self, + input_tensor: torch.Tensor, + targets: List[torch.nn.Module], + eigen_smooth: bool) -> np.ndarray: + activations_list = [a.cpu().data.numpy() + for a in self.activations_and_grads.activations] + grads_list = [g.cpu().data.numpy() + for g in self.activations_and_grads.gradients] + target_size = self.get_target_width_height(input_tensor) + + cam_per_target_layer = [] + # Loop over the saliency image from every layer + for i in range(len(self.target_layers)): + target_layer = self.target_layers[i] + layer_activations = None + layer_grads = None + if i < len(activations_list): + layer_activations = activations_list[i] + if i < len(grads_list): + layer_grads = grads_list[i] + + cam = self.get_cam_image(input_tensor, + target_layer, + targets, + layer_activations, + layer_grads, + eigen_smooth) + cam = np.maximum(cam, 0) + scaled = scale_cam_image(cam, target_size) + cam_per_target_layer.append(scaled[:, None, :]) + + return cam_per_target_layer + + def aggregate_multi_layers( + self, + cam_per_target_layer: np.ndarray) -> np.ndarray: + cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1) + cam_per_target_layer = np.maximum(cam_per_target_layer, 0) + result = np.mean(cam_per_target_layer, axis=1) + return scale_cam_image(result) + + def forward_augmentation_smoothing(self, + input_tensor: torch.Tensor, + targets: List[torch.nn.Module], + eigen_smooth: bool = False) -> np.ndarray: + transforms = tta.Compose( + [ + tta.HorizontalFlip(), + tta.Multiply(factors=[0.9, 1, 1.1]), + ] + ) + cams = [] + for transform in transforms: + augmented_tensor = transform.augment_image(input_tensor) + cam = self.forward(augmented_tensor, + targets, + eigen_smooth) + + # The ttach library expects a tensor of size BxCxHxW + cam = cam[:, None, :, :] + cam = torch.from_numpy(cam) + cam = transform.deaugment_mask(cam) + + # Back to numpy float32, HxW + cam = cam.numpy() + cam = cam[:, 0, :, :] + cams.append(cam) + + cam = np.mean(np.float32(cams), axis=0) + return cam + + def __call__(self, + input_tensor: torch.Tensor, + targets: List[torch.nn.Module] = None, + aug_smooth: bool = False, + eigen_smooth: bool = False) -> np.ndarray: + + # Smooth the CAM result with test time augmentation + if aug_smooth is True: + return self.forward_augmentation_smoothing( + input_tensor, targets, eigen_smooth) + + return self.forward(input_tensor, + targets, eigen_smooth) + + def __del__(self): + self.activations_and_grads.release() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_tb): + self.activations_and_grads.release() + if isinstance(exc_value, IndexError): + # Handle IndexError here... + print( + f"An exception occurred in CAM with block: {exc_type}. 
Message: {exc_value}") + return True diff --git a/pytorch_grad_cam/eigen_cam.py b/pytorch_grad_cam/eigen_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..fd6d6bc1884181b3cad7d0ed51614a8be14e37b1 --- /dev/null +++ b/pytorch_grad_cam/eigen_cam.py @@ -0,0 +1,23 @@ +from pytorch_grad_cam.base_cam import BaseCAM +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection + +# https://arxiv.org/abs/2008.00299 + + +class EigenCAM(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + super(EigenCAM, self).__init__(model, + target_layers, + use_cuda, + reshape_transform, + uses_gradients=False) + + def get_cam_image(self, + input_tensor, + target_layer, + target_category, + activations, + grads, + eigen_smooth): + return get_2d_projection(activations) diff --git a/pytorch_grad_cam/eigen_grad_cam.py b/pytorch_grad_cam/eigen_grad_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..3932a96d27b6019ed0f537688f0beb47d3c57e11 --- /dev/null +++ b/pytorch_grad_cam/eigen_grad_cam.py @@ -0,0 +1,21 @@ +from pytorch_grad_cam.base_cam import BaseCAM +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection + +# Like Eigen CAM: https://arxiv.org/abs/2008.00299 +# But multiply the activations x gradients + + +class EigenGradCAM(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + super(EigenGradCAM, self).__init__(model, target_layers, use_cuda, + reshape_transform) + + def get_cam_image(self, + input_tensor, + target_layer, + target_category, + activations, + grads, + eigen_smooth): + return get_2d_projection(grads * activations) diff --git a/pytorch_grad_cam/feature_factorization/__init__.py b/pytorch_grad_cam/feature_factorization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pytorch_grad_cam/feature_factorization/__pycache__/__init__.cpython-310.pyc b/pytorch_grad_cam/feature_factorization/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..792efccb8a3d06cd0c66816f84b88f0062984931 Binary files /dev/null and b/pytorch_grad_cam/feature_factorization/__pycache__/__init__.cpython-310.pyc differ diff --git a/pytorch_grad_cam/feature_factorization/__pycache__/deep_feature_factorization.cpython-310.pyc b/pytorch_grad_cam/feature_factorization/__pycache__/deep_feature_factorization.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a70780ac01c3c656ff3f00b01e5fa6251687be7f Binary files /dev/null and b/pytorch_grad_cam/feature_factorization/__pycache__/deep_feature_factorization.cpython-310.pyc differ diff --git a/pytorch_grad_cam/feature_factorization/deep_feature_factorization.py b/pytorch_grad_cam/feature_factorization/deep_feature_factorization.py new file mode 100644 index 0000000000000000000000000000000000000000..b9db2c3e32dda9fa64e5e2c23c129223f98ef3ce --- /dev/null +++ b/pytorch_grad_cam/feature_factorization/deep_feature_factorization.py @@ -0,0 +1,131 @@ +import numpy as np +from PIL import Image +import torch +from typing import Callable, List, Tuple, Optional +from sklearn.decomposition import NMF +from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients +from pytorch_grad_cam.utils.image import scale_cam_image, create_labels_legend, show_factorization_on_image + + +def dff(activations: np.ndarray, n_components: int = 5): + """ Compute Deep Feature 
Factorization on a 2d Activations tensor. + + :param activations: A numpy array of shape batch x channels x height x width + :param n_components: The number of components for the non negative matrix factorization + :returns: A tuple of the concepts (a numpy array with shape channels x components), + and the explanation heatmaps (a numpy arary with shape batch x height x width) + """ + + batch_size, channels, h, w = activations.shape + reshaped_activations = activations.transpose((1, 0, 2, 3)) + reshaped_activations[np.isnan(reshaped_activations)] = 0 + reshaped_activations = reshaped_activations.reshape( + reshaped_activations.shape[0], -1) + offset = reshaped_activations.min(axis=-1) + reshaped_activations = reshaped_activations - offset[:, None] + + model = NMF(n_components=n_components, init='random', random_state=0) + W = model.fit_transform(reshaped_activations) + H = model.components_ + concepts = W + offset[:, None] + explanations = H.reshape(n_components, batch_size, h, w) + explanations = explanations.transpose((1, 0, 2, 3)) + return concepts, explanations + + +class DeepFeatureFactorization: + """ Deep Feature Factorization: https://arxiv.org/abs/1806.10206 + This gets a model andcomputes the 2D activations for a target layer, + and computes Non Negative Matrix Factorization on the activations. + + Optionally it runs a computation on the concept embeddings, + like running a classifier on them. + + The explanation heatmaps are scalled to the range [0, 1] + and to the input tensor width and height. + """ + + def __init__(self, + model: torch.nn.Module, + target_layer: torch.nn.Module, + reshape_transform: Callable = None, + computation_on_concepts=None + ): + self.model = model + self.computation_on_concepts = computation_on_concepts + self.activations_and_grads = ActivationsAndGradients( + self.model, [target_layer], reshape_transform) + + def __call__(self, + input_tensor: torch.Tensor, + n_components: int = 16): + batch_size, channels, h, w = input_tensor.size() + _ = self.activations_and_grads(input_tensor) + + with torch.no_grad(): + activations = self.activations_and_grads.activations[0].cpu( + ).numpy() + + concepts, explanations = dff(activations, n_components=n_components) + + processed_explanations = [] + + for batch in explanations: + processed_explanations.append(scale_cam_image(batch, (w, h))) + + if self.computation_on_concepts: + with torch.no_grad(): + concept_tensors = torch.from_numpy( + np.float32(concepts).transpose((1, 0))) + concept_outputs = self.computation_on_concepts( + concept_tensors).cpu().numpy() + return concepts, processed_explanations, concept_outputs + else: + return concepts, processed_explanations + + def __del__(self): + self.activations_and_grads.release() + + def __exit__(self, exc_type, exc_value, exc_tb): + self.activations_and_grads.release() + if isinstance(exc_value, IndexError): + # Handle IndexError here... + print( + f"An exception occurred in ActivationSummary with block: {exc_type}. Message: {exc_value}") + return True + + +def run_dff_on_image(model: torch.nn.Module, + target_layer: torch.nn.Module, + classifier: torch.nn.Module, + img_pil: Image, + img_tensor: torch.Tensor, + reshape_transform=Optional[Callable], + n_components: int = 5, + top_k: int = 2) -> np.ndarray: + """ Helper function to create a Deep Feature Factorization visualization for a single image. + TBD: Run this on a batch with several images. 
+ """ + rgb_img_float = np.array(img_pil) / 255 + dff = DeepFeatureFactorization(model=model, + reshape_transform=reshape_transform, + target_layer=target_layer, + computation_on_concepts=classifier) + + concepts, batch_explanations, concept_outputs = dff( + img_tensor[None, :], n_components) + + concept_outputs = torch.softmax( + torch.from_numpy(concept_outputs), + axis=-1).numpy() + concept_label_strings = create_labels_legend(concept_outputs, + labels=model.config.id2label, + top_k=top_k) + visualization = show_factorization_on_image( + rgb_img_float, + batch_explanations[0], + image_weight=0.3, + concept_labels=concept_label_strings) + + result = np.hstack((np.array(img_pil), visualization)) + return result diff --git a/pytorch_grad_cam/fullgrad_cam.py b/pytorch_grad_cam/fullgrad_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..1a2685eff60d63ee758e4b11510ad148311160e9 --- /dev/null +++ b/pytorch_grad_cam/fullgrad_cam.py @@ -0,0 +1,95 @@ +import numpy as np +import torch +from pytorch_grad_cam.base_cam import BaseCAM +from pytorch_grad_cam.utils.find_layers import find_layer_predicate_recursive +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection +from pytorch_grad_cam.utils.image import scale_accross_batch_and_channels, scale_cam_image + +# https://arxiv.org/abs/1905.00780 + + +class FullGrad(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + if len(target_layers) > 0: + print( + "Warning: target_layers is ignored in FullGrad. All bias layers will be used instead") + + def layer_with_2D_bias(layer): + bias_target_layers = [torch.nn.Conv2d, torch.nn.BatchNorm2d] + if type(layer) in bias_target_layers and layer.bias is not None: + return True + return False + target_layers = find_layer_predicate_recursive( + model, layer_with_2D_bias) + super( + FullGrad, + self).__init__( + model, + target_layers, + use_cuda, + reshape_transform, + compute_input_gradient=True) + self.bias_data = [self.get_bias_data( + layer).cpu().numpy() for layer in target_layers] + + def get_bias_data(self, layer): + # Borrowed from official paper impl: + # https://github.com/idiap/fullgrad-saliency/blob/master/saliency/tensor_extractor.py#L47 + if isinstance(layer, torch.nn.BatchNorm2d): + bias = - (layer.running_mean * layer.weight + / torch.sqrt(layer.running_var + layer.eps)) + layer.bias + return bias.data + else: + return layer.bias.data + + def compute_cam_per_layer( + self, + input_tensor, + target_category, + eigen_smooth): + input_grad = input_tensor.grad.data.cpu().numpy() + grads_list = [g.cpu().data.numpy() for g in + self.activations_and_grads.gradients] + cam_per_target_layer = [] + target_size = self.get_target_width_height(input_tensor) + + gradient_multiplied_input = input_grad * input_tensor.data.cpu().numpy() + gradient_multiplied_input = np.abs(gradient_multiplied_input) + gradient_multiplied_input = scale_accross_batch_and_channels( + gradient_multiplied_input, + target_size) + cam_per_target_layer.append(gradient_multiplied_input) + + # Loop over the saliency image from every layer + assert(len(self.bias_data) == len(grads_list)) + for bias, grads in zip(self.bias_data, grads_list): + bias = bias[None, :, None, None] + # In the paper they take the absolute value, + # but possibily taking only the positive gradients will work + # better. 
+ bias_grad = np.abs(bias * grads) + result = scale_accross_batch_and_channels( + bias_grad, target_size) + result = np.sum(result, axis=1) + cam_per_target_layer.append(result[:, None, :]) + cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1) + if eigen_smooth: + # Resize to a smaller image, since this method typically has a very large number of channels, + # and then consumes a lot of memory + cam_per_target_layer = scale_accross_batch_and_channels( + cam_per_target_layer, (target_size[0] // 8, target_size[1] // 8)) + cam_per_target_layer = get_2d_projection(cam_per_target_layer) + cam_per_target_layer = cam_per_target_layer[:, None, :, :] + cam_per_target_layer = scale_accross_batch_and_channels( + cam_per_target_layer, + target_size) + else: + cam_per_target_layer = np.sum( + cam_per_target_layer, axis=1)[:, None, :] + + return cam_per_target_layer + + def aggregate_multi_layers(self, cam_per_target_layer): + result = np.sum(cam_per_target_layer, axis=1) + return scale_cam_image(result) diff --git a/pytorch_grad_cam/grad_cam.py b/pytorch_grad_cam/grad_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..025bf45ddc57ce3105945d7f4a747d001618a428 --- /dev/null +++ b/pytorch_grad_cam/grad_cam.py @@ -0,0 +1,22 @@ +import numpy as np +from pytorch_grad_cam.base_cam import BaseCAM + + +class GradCAM(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + super( + GradCAM, + self).__init__( + model, + target_layers, + use_cuda, + reshape_transform) + + def get_cam_weights(self, + input_tensor, + target_layer, + target_category, + activations, + grads): + return np.mean(grads, axis=(2, 3)) diff --git a/pytorch_grad_cam/grad_cam_elementwise.py b/pytorch_grad_cam/grad_cam_elementwise.py new file mode 100644 index 0000000000000000000000000000000000000000..2698d474a08aa0f0aee8f11a92dc887aa6ee3dc8 --- /dev/null +++ b/pytorch_grad_cam/grad_cam_elementwise.py @@ -0,0 +1,30 @@ +import numpy as np +from pytorch_grad_cam.base_cam import BaseCAM +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection + + +class GradCAMElementWise(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + super( + GradCAMElementWise, + self).__init__( + model, + target_layers, + use_cuda, + reshape_transform) + + def get_cam_image(self, + input_tensor, + target_layer, + target_category, + activations, + grads, + eigen_smooth): + elementwise_activations = np.maximum(grads * activations, 0) + + if eigen_smooth: + cam = get_2d_projection(elementwise_activations) + else: + cam = elementwise_activations.sum(axis=1) + return cam diff --git a/pytorch_grad_cam/grad_cam_plusplus.py b/pytorch_grad_cam/grad_cam_plusplus.py new file mode 100644 index 0000000000000000000000000000000000000000..4466826b7dd8707063885a1742332492213b03dd --- /dev/null +++ b/pytorch_grad_cam/grad_cam_plusplus.py @@ -0,0 +1,32 @@ +import numpy as np +from pytorch_grad_cam.base_cam import BaseCAM + +# https://arxiv.org/abs/1710.11063 + + +class GradCAMPlusPlus(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + super(GradCAMPlusPlus, self).__init__(model, target_layers, use_cuda, + reshape_transform) + + def get_cam_weights(self, + input_tensor, + target_layers, + target_category, + activations, + grads): + grads_power_2 = grads**2 + grads_power_3 = grads_power_2 * grads + # Equation 19 in https://arxiv.org/abs/1710.11063 + sum_activations = np.sum(activations, axis=(2, 3)) + eps 
= 0.000001 + aij = grads_power_2 / (2 * grads_power_2 + + sum_activations[:, :, None, None] * grads_power_3 + eps) + # Now bring back the ReLU from eq.7 in the paper, + # And zero out aijs where the activations are 0 + aij = np.where(grads != 0, aij, 0) + + weights = np.maximum(grads, 0) * aij + weights = np.sum(weights, axis=(2, 3)) + return weights diff --git a/pytorch_grad_cam/guided_backprop.py b/pytorch_grad_cam/guided_backprop.py new file mode 100644 index 0000000000000000000000000000000000000000..602fbf354397bf8596f700e8dce94dd0b7f49011 --- /dev/null +++ b/pytorch_grad_cam/guided_backprop.py @@ -0,0 +1,100 @@ +import numpy as np +import torch +from torch.autograd import Function +from pytorch_grad_cam.utils.find_layers import replace_all_layer_type_recursive + + +class GuidedBackpropReLU(Function): + @staticmethod + def forward(self, input_img): + positive_mask = (input_img > 0).type_as(input_img) + output = torch.addcmul( + torch.zeros( + input_img.size()).type_as(input_img), + input_img, + positive_mask) + self.save_for_backward(input_img, output) + return output + + @staticmethod + def backward(self, grad_output): + input_img, output = self.saved_tensors + grad_input = None + + positive_mask_1 = (input_img > 0).type_as(grad_output) + positive_mask_2 = (grad_output > 0).type_as(grad_output) + grad_input = torch.addcmul( + torch.zeros( + input_img.size()).type_as(input_img), + torch.addcmul( + torch.zeros( + input_img.size()).type_as(input_img), + grad_output, + positive_mask_1), + positive_mask_2) + return grad_input + + +class GuidedBackpropReLUasModule(torch.nn.Module): + def __init__(self): + super(GuidedBackpropReLUasModule, self).__init__() + + def forward(self, input_img): + return GuidedBackpropReLU.apply(input_img) + + +class GuidedBackpropReLUModel: + def __init__(self, model, use_cuda): + self.model = model + self.model.eval() + self.cuda = use_cuda + if self.cuda: + self.model = self.model.cuda() + + def forward(self, input_img): + return self.model(input_img) + + def recursive_replace_relu_with_guidedrelu(self, module_top): + + for idx, module in module_top._modules.items(): + self.recursive_replace_relu_with_guidedrelu(module) + if module.__class__.__name__ == 'ReLU': + module_top._modules[idx] = GuidedBackpropReLU.apply + print("b") + + def recursive_replace_guidedrelu_with_relu(self, module_top): + try: + for idx, module in module_top._modules.items(): + self.recursive_replace_guidedrelu_with_relu(module) + if module == GuidedBackpropReLU.apply: + module_top._modules[idx] = torch.nn.ReLU() + except BaseException: + pass + + def __call__(self, input_img, target_category=None): + replace_all_layer_type_recursive(self.model, + torch.nn.ReLU, + GuidedBackpropReLUasModule()) + + if self.cuda: + input_img = input_img.cuda() + + input_img = input_img.requires_grad_(True) + + output = self.forward(input_img) + + if target_category is None: + target_category = np.argmax(output.cpu().data.numpy()) + + loss = output[0, target_category] + loss.backward(retain_graph=True) + + output = input_img.grad.cpu().data.numpy() + output = output[0, :, :, :] + output = output.transpose((1, 2, 0)) + + replace_all_layer_type_recursive(self.model, + GuidedBackpropReLUasModule, + torch.nn.ReLU()) + + return output diff --git a/pytorch_grad_cam/hirescam.py b/pytorch_grad_cam/hirescam.py new file mode 100644 index 0000000000000000000000000000000000000000..381d8d45ec8a4658eeb52a6e5dedcca6a4fc976b --- /dev/null +++ b/pytorch_grad_cam/hirescam.py @@ -0,0 +1,32 @@ +import numpy as np +from 
pytorch_grad_cam.base_cam import BaseCAM +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection + + +class HiResCAM(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + super( + HiResCAM, + self).__init__( + model, + target_layers, + use_cuda, + reshape_transform) + + def get_cam_image(self, + input_tensor, + target_layer, + target_category, + activations, + grads, + eigen_smooth): + elementwise_activations = grads * activations + + if eigen_smooth: + print( + "Warning: HiResCAM's faithfulness guarantees do not hold if smoothing is applied") + cam = get_2d_projection(elementwise_activations) + else: + cam = elementwise_activations.sum(axis=1) + return cam diff --git a/pytorch_grad_cam/layer_cam.py b/pytorch_grad_cam/layer_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..971443d798658d6c29ff9da54481511ac317a1b0 --- /dev/null +++ b/pytorch_grad_cam/layer_cam.py @@ -0,0 +1,36 @@ +import numpy as np +from pytorch_grad_cam.base_cam import BaseCAM +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection + +# https://ieeexplore.ieee.org/document/9462463 + + +class LayerCAM(BaseCAM): + def __init__( + self, + model, + target_layers, + use_cuda=False, + reshape_transform=None): + super( + LayerCAM, + self).__init__( + model, + target_layers, + use_cuda, + reshape_transform) + + def get_cam_image(self, + input_tensor, + target_layer, + target_category, + activations, + grads, + eigen_smooth): + spatial_weighted_activations = np.maximum(grads, 0) * activations + + if eigen_smooth: + cam = get_2d_projection(spatial_weighted_activations) + else: + cam = spatial_weighted_activations.sum(axis=1) + return cam diff --git a/pytorch_grad_cam/metrics/__init__.py b/pytorch_grad_cam/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pytorch_grad_cam/metrics/__pycache__/__init__.cpython-310.pyc b/pytorch_grad_cam/metrics/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5715cb59dbfb203f7b50db533624c895095ed19 Binary files /dev/null and b/pytorch_grad_cam/metrics/__pycache__/__init__.cpython-310.pyc differ diff --git a/pytorch_grad_cam/metrics/__pycache__/cam_mult_image.cpython-310.pyc b/pytorch_grad_cam/metrics/__pycache__/cam_mult_image.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12d301af2efdae2251849d89a5495d07f8e3e66f Binary files /dev/null and b/pytorch_grad_cam/metrics/__pycache__/cam_mult_image.cpython-310.pyc differ diff --git a/pytorch_grad_cam/metrics/__pycache__/perturbation_confidence.cpython-310.pyc b/pytorch_grad_cam/metrics/__pycache__/perturbation_confidence.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24e02306317c666bda8a815a8dd64ef42e6f4190 Binary files /dev/null and b/pytorch_grad_cam/metrics/__pycache__/perturbation_confidence.cpython-310.pyc differ diff --git a/pytorch_grad_cam/metrics/__pycache__/road.cpython-310.pyc b/pytorch_grad_cam/metrics/__pycache__/road.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f57e6f4af690ead9f90615694bf5cf808f75f343 Binary files /dev/null and b/pytorch_grad_cam/metrics/__pycache__/road.cpython-310.pyc differ diff --git a/pytorch_grad_cam/metrics/cam_mult_image.py b/pytorch_grad_cam/metrics/cam_mult_image.py new file mode 100644 index 
0000000000000000000000000000000000000000..bd4bf8a2f733f6379d888c09e0589e54d9beb4c5 --- /dev/null +++ b/pytorch_grad_cam/metrics/cam_mult_image.py @@ -0,0 +1,37 @@ +import torch +import numpy as np +from typing import List, Callable +from pytorch_grad_cam.metrics.perturbation_confidence import PerturbationConfidenceMetric + + +def multiply_tensor_with_cam(input_tensor: torch.Tensor, + cam: torch.Tensor): + """ Multiply an input tensor (after normalization) + with a pixel attribution map + """ + return input_tensor * cam + + +class CamMultImageConfidenceChange(PerturbationConfidenceMetric): + def __init__(self): + super(CamMultImageConfidenceChange, + self).__init__(multiply_tensor_with_cam) + + +class DropInConfidence(CamMultImageConfidenceChange): + def __init__(self): + super(DropInConfidence, self).__init__() + + def __call__(self, *args, **kwargs): + scores = super(DropInConfidence, self).__call__(*args, **kwargs) + scores = -scores + return np.maximum(scores, 0) + + +class IncreaseInConfidence(CamMultImageConfidenceChange): + def __init__(self): + super(IncreaseInConfidence, self).__init__() + + def __call__(self, *args, **kwargs): + scores = super(IncreaseInConfidence, self).__call__(*args, **kwargs) + return np.float32(scores > 0) diff --git a/pytorch_grad_cam/metrics/perturbation_confidence.py b/pytorch_grad_cam/metrics/perturbation_confidence.py new file mode 100644 index 0000000000000000000000000000000000000000..813ffc7c67d3c12b8a67ea89e7f4478c48f652f5 --- /dev/null +++ b/pytorch_grad_cam/metrics/perturbation_confidence.py @@ -0,0 +1,109 @@ +import torch +import numpy as np +from typing import List, Callable + +import numpy as np +import cv2 + + +class PerturbationConfidenceMetric: + def __init__(self, perturbation): + self.perturbation = perturbation + + def __call__(self, input_tensor: torch.Tensor, + cams: np.ndarray, + targets: List[Callable], + model: torch.nn.Module, + return_visualization=False, + return_diff=True): + + if return_diff: + with torch.no_grad(): + outputs = model(input_tensor) + scores = [target(output).cpu().numpy() + for target, output in zip(targets, outputs)] + scores = np.float32(scores) + + batch_size = input_tensor.size(0) + perturbated_tensors = [] + for i in range(batch_size): + cam = cams[i] + tensor = self.perturbation(input_tensor[i, ...].cpu(), + torch.from_numpy(cam)) + tensor = tensor.to(input_tensor.device) + perturbated_tensors.append(tensor.unsqueeze(0)) + perturbated_tensors = torch.cat(perturbated_tensors) + + with torch.no_grad(): + outputs_after_imputation = model(perturbated_tensors) + scores_after_imputation = [ + target(output).cpu().numpy() for target, output in zip( + targets, outputs_after_imputation)] + scores_after_imputation = np.float32(scores_after_imputation) + + if return_diff: + result = scores_after_imputation - scores + else: + result = scores_after_imputation + + if return_visualization: + return result, perturbated_tensors + else: + return result + + +class RemoveMostRelevantFirst: + def __init__(self, percentile, imputer): + self.percentile = percentile + self.imputer = imputer + + def __call__(self, input_tensor, mask): + imputer = self.imputer + if self.percentile != 'auto': + threshold = np.percentile(mask.cpu().numpy(), self.percentile) + binary_mask = np.float32(mask < threshold) + else: + _, binary_mask = cv2.threshold( + np.uint8(mask * 255), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + + binary_mask = torch.from_numpy(binary_mask) + binary_mask = binary_mask.to(mask.device) + return imputer(input_tensor, 
binary_mask) + + +class RemoveLeastRelevantFirst(RemoveMostRelevantFirst): + def __init__(self, percentile, imputer): + super(RemoveLeastRelevantFirst, self).__init__(percentile, imputer) + + def __call__(self, input_tensor, mask): + return super(RemoveLeastRelevantFirst, self).__call__( + input_tensor, 1 - mask) + + +class AveragerAcrossThresholds: + def __init__( + self, + imputer, + percentiles=[ + 10, + 20, + 30, + 40, + 50, + 60, + 70, + 80, + 90]): + self.imputer = imputer + self.percentiles = percentiles + + def __call__(self, + input_tensor: torch.Tensor, + cams: np.ndarray, + targets: List[Callable], + model: torch.nn.Module): + scores = [] + for percentile in self.percentiles: + imputer = self.imputer(percentile) + scores.append(imputer(input_tensor, cams, targets, model)) + return np.mean(np.float32(scores), axis=0) diff --git a/pytorch_grad_cam/metrics/road.py b/pytorch_grad_cam/metrics/road.py new file mode 100644 index 0000000000000000000000000000000000000000..7b09c4ba7c6f745532411278e390c540134ebe34 --- /dev/null +++ b/pytorch_grad_cam/metrics/road.py @@ -0,0 +1,181 @@ +# A Consistent and Efficient Evaluation Strategy for Attribution Methods +# https://arxiv.org/abs/2202.00449 +# Taken from https://raw.githubusercontent.com/tleemann/road_evaluation/main/imputations.py +# MIT License + +# Copyright (c) 2022 Tobias Leemann + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +# Implementations of our imputation models. +import torch +import numpy as np +from scipy.sparse import lil_matrix, csc_matrix +from scipy.sparse.linalg import spsolve +from typing import List, Callable +from pytorch_grad_cam.metrics.perturbation_confidence import PerturbationConfidenceMetric, \ + AveragerAcrossThresholds, \ + RemoveMostRelevantFirst, \ + RemoveLeastRelevantFirst + +# The weights of the surrounding pixels +neighbors_weights = [((1, 1), 1 / 12), + ((0, 1), 1 / 6), + ((-1, 1), 1 / 12), + ((1, -1), 1 / 12), + ((0, -1), 1 / 6), + ((-1, -1), 1 / 12), + ((1, 0), 1 / 6), + ((-1, 0), 1 / 6)] + + +class NoisyLinearImputer: + def __init__(self, + noise: float = 0.01, + weighting: List[float] = neighbors_weights): + """ + Noisy linear imputation. + noise: magnitude of noise to add (absolute, set to 0 for no noise) + weighting: Weights of the neighboring pixels in the computation. 
+ List of tuples of (offset, weight) + """ + self.noise = noise + self.weighting = neighbors_weights + + @staticmethod + def add_offset_to_indices(indices, offset, mask_shape): + """ Add the corresponding offset to the indices. + Return new indices plus a valid bit-vector. """ + cord1 = indices % mask_shape[1] + cord0 = indices // mask_shape[1] + cord0 += offset[0] + cord1 += offset[1] + valid = ((cord0 < 0) | (cord1 < 0) | + (cord0 >= mask_shape[0]) | + (cord1 >= mask_shape[1])) + return ~valid, indices + offset[0] * mask_shape[1] + offset[1] + + @staticmethod + def setup_sparse_system(mask, img, neighbors_weights): + """ Vectorized version to set up the equation system. + mask: (H, W)-tensor of missing pixels. + Image: (H, W, C)-tensor of all values. + Return (N,N)-System matrix, (N,C)-Right hand side for each of the C channels. + """ + maskflt = mask.flatten() + imgflat = img.reshape((img.shape[0], -1)) + # Indices that are imputed in the flattened mask: + indices = np.argwhere(maskflt == 0).flatten() + coords_to_vidx = np.zeros(len(maskflt), dtype=int) + coords_to_vidx[indices] = np.arange(len(indices)) + numEquations = len(indices) + # System matrix: + A = lil_matrix((numEquations, numEquations)) + b = np.zeros((numEquations, img.shape[0])) + # Sum of weights assigned: + sum_neighbors = np.ones(numEquations) + for n in neighbors_weights: + offset, weight = n[0], n[1] + # Take out outliers + valid, new_coords = NoisyLinearImputer.add_offset_to_indices( + indices, offset, mask.shape) + valid_coords = new_coords[valid] + valid_ids = np.argwhere(valid == 1).flatten() + # Add values to the right hand-side + has_values_coords = valid_coords[maskflt[valid_coords] > 0.5] + has_values_ids = valid_ids[maskflt[valid_coords] > 0.5] + b[has_values_ids, :] -= weight * imgflat[:, has_values_coords].T + # Add weights to the system (left hand side) +# Find coordinates in the system. + has_no_values = valid_coords[maskflt[valid_coords] < 0.5] + variable_ids = coords_to_vidx[has_no_values] + has_no_values_ids = valid_ids[maskflt[valid_coords] < 0.5] + A[has_no_values_ids, variable_ids] = weight + # Reduce weight for invalid + sum_neighbors[np.argwhere(valid == 0).flatten()] = \ + sum_neighbors[np.argwhere(valid == 0).flatten()] - weight + + A[np.arange(numEquations), np.arange(numEquations)] = -sum_neighbors + return A, b + + def __call__(self, img: torch.Tensor, mask: torch.Tensor): + """ Our linear inputation scheme. """ + """ + This is the function to do the linear infilling + img: original image (C,H,W)-tensor; + mask: mask; (H,W)-tensor + + """ + imgflt = img.reshape(img.shape[0], -1) + maskflt = mask.reshape(-1) + # Indices that need to be imputed. + indices_linear = np.argwhere(maskflt == 0).flatten() + # Set up sparse equation system, solve system. + A, b = NoisyLinearImputer.setup_sparse_system( + mask.numpy(), img.numpy(), neighbors_weights) + res = torch.tensor(spsolve(csc_matrix(A), b), dtype=torch.float) + + # Fill the values with the solution of the system. 
+ img_infill = imgflt.clone() + img_infill[:, indices_linear] = res.t() + self.noise * \ + torch.randn_like(res.t()) + + return img_infill.reshape_as(img) + + +class ROADMostRelevantFirst(PerturbationConfidenceMetric): + def __init__(self, percentile=80): + super(ROADMostRelevantFirst, self).__init__( + RemoveMostRelevantFirst(percentile, NoisyLinearImputer())) + + +class ROADLeastRelevantFirst(PerturbationConfidenceMetric): + def __init__(self, percentile=20): + super(ROADLeastRelevantFirst, self).__init__( + RemoveLeastRelevantFirst(percentile, NoisyLinearImputer())) + + +class ROADMostRelevantFirstAverage(AveragerAcrossThresholds): + def __init__(self, percentiles=[10, 20, 30, 40, 50, 60, 70, 80, 90]): + super(ROADMostRelevantFirstAverage, self).__init__( + ROADMostRelevantFirst, percentiles) + + +class ROADLeastRelevantFirstAverage(AveragerAcrossThresholds): + def __init__(self, percentiles=[10, 20, 30, 40, 50, 60, 70, 80, 90]): + super(ROADLeastRelevantFirstAverage, self).__init__( + ROADLeastRelevantFirst, percentiles) + + +class ROADCombined: + def __init__(self, percentiles=[10, 20, 30, 40, 50, 60, 70, 80, 90]): + self.percentiles = percentiles + self.morf_averager = ROADMostRelevantFirstAverage(percentiles) + self.lerf_averager = ROADLeastRelevantFirstAverage(percentiles) + + def __call__(self, + input_tensor: torch.Tensor, + cams: np.ndarray, + targets: List[Callable], + model: torch.nn.Module): + + scores_lerf = self.lerf_averager(input_tensor, cams, targets, model) + scores_morf = self.morf_averager(input_tensor, cams, targets, model) + return (scores_lerf - scores_morf) / 2 diff --git a/pytorch_grad_cam/random_cam.py b/pytorch_grad_cam/random_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..5bb6eccd79960a3dbe77f76ed4aa73bb0a4b74cd --- /dev/null +++ b/pytorch_grad_cam/random_cam.py @@ -0,0 +1,22 @@ +import numpy as np +from pytorch_grad_cam.base_cam import BaseCAM + + +class RandomCAM(BaseCAM): + def __init__(self, model, target_layers, use_cuda=False, + reshape_transform=None): + super( + RandomCAM, + self).__init__( + model, + target_layers, + use_cuda, + reshape_transform) + + def get_cam_weights(self, + input_tensor, + target_layer, + target_category, + activations, + grads): + return np.random.uniform(-1, 1, size=(grads.shape[0], grads.shape[1])) diff --git a/pytorch_grad_cam/score_cam.py b/pytorch_grad_cam/score_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..38460c55164ad69026d331520fa51f60f4d050c6 --- /dev/null +++ b/pytorch_grad_cam/score_cam.py @@ -0,0 +1,60 @@ +import torch +import tqdm +from pytorch_grad_cam.base_cam import BaseCAM + + +class ScoreCAM(BaseCAM): + def __init__( + self, + model, + target_layers, + use_cuda=False, + reshape_transform=None): + super(ScoreCAM, self).__init__(model, + target_layers, + use_cuda, + reshape_transform=reshape_transform, + uses_gradients=False) + + def get_cam_weights(self, + input_tensor, + target_layer, + targets, + activations, + grads): + with torch.no_grad(): + upsample = torch.nn.UpsamplingBilinear2d( + size=input_tensor.shape[-2:]) + activation_tensor = torch.from_numpy(activations) + if self.cuda: + activation_tensor = activation_tensor.cuda() + + upsampled = upsample(activation_tensor) + + maxs = upsampled.view(upsampled.size(0), + upsampled.size(1), -1).max(dim=-1)[0] + mins = upsampled.view(upsampled.size(0), + upsampled.size(1), -1).min(dim=-1)[0] + + maxs, mins = maxs[:, :, None, None], mins[:, :, None, None] + upsampled = (upsampled - mins) / (maxs - mins) + 
+ input_tensors = input_tensor[:, None, + :, :] * upsampled[:, :, None, :, :] + + if hasattr(self, "batch_size"): + BATCH_SIZE = self.batch_size + else: + BATCH_SIZE = 16 + + scores = [] + for target, tensor in zip(targets, input_tensors): + for i in tqdm.tqdm(range(0, tensor.size(0), BATCH_SIZE)): + batch = tensor[i: i + BATCH_SIZE, :] + outputs = [target(o).cpu().item() + for o in self.model(batch)] + scores.extend(outputs) + scores = torch.Tensor(scores) + scores = scores.view(activations.shape[0], activations.shape[1]) + weights = torch.nn.Softmax(dim=-1)(scores).numpy() + return weights diff --git a/pytorch_grad_cam/sobel_cam.py b/pytorch_grad_cam/sobel_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..84168a789ac80f5215238214969b63363237bed2 --- /dev/null +++ b/pytorch_grad_cam/sobel_cam.py @@ -0,0 +1,11 @@ +import cv2 + + +def sobel_cam(img): + gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3) + grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3) + abs_grad_x = cv2.convertScaleAbs(grad_x) + abs_grad_y = cv2.convertScaleAbs(grad_y) + grad = cv2.addWeighted(abs_grad_x, 0.5, abs_grad_y, 0.5, 0) + return grad diff --git a/pytorch_grad_cam/utils/__init__.py b/pytorch_grad_cam/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..269a52681645da3cc2c032877466b1ee6284efb2 --- /dev/null +++ b/pytorch_grad_cam/utils/__init__.py @@ -0,0 +1,4 @@ +from pytorch_grad_cam.utils.image import deprocess_image +from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection +from pytorch_grad_cam.utils import model_targets +from pytorch_grad_cam.utils import reshape_transforms diff --git a/pytorch_grad_cam/utils/__pycache__/__init__.cpython-310.pyc b/pytorch_grad_cam/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..710b65fb7e9705831e63e71a332961983d531c79 Binary files /dev/null and b/pytorch_grad_cam/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/pytorch_grad_cam/utils/__pycache__/find_layers.cpython-310.pyc b/pytorch_grad_cam/utils/__pycache__/find_layers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7bb5ea51e3d3e2777bd50f8db433f34cc39276aa Binary files /dev/null and b/pytorch_grad_cam/utils/__pycache__/find_layers.cpython-310.pyc differ diff --git a/pytorch_grad_cam/utils/__pycache__/image.cpython-310.pyc b/pytorch_grad_cam/utils/__pycache__/image.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80279a7bd5a36a76b8cba59c7b0561ea0eab1b6f Binary files /dev/null and b/pytorch_grad_cam/utils/__pycache__/image.cpython-310.pyc differ diff --git a/pytorch_grad_cam/utils/__pycache__/model_targets.cpython-310.pyc b/pytorch_grad_cam/utils/__pycache__/model_targets.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..203d0d52b33aedf1e719bc62fef14facbf29a4db Binary files /dev/null and b/pytorch_grad_cam/utils/__pycache__/model_targets.cpython-310.pyc differ diff --git a/pytorch_grad_cam/utils/__pycache__/reshape_transforms.cpython-310.pyc b/pytorch_grad_cam/utils/__pycache__/reshape_transforms.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70e5d9f64ff0f91c7b5a243eae0972e1425208a1 Binary files /dev/null and b/pytorch_grad_cam/utils/__pycache__/reshape_transforms.cpython-310.pyc differ diff --git a/pytorch_grad_cam/utils/__pycache__/svd_on_activations.cpython-310.pyc 
b/pytorch_grad_cam/utils/__pycache__/svd_on_activations.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6af3b221ae03e4fec9959c1878f736aad517161 Binary files /dev/null and b/pytorch_grad_cam/utils/__pycache__/svd_on_activations.cpython-310.pyc differ diff --git a/pytorch_grad_cam/utils/find_layers.py b/pytorch_grad_cam/utils/find_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..4b9e44590664fdc30e996f79bd1a3497db40e822 --- /dev/null +++ b/pytorch_grad_cam/utils/find_layers.py @@ -0,0 +1,30 @@ +def replace_layer_recursive(model, old_layer, new_layer): + for name, layer in model._modules.items(): + if layer == old_layer: + model._modules[name] = new_layer + return True + elif replace_layer_recursive(layer, old_layer, new_layer): + return True + return False + + +def replace_all_layer_type_recursive(model, old_layer_type, new_layer): + for name, layer in model._modules.items(): + if isinstance(layer, old_layer_type): + model._modules[name] = new_layer + replace_all_layer_type_recursive(layer, old_layer_type, new_layer) + + +def find_layer_types_recursive(model, layer_types): + def predicate(layer): + return type(layer) in layer_types + return find_layer_predicate_recursive(model, predicate) + + +def find_layer_predicate_recursive(model, predicate): + result = [] + for name, layer in model._modules.items(): + if predicate(layer): + result.append(layer) + result.extend(find_layer_predicate_recursive(layer, predicate)) + return result diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py new file mode 100644 index 0000000000000000000000000000000000000000..34d92ba6f6cd82459059806ff0b311afb412cf9d --- /dev/null +++ b/pytorch_grad_cam/utils/image.py @@ -0,0 +1,183 @@ +import matplotlib +from matplotlib import pyplot as plt +from matplotlib.lines import Line2D +import cv2 +import numpy as np +import torch +from torchvision.transforms import Compose, Normalize, ToTensor +from typing import List, Dict +import math + + +def preprocess_image( + img: np.ndarray, mean=[ + 0.5, 0.5, 0.5], std=[ + 0.5, 0.5, 0.5]) -> torch.Tensor: + preprocessing = Compose([ + ToTensor(), + Normalize(mean=mean, std=std) + ]) + return preprocessing(img.copy()).unsqueeze(0) + + +def deprocess_image(img): + """ see https://github.com/jacobgil/keras-grad-cam/blob/master/grad-cam.py#L65 """ + img = img - np.mean(img) + img = img / (np.std(img) + 1e-5) + img = img * 0.1 + img = img + 0.5 + img = np.clip(img, 0, 1) + return np.uint8(img * 255) + + +def show_cam_on_image(img: np.ndarray, + mask: np.ndarray, + use_rgb: bool = False, + colormap: int = cv2.COLORMAP_JET, + image_weight: float = 0.5) -> np.ndarray: + """ This function overlays the cam mask on the image as an heatmap. + By default the heatmap is in BGR format. + + :param img: The base image in RGB or BGR format. + :param mask: The cam mask. + :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format. + :param colormap: The OpenCV colormap to be used. + :param image_weight: The final result is image_weight * img + (1-image_weight) * mask. + :returns: The default image with the cam overlay. 
+ """ + heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap) + if use_rgb: + heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB) + heatmap = np.float32(heatmap) / 255 + + if np.max(img) > 1: + raise Exception( + "The input image should np.float32 in the range [0, 1]") + + if image_weight < 0 or image_weight > 1: + raise Exception( + f"image_weight should be in the range [0, 1].\ + Got: {image_weight}") + + cam = (1 - image_weight) * heatmap + image_weight * img + cam = cam / np.max(cam) + return np.uint8(255 * cam) + + +def create_labels_legend(concept_scores: np.ndarray, + labels: Dict[int, str], + top_k=2): + concept_categories = np.argsort(concept_scores, axis=1)[:, ::-1][:, :top_k] + concept_labels_topk = [] + for concept_index in range(concept_categories.shape[0]): + categories = concept_categories[concept_index, :] + concept_labels = [] + for category in categories: + score = concept_scores[concept_index, category] + label = f"{','.join(labels[category].split(',')[:3])}:{score:.2f}" + concept_labels.append(label) + concept_labels_topk.append("\n".join(concept_labels)) + return concept_labels_topk + + +def show_factorization_on_image(img: np.ndarray, + explanations: np.ndarray, + colors: List[np.ndarray] = None, + image_weight: float = 0.5, + concept_labels: List = None) -> np.ndarray: + """ Color code the different component heatmaps on top of the image. + Every component color code will be magnified according to the heatmap itensity + (by modifying the V channel in the HSV color space), + and optionally create a lagend that shows the labels. + + Since different factorization component heatmaps can overlap in principle, + we need a strategy to decide how to deal with the overlaps. + This keeps the component that has a higher value in it's heatmap. + + :param img: The base image RGB format. + :param explanations: A tensor of shape num_componetns x height x width, with the component visualizations. + :param colors: List of R, G, B colors to be used for the components. + If None, will use the gist_rainbow cmap as a default. + :param image_weight: The final result is image_weight * img + (1-image_weight) * visualization. + :concept_labels: A list of strings for every component. If this is paseed, a legend that shows + the labels and their colors will be added to the image. + :returns: The visualized image. 
+ """ + n_components = explanations.shape[0] + if colors is None: + # taken from https://github.com/edocollins/DFF/blob/master/utils.py + _cmap = plt.cm.get_cmap('gist_rainbow') + colors = [ + np.array( + _cmap(i)) for i in np.arange( + 0, + 1, + 1.0 / + n_components)] + concept_per_pixel = explanations.argmax(axis=0) + masks = [] + for i in range(n_components): + mask = np.zeros(shape=(img.shape[0], img.shape[1], 3)) + mask[:, :, :] = colors[i][:3] + explanation = explanations[i] + explanation[concept_per_pixel != i] = 0 + mask = np.uint8(mask * 255) + mask = cv2.cvtColor(mask, cv2.COLOR_RGB2HSV) + mask[:, :, 2] = np.uint8(255 * explanation) + mask = cv2.cvtColor(mask, cv2.COLOR_HSV2RGB) + mask = np.float32(mask) / 255 + masks.append(mask) + + mask = np.sum(np.float32(masks), axis=0) + result = img * image_weight + mask * (1 - image_weight) + result = np.uint8(result * 255) + + if concept_labels is not None: + px = 1 / plt.rcParams['figure.dpi'] # pixel in inches + fig = plt.figure(figsize=(result.shape[1] * px, result.shape[0] * px)) + plt.rcParams['legend.fontsize'] = int( + 14 * result.shape[0] / 256 / max(1, n_components / 6)) + lw = 5 * result.shape[0] / 256 + lines = [Line2D([0], [0], color=colors[i], lw=lw) + for i in range(n_components)] + plt.legend(lines, + concept_labels, + mode="expand", + fancybox=True, + shadow=True) + + plt.tight_layout(pad=0, w_pad=0, h_pad=0) + plt.axis('off') + fig.canvas.draw() + data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) + plt.close(fig=fig) + data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)) + data = cv2.resize(data, (result.shape[1], result.shape[0])) + result = np.hstack((result, data)) + return result + + +def scale_cam_image(cam, target_size=None): + result = [] + for img in cam: + img = img - np.min(img) + img = img / (1e-7 + np.max(img)) + if target_size is not None: + img = cv2.resize(img, target_size) + result.append(img) + result = np.float32(result) + + return result + + +def scale_accross_batch_and_channels(tensor, target_size): + batch_size, channel_size = tensor.shape[:2] + reshaped_tensor = tensor.reshape( + batch_size * channel_size, *tensor.shape[2:]) + result = scale_cam_image(reshaped_tensor, target_size) + result = result.reshape( + batch_size, + channel_size, + target_size[1], + target_size[0]) + return result diff --git a/pytorch_grad_cam/utils/model_targets.py b/pytorch_grad_cam/utils/model_targets.py new file mode 100644 index 0000000000000000000000000000000000000000..489dd198731f76a5631d4e480d68b93561cf2820 --- /dev/null +++ b/pytorch_grad_cam/utils/model_targets.py @@ -0,0 +1,103 @@ +import numpy as np +import torch +import torchvision + + +class ClassifierOutputTarget: + def __init__(self, category): + self.category = category + + def __call__(self, model_output): + if len(model_output.shape) == 1: + return model_output[self.category] + return model_output[:, self.category] + + +class ClassifierOutputSoftmaxTarget: + def __init__(self, category): + self.category = category + + def __call__(self, model_output): + if len(model_output.shape) == 1: + return torch.softmax(model_output, dim=-1)[self.category] + return torch.softmax(model_output, dim=-1)[:, self.category] + + +class BinaryClassifierOutputTarget: + def __init__(self, category): + self.category = category + + def __call__(self, model_output): + if self.category == 1: + sign = 1 + else: + sign = -1 + return model_output * sign + + +class SoftmaxOutputTarget: + def __init__(self): + pass + + def __call__(self, model_output): + return 
torch.softmax(model_output, dim=-1) + + +class RawScoresOutputTarget: + def __init__(self): + pass + + def __call__(self, model_output): + return model_output + + +class SemanticSegmentationTarget: + """ Gets a binary spatial mask and a category, + And return the sum of the category scores, + of the pixels in the mask. """ + + def __init__(self, category, mask): + self.category = category + self.mask = torch.from_numpy(mask) + if torch.cuda.is_available(): + self.mask = self.mask.cuda() + + def __call__(self, model_output): + return (model_output[self.category, :, :] * self.mask).sum() + + +class FasterRCNNBoxScoreTarget: + """ For every original detected bounding box specified in "bounding boxes", + assign a score on how the current bounding boxes match it, + 1. In IOU + 2. In the classification score. + If there is not a large enough overlap, or the category changed, + assign a score of 0. + + The total score is the sum of all the box scores. + """ + + def __init__(self, labels, bounding_boxes, iou_threshold=0.5): + self.labels = labels + self.bounding_boxes = bounding_boxes + self.iou_threshold = iou_threshold + + def __call__(self, model_outputs): + output = torch.Tensor([0]) + if torch.cuda.is_available(): + output = output.cuda() + + if len(model_outputs["boxes"]) == 0: + return output + + for box, label in zip(self.bounding_boxes, self.labels): + box = torch.Tensor(box[None, :]) + if torch.cuda.is_available(): + box = box.cuda() + + ious = torchvision.ops.box_iou(box, model_outputs["boxes"]) + index = ious.argmax() + if ious[0, index] > self.iou_threshold and model_outputs["labels"][index] == label: + score = ious[0, index] + model_outputs["scores"][index] + output = output + score + return output diff --git a/pytorch_grad_cam/utils/reshape_transforms.py b/pytorch_grad_cam/utils/reshape_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..509f092d123064fcc75e0208cc8453a75f0ef205 --- /dev/null +++ b/pytorch_grad_cam/utils/reshape_transforms.py @@ -0,0 +1,34 @@ +import torch + + +def fasterrcnn_reshape_transform(x): + target_size = x['pool'].size()[-2:] + activations = [] + for key, value in x.items(): + activations.append( + torch.nn.functional.interpolate( + torch.abs(value), + target_size, + mode='bilinear')) + activations = torch.cat(activations, axis=1) + return activations + + +def swinT_reshape_transform(tensor, height=7, width=7): + result = tensor.reshape(tensor.size(0), + height, width, tensor.size(2)) + + # Bring the channels to the first dimension, + # like in CNNs. + result = result.transpose(2, 3).transpose(1, 2) + return result + + +def vit_reshape_transform(tensor, height=14, width=14): + result = tensor[:, 1:, :].reshape(tensor.size(0), + height, width, tensor.size(2)) + + # Bring the channels to the first dimension, + # like in CNNs. 
+ result = result.transpose(2, 3).transpose(1, 2) + return result diff --git a/pytorch_grad_cam/utils/svd_on_activations.py b/pytorch_grad_cam/utils/svd_on_activations.py new file mode 100644 index 0000000000000000000000000000000000000000..a406aeea85617922e67270a70388256ac214e8e2 --- /dev/null +++ b/pytorch_grad_cam/utils/svd_on_activations.py @@ -0,0 +1,19 @@ +import numpy as np + + +def get_2d_projection(activation_batch): + # TBD: use pytorch batch svd implementation + activation_batch[np.isnan(activation_batch)] = 0 + projections = [] + for activations in activation_batch: + reshaped_activations = (activations).reshape( + activations.shape[0], -1).transpose() + # Centering before the SVD seems to be important here, + # Otherwise the image returned is negative + reshaped_activations = reshaped_activations - \ + reshaped_activations.mean(axis=0) + U, S, VT = np.linalg.svd(reshaped_activations, full_matrices=True) + projection = reshaped_activations @ VT[0, :] + projection = projection.reshape(activations.shape[1:]) + projections.append(projection) + return np.float32(projections) diff --git a/pytorch_grad_cam/xgrad_cam.py b/pytorch_grad_cam/xgrad_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..81a920fe8b81bfb7bce9f317edfcc465c9bffd60 --- /dev/null +++ b/pytorch_grad_cam/xgrad_cam.py @@ -0,0 +1,31 @@ +import numpy as np +from pytorch_grad_cam.base_cam import BaseCAM + + +class XGradCAM(BaseCAM): + def __init__( + self, + model, + target_layers, + use_cuda=False, + reshape_transform=None): + super( + XGradCAM, + self).__init__( + model, + target_layers, + use_cuda, + reshape_transform) + + def get_cam_weights(self, + input_tensor, + target_layer, + target_category, + activations, + grads): + sum_activations = np.sum(activations, axis=(2, 3)) + eps = 1e-7 + weights = grads * activations / \ + (sum_activations[:, :, None, None] + eps) + weights = weights.sum(axis=(2, 3)) + return weights diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ebe5c0d9a24215218e196f69b12d71138bcb227c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +flask +flask_cors +opencv-python +tqdm +numpy +gunicorn +matplotlib +pandas +ttach +scikit-learn \ No newline at end of file diff --git a/tempCodeRunnerFile.py b/tempCodeRunnerFile.py new file mode 100644 index 0000000000000000000000000000000000000000..a8e589857c0221bf16f30e5bf71d21c47c438d7a --- /dev/null +++ b/tempCodeRunnerFile.py @@ -0,0 +1,2 @@ +rms.RandomResizedCrop(224), + transf \ No newline at end of file
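
Usage note for the vendored pytorch_grad_cam package above: it ships several CAM variants (GradCAM, GradCAM++, HiResCAM, LayerCAM, ScoreCAM, XGradCAM, RandomCAM) together with the helpers in pytorch_grad_cam/utils/image.py and pytorch_grad_cam/utils/model_targets.py. The following is a minimal sketch of how the package could be driven on its own, outside the Flask app. The torchvision ResNet-50, the file names example.jpg and cam_overlay.jpg, and the class index 281 are placeholders chosen for illustration only; they are not part of this repository.

import numpy as np
import torch
from PIL import Image
from torchvision import models

from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import preprocess_image, show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# Placeholder classifier and target layer; any CNN backbone works the same way.
model = models.resnet50(pretrained=True).eval()
target_layers = [model.layer4[-1]]

# show_cam_on_image expects the base image as float32 in [0, 1].
rgb_img = np.float32(Image.open("example.jpg").convert("RGB").resize((224, 224))) / 255
input_tensor = preprocess_image(rgb_img,
                                mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

# Explain a hypothetical ImageNet class index.
targets = [ClassifierOutputTarget(281)]

# The CAM objects are context managers; __call__ returns one heatmap per batch item.
with GradCAM(model=model, target_layers=target_layers) as cam:
    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]

# Overlay the normalized heatmap on the original RGB image and save it.
overlay = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
Image.fromarray(overlay).save("cam_overlay.jpg")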
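
The metrics under pytorch_grad_cam/metrics/ score how faithful a CAM is by imputing relevant pixels with the NoisyLinearImputer and measuring the change in model confidence (the ROAD protocol referenced in road.py). A hedged sketch, reusing the model, input_tensor, targets, and grayscale_cam variables from the snippet above:

from pytorch_grad_cam.metrics.road import ROADMostRelevantFirst, ROADCombined

# The metrics take a batch of CAMs as a numpy array of shape (N, H, W).
cam_batch = grayscale_cam[None, :]

# Confidence change after the pixels above the 80th CAM percentile
# (the most relevant ones) are imputed away.
morf_80 = ROADMostRelevantFirst(percentile=80)(input_tensor, cam_batch, targets, model)

# (LeRF - MoRF) / 2 averaged over percentiles 10..90; per the ROAD paper
# linked in road.py, a higher score indicates a more faithful explanation.
combined = ROADCombined()(input_tensor, cam_batch, targets, model)

print("ROAD MoRF@80:", morf_80, "ROAD combined:", combined)

Note that road.py imports scipy.sparse, which requirements.txt does not list explicitly; in this image it appears to be available only transitively through scikit-learn.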