Spaces:

probing-vits
/

class-saliency

Running

App Files Files Community

class-saliency / utils.py

sayakpaul HF staff

Upload utils.py

3e32c41 about 2 years ago

raw history blame contribute delete

No virus

2.06 kB

	from typing import Dict

	import numpy as np
	import tensorflow as tf
	from PIL import Image
	from tensorflow import keras

	RESOLUTION = 224
	PATCH_SIZE = 16


	crop_layer = keras.layers.CenterCrop(RESOLUTION, RESOLUTION)
	norm_layer = keras.layers.Normalization(
	mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
	variance=[(0.229 * 255) ** 2, (0.224 * 255) ** 2, (0.225 * 255) ** 2],
	)


	def preprocess_image(orig_image: Image, size: int):
	"""Image preprocessing utility."""
	image = np.array(orig_image)
	image_resized = tf.expand_dims(image, 0)
	resize_size = int((256 / 224) * size)
	image_resized = tf.image.resize(
	image_resized, (resize_size, resize_size), method="bicubic"
	)
	image_resized = crop_layer(image_resized)
	return image_resized.numpy().squeeze(), norm_layer(image_resized).numpy()


	# Reference:
	# https://github.com/facebookresearch/dino/blob/main/visualize_attention.py


	def get_cls_attention_map(
	preprocessed_image: np.ndarray,
	attn_score_dict: Dict[str, np.ndarray],
	block_key="ca_ffn_block_0_att",
	):
	"""Utility to generate class saliency map modeling spatial-class relationships."""
	w_featmap = preprocessed_image.shape[2] // PATCH_SIZE
	h_featmap = preprocessed_image.shape[1] // PATCH_SIZE

	attention_scores = attn_score_dict[block_key]
	nh = attention_scores.shape[1] # Number of attention heads.

	# Taking the representations from CLS token.
	attentions = attention_scores[0, :, 0, 1:].reshape(nh, -1)

	# Reshape the attention scores to resemble mini patches.
	attentions = attentions.reshape(nh, w_featmap, h_featmap)

	attentions = np.mean(attentions, axis=0)
	attentions = (attentions - attentions.min()) / (
	attentions.max() - attentions.min()
	)
	attentions = np.expand_dims(attentions, -1)

	# Resize the attention patches to 224x224 (224: 14x16)
	attentions = tf.image.resize(
	attentions,
	size=(h_featmap * PATCH_SIZE, w_featmap * PATCH_SIZE),
	method="bicubic",
	)

	return attentions.numpy()