Spaces:

nielsr
/

DINO

Runtime error

App Files Files Community

DINO / app.py

nielsr HF staff

First commit

d0b4edc almost 3 years ago

raw history blame

No virus

2.69 kB

	import os
	import subprocess
	import sys

	# Install/upgrade the runtime dependencies before they are imported below.
	# `sys.executable -m pip` guarantees the packages land in the environment of
	# the interpreter that is running this script (a bare `pip` on PATH may
	# belong to a different one), and the argument list avoids shell parsing.
	for _requirement in (
	    "gradio",
	    "git+https://github.com/NielsRogge/transformers.git@add_dino",
	):
	    _exit_code = subprocess.run(
	        [sys.executable, "-m", "pip", "install", "--upgrade", _requirement],
	        check=False,
	    ).returncode
	    if _exit_code != 0:
	        # Best-effort: a failed install is reported but does not abort
	        # startup, since an already-installed version may still work.
	        print(
	            f"pip install failed for {_requirement!r} (exit code {_exit_code})",
	            file=sys.stderr,
	        )

	import gradio as gr
	from transformers import ViTFeatureExtractor, ViTModel
	import torch
	import torch.nn as nn
	import torchvision
	import matplotlib.pyplot as plt

	def get_attention_maps(pixel_values, attentions, nh):
	    """Save one attention heatmap PNG per head and return the file paths.

	    The patch size is read from the module-level ``model``.

	    Args:
	        pixel_values: Model input tensor; only its last two dimensions are
	            read (assumed to be image height and width, i.e. a
	            ``(..., height, width)`` layout).
	        attentions: Tensor holding, for each head, one attention value per
	            image patch; it must be reshapeable to
	            ``(nh, height // patch_size, width // patch_size)``.
	        nh: Number of attention heads.

	    Returns:
	        List of ``nh`` file paths (``./attn-head<j>.png``), one per head.
	        The names are fixed, so every call overwrites the previous files.
	    """
	    patch_size = model.config.patch_size
	    # Patch-grid size: rows follow the height axis, columns the width axis.
	    grid_rows = pixel_values.shape[-2] // patch_size
	    grid_cols = pixel_values.shape[-1] // patch_size

	    # Lay each head's per-patch attention out as a 2-D grid, then blow every
	    # patch back up to patch_size x patch_size pixels (nearest neighbour).
	    heatmaps = attentions.detach().reshape(nh, grid_rows, grid_cols)
	    heatmaps = nn.functional.interpolate(
	        heatmaps.unsqueeze(0), scale_factor=patch_size, mode="nearest"
	    )[0]
	    heatmaps = heatmaps.cpu().numpy()

	    # save attention heatmaps and return the list of filenames
	    output_dir = '.'
	    os.makedirs(output_dir, exist_ok=True)
	    print("Number of heads:", nh)
	    attention_maps = []
	    for head, heatmap in enumerate(heatmaps):
	        fname = os.path.join(output_dir, f"attn-head{head}.png")
	        plt.imsave(fname=fname, arr=heatmap, format='png')
	        attention_maps.append(fname)

	    return attention_maps

	# The preprocessor and the backbone are loaded from the same checkpoint.
	_CHECKPOINT = "facebook/dino-vits8"

	# do_resize=False: the extractor is asked not to resize incoming images.
	feature_extractor = ViTFeatureExtractor.from_pretrained(
	    _CHECKPOINT,
	    do_resize=False,
	)
	# add_pooling_layer=False: the model is built without its pooling layer;
	# only the attention weights are read from its outputs in this file.
	model = ViTModel.from_pretrained(
	    _CHECKPOINT,
	    add_pooling_layer=False,
	)

	def visualize_attention(image):
	    """Run the model on ``image`` and save its last-layer attention heatmaps.

	    Args:
	        image: Input image in any form accepted by ``feature_extractor``
	            (the Gradio interface in this file passes a PIL image).

	    Returns:
	        List of file paths of the saved heatmaps, one per attention head,
	        as produced by ``get_attention_maps``.
	    """
	    # normalize channels
	    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values

	    # forward pass; this is inference only, so skip autograd bookkeeping
	    # (saves memory and time, and the attentions come out gradient-free)
	    with torch.no_grad():
	        outputs = model(pixel_values, output_attentions=True, interpolate_pos_encoding=True)

	    # get attentions of last layer
	    last_layer = outputs.attentions[-1]
	    nh = last_layer.shape[1]  # number of heads

	    # First batch item, every head: attention from token 0 (presumably the
	    # [CLS] token) to all remaining tokens, i.e. to the image patches.
	    attentions = last_layer[0, :, 0, 1:].reshape(nh, -1)

	    return get_attention_maps(pixel_values, attentions, nh)

	# Size (width, height) every upload is brought to before it reaches the model.
	_INPUT_SIZE = (480, 480)


	def _resize_and_crop(image, size):
	    """Center-crop a PIL image to the aspect ratio of ``size``, then resize it to ``size``."""
	    target_w, target_h = size
	    src_w, src_h = image.size
	    target_ratio = target_w / target_h
	    if src_w / src_h > target_ratio:
	        # Source is too wide: keep the full height, trim the sides.
	        crop_w, crop_h = max(1, round(src_h * target_ratio)), src_h
	    else:
	        # Source is too tall (or already matches): keep the full width.
	        crop_w, crop_h = src_w, max(1, round(src_w / target_ratio))
	    left = (src_w - crop_w) // 2
	    top = (src_h - crop_h) // 2
	    return image.crop((left, top, left + crop_w, top + crop_h)).resize(size)


	def _visualize_resized(image):
	    """Gradio callback: fit the upload to the fixed input size, then visualize it."""
	    if image is None:
	        # Submitting without an upload passes None; surface a readable message.
	        raise gr.Error("Please upload an image first.")
	    return visualize_attention(_resize_and_crop(image, _INPUT_SIZE))


	# The `gr.inputs` / `gr.outputs` namespaces and the `shape=` argument are gone
	# from current gradio releases (and this app upgrades gradio at startup), so
	# the top-level `gr.Image` component is used and resizing is done in the
	# callback. Output images accept the file paths returned by the callback.
	iface = gr.Interface(
	    fn=_visualize_resized,
	    inputs=gr.Image(type="pil"),
	    outputs=[gr.Image(label=f'attention_head_{i}') for i in range(6)],
	)
	iface.launch()