Emerging Properties in Self-Supervised Vision Transformers
Paper • 2104.14294 • Published • 4
JaxNN conversion of the timm vit_small_patch16_224.dino Vision Transformer checkpoint.
A Vision Transformer (ViT) image feature model, trained with the self-supervised DINO method.
from urllib.request import urlopen
import jax
from PIL import Image
import jaxnn
# Download the sample image. The HTTP response is managed by a `with` block so
# the connection is released even if decoding raises.
with urlopen(
    "https://huggingface.co/datasets/huggingface/cats-image/resolve/main/cats_image.jpeg"
) as response:
    img = Image.open(response)
    img.load()  # Image.open is lazy; decode while the response is still open.

model = jaxnn.create_model("vit_small_patch16_224.dino", pretrained=True)
model.eval()

# Resolve the data configuration for this model and build the matching
# non-training transform from it.
data_config = jaxnn.data.resolve_model_data_config(model)
transforms = jaxnn.data.create_transform(**data_config, is_training=False)

# Insert a new leading axis (position 0) before calling the model.
x = jax.numpy.expand_dims(transforms(img), 0)
output = model(x, deterministic=True)

# Softmax over the last axis, scaled by 100, then keep the 5 largest entries
# and their indices.
# NOTE(review): this card describes the checkpoint as an image *feature* model;
# confirm it ships a classification head before reading these as class
# probabilities.
top5_probabilities, top5_class_indices = jax.lax.top_k(
    jax.nn.softmax(output, axis=-1) * 100,
    k=5,
)
from urllib.request import urlopen
import jax
from PIL import Image
import jaxnn
# Download the sample image. The HTTP response is managed by a `with` block so
# the connection is released even if decoding raises.
with urlopen(
    "https://huggingface.co/datasets/huggingface/cats-image/resolve/main/cats_image.jpeg"
) as response:
    img = Image.open(response)
    img.load()  # Image.open is lazy; decode while the response is still open.

# NOTE(review): num_classes=0 presumably removes the classifier head so the
# model returns features instead of logits -- confirm against the jaxnn docs.
model = jaxnn.create_model(
    "vit_small_patch16_224.dino",
    pretrained=True,
    num_classes=0,
)
model.eval()

# Resolve the data configuration for this model and build the matching
# non-training transform from it.
data_config = jaxnn.data.resolve_model_data_config(model)
transforms = jaxnn.data.create_transform(**data_config, is_training=False)

# Insert a new leading axis (position 0) before calling the model.
x = jax.numpy.expand_dims(transforms(img), 0)
output = model(x, deterministic=True)
@inproceedings{caron2021emerging,
title={Emerging properties in self-supervised vision transformers},
author={Caron, Mathilde and Touvron, Hugo and Misra, Ishan and J{\'e}gou, Herv{\'e} and Mairal, Julien and Bojanowski, Piotr and Joulin, Armand},
booktitle={Proceedings of the IEEE/CVF international conference on computer vision},
pages={9650--9660},
year={2021}
}
@article{dosovitskiy2020vit,
title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil},
journal={ICLR},
year={2021}
}
@misc{rw2019timm,
author = {Ross Wightman},
title = {PyTorch Image Models},
year = {2019},
publisher = {GitHub},
journal = {GitHub repository},
doi = {10.5281/zenodo.4414861},
howpublished = {\url{https://github.com/huggingface/pytorch-image-models}}
}