"""Gradio demo: ImageNet-1k image classification with EfficientFormer."""

import json

import gradio as gr
import torch
from transformers import (
    EfficientFormerForImageClassificationWithTeacher,
    EfficientFormerImageProcessor,
)

# Load preprocessor and pretrained model
model_name = "snap-research/efficientformer-l7-300"
processor = EfficientFormerImageProcessor.from_pretrained(model_name)
model = EfficientFormerForImageClassificationWithTeacher.from_pretrained(model_name)

# Load ImageNet idx to label mapping (JSON object keyed by the class index
# as a string, see the `str(idx)` lookup in `classify_image`).
with open("assets/imagenet_1000_idx2labels.json", encoding="utf-8") as f:
    idx_to_label = json.load(f)


def classify_image(img, top_k):
    """Classify an image and return the ``top_k`` most probable labels.

    Args:
        img: Input image as delivered by the ``gr.Image`` component; it is
            passed unchanged to the EfficientFormer image processor.
        top_k: Number of predictions to return. Gradio sliders may deliver
            this as a float, so it is truncated to an int here.

    Returns:
        A dict mapping label name to softmax probability (rounded to four
        decimals), ordered from most to least probable. Empty when
        ``top_k`` is not positive.
    """
    # A float slider value would break slicing / topk, so normalise first.
    top_k = int(top_k)
    if top_k <= 0:
        return {}

    # Preprocess input image
    inputs = processor(images=img, return_tensors="pt")

    # Inference (no gradients needed for prediction)
    with torch.no_grad():
        outputs = model(**inputs)

    # Class probabilities for the single image in the batch
    scores = torch.nn.functional.softmax(outputs.logits, dim=1)[0]

    # topk requires k <= number of classes; clamp instead of raising.
    top_k = min(top_k, scores.shape[0])
    top_scores, top_indices = torch.topk(scores, top_k)

    return {
        idx_to_label[str(idx)]: round(score, 4)
        for idx, score in zip(top_indices.tolist(), top_scores.tolist())
    }


description = """
Gradio demo for EfficientFormer,
introduced in EfficientFormer: Vision Transformers at MobileNet Speed.
\n\nEfficientFormer is a mobile-friendly image classification model that achieves MobileNet inference speed with impressive performance gains.
To use it, simply upload an image and print the top predictions.
"""

demo = gr.Interface(
    classify_image,
    # step=1 keeps the slider on whole numbers; minimum 1 avoids an empty
    # prediction dict being sent to the Label output.
    inputs=[gr.Image(), gr.Slider(1, 1000, value=5, step=1)],
    outputs=gr.Label(),
    description=description,
    # NOTE(review): the title says "L1" but `model_name` loads the l7
    # checkpoint -- confirm which one is intended and align them.
    title="Image Classification with EfficientFormer-L1",
    examples=[
        ["assets/halloween-gaf8ad7ebc_1920.jpeg", 5],
        ["assets/IMG_4484.jpeg", 5],
        ["assets/IMG_4737.jpeg", 5],
        ["assets/IMG_4740.jpeg", 5],
    ],
)

demo.launch()