"""Gradio demo app: classify a garbage image into one of six classes with a ViT."""

import os
from timeit import default_timer as timer
from typing import Dict, Tuple

import gradio as gr
import torch

from model import create_vit

# Class labels; their order must match the output indices of the model.
class_names = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']

# Build the model and its matching input transform, then load the weights on CPU.
vit, vit_transform = create_vit(output_classes=len(class_names))
# NOTE(review): torch.load unpickles the file, so only ship a trusted checkpoint.
vit.load_state_dict(
    torch.load(
        f="vit_b_16_dout0.3_10epochs.pth",
        map_location=torch.device("cpu"),
    )
)
# Switch to evaluation mode once at start-up instead of on every request.
vit.eval()


def predict(img) -> Tuple[Dict[str, float], float]:
    """Classify one image and time the prediction.

    Args:
        img: The input image. The Gradio input component below is configured
            with ``type="pil"``, so this is expected to be a PIL image.

    Returns:
        A tuple ``(pred_labels_and_probs, pred_time)`` where the first item
        maps every name in ``class_names`` to its softmax probability and the
        second is the elapsed time in seconds, rounded to 5 decimal places.
    """
    start_time = timer()

    # Transform the image and add a leading dimension of size 1 for the model.
    batch = vit_transform(img).unsqueeze(0)

    with torch.inference_mode():
        pred_probs = torch.softmax(vit(batch), dim=1)

    # Row 0 is the only sample; pair each class name with its probability.
    pred_labels_and_probs = dict(zip(class_names, pred_probs[0].tolist()))

    pred_time = round(timer() - start_time, 5)
    return pred_labels_and_probs, pred_time


title = "Garbage Sense"
description = "A vision transformer trained to classify garbage into 6 categories on [trashnet](https://github.com/garythung/trashnet)."
# Defined but currently not passed to the interface below.
article = ""

# One single-item row per file in "examples"; sorted so the order is stable
# across runs (os.listdir returns entries in arbitrary order).
example_list = [
    [os.path.join("examples", example)]
    for example in sorted(os.listdir("examples"))
]

demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=6, label="Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    examples=example_list,
    title=title,
    description=description,
)

# Launched at import time, as in the original script.
demo.launch()