"""Gradio demo app for DishVision, a three-class food image classifier."""

import os
from timeit import default_timer as timer
from typing import Dict, Tuple

import gradio as gr
import torch

from model import create_effnetb2_model

# Define the class names
class_names = ["pizza", "steak", "sushi"]

# Create the pretrained EffNetB2 model; the number of outputs is derived from
# the class list so the two cannot drift apart
effnetb2, effnetb2_transforms = create_effnetb2_model(
    num_classes=len(class_names),
)

# Load to the CPU the EffNetB2 model's saved weights
effnetb2.load_state_dict(
    torch.load(
        f="pretrained_effnetb2_feature_extractor_20_percent.pth",
        map_location=torch.device("cpu"),
    )
)


def predict(img) -> Tuple[Dict[str, float], float]:
    """Transform an image, classify it, and time the whole prediction.

    Args:
        img: The image to classify. It is passed straight to the EffNetB2
            transforms (expected to be a PIL image, as supplied by the
            Gradio image input -- confirm against the transforms in use).

    Returns:
        A tuple ``(pred_labels_and_probs, pred_time)`` where
        ``pred_labels_and_probs`` maps every name in ``class_names`` to its
        predicted probability and ``pred_time`` is the elapsed time in
        seconds, rounded to 5 decimal places.
    """
    # Begin the prediction's timer (the transform is included in the timing)
    start_time = timer()

    # Transform the target image with the pretrained EffNetB2 model's
    # transforms, and add a batch dimension
    img = effnetb2_transforms(img).unsqueeze(0)

    # Set the model to evaluation mode
    effnetb2.eval()

    # Activate the inference mode
    with torch.inference_mode():
        # Pass the transformed image through the model, and turn the
        # prediction logits (the model's outputs) into prediction probabilities
        pred_probs = torch.softmax(effnetb2(img), dim=1)

    # Create a prediction label -> prediction probability dictionary covering
    # every class; index 0 selects the single image in the batch
    pred_labels_and_probs = {
        name: float(pred_probs[0][i]) for i, name in enumerate(class_names)
    }

    # Calculate the prediction's time
    pred_time = round(timer() - start_time, 5)

    return pred_labels_and_probs, pred_time


# Define the title
title = "DishVision - Multi Class Food Image Classifier"

# Define the description
description = "An EfficientNetB2-based transfer learning model for feature extraction in computer vision, designed to classify images into three distinct food categories."
# Define the article
article = "Computer Vision Project"

# Create the "examples" list from the "examples/" directory. os.listdir returns
# entries in arbitrary order, so sort them to keep the example order stable.
example_list = [
    ["examples/" + example] for example in sorted(os.listdir("examples"))
]

# Create the Gradio demo interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=3, label="Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

# Launch the demo; share=True requests a public Gradio share link
demo.launch(debug=False, share=True)