Spaces:

mrdbourke
/

foodvision_mini

Running

App Files Files Community

mrdbourke committed on Aug 19, 2022

Commit

ff066c3

•

1 Parent(s): 8e2b2cf

update files

Browse files

Files changed (4) hide show

09_pretrained_effnetb2_feature_extractor_pizza_steak_sushi_20_percent.pth +1 -1
app.py +48 -29
model.py +20 -4
requirements.txt +1 -1

09_pretrained_effnetb2_feature_extractor_pizza_steak_sushi_20_percent.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24a803b0e458a9949a7725d651f780c5c77592042d159c7dcd3e658e95e5b96d
 size 31273033

 version https://git-lfs.github.com/spec/v1
+oid sha256:49f172e8691ca003797f29f904dccfee4dd0d1aa99382313c75915a1fffa7a3b
 size 31273033

app.py CHANGED Viewed

@@ -1,58 +1,77 @@
 import gradio as gr
 import os
 import torch
 from model import create_effnetb2_model
 from timeit import default_timer as timer
 # Setup class names
 class_names = ["pizza", "steak", "sushi"]
-# Create model
-model, transforms = create_effnetb2_model(
-    num_classes=3,
 )
 # Load saved weights
-model.load_state_dict(
     torch.load(
         f="09_pretrained_effnetb2_feature_extractor_pizza_steak_sushi_20_percent.pth",
         map_location=torch.device("cpu"),  # load to CPU
     )
 )
-# Create prediction code
-def predict(img):
     start_time = timer()
-    img = transforms(img).unsqueeze(0)
-    model.eval()
     with torch.inference_mode():
-        pred_probs = torch.softmax(model(img), dim=1)
-    pred_labels_and_probs = {
-        class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))
-    }
     pred_time = round(timer() - start_time, 5)
     return pred_labels_and_probs, pred_time
-# Create Gradio app
 title = "FoodVision Mini 🍕🥩🍣"
 description = "An EfficientNetB2 feature extractor computer vision model to classify images of food as pizza, steak or sushi."
 article = "Created at [09. PyTorch Model Deployment](https://www.learnpytorch.io/09_pytorch_model_deployment/)."
-example_dir = "demo/examples"
-demo = gr.Interface(
-    fn=predict,
-    inputs=gr.Image(type="pil"),
-    outputs=[
-        gr.Label(num_top_classes=3, label="Predictions"),
-        gr.Number(label="Prediction time (s)"),
-    ],
-    examples=[["examples/" + example] for example in os.listdir("examples")],
-    interpretation="default",
-    title=title,
-    description=description,
-    article=article,
-)
-demo.launch()

+### 1. Imports and class names setup ###
 import gradio as gr
 import os
 import torch
 from model import create_effnetb2_model
 from timeit import default_timer as timer
+from typing import Tuple, Dict
 # Setup class names
 class_names = ["pizza", "steak", "sushi"]
+### 2. Model and transforms preparation ###
+# Create EffNetB2 model
+effnetb2, effnetb2_transforms = create_effnetb2_model(
+    num_classes=3, # len(class_names) would also work
 )
 # Load saved weights
+effnetb2.load_state_dict(
     torch.load(
         f="09_pretrained_effnetb2_feature_extractor_pizza_steak_sushi_20_percent.pth",
         map_location=torch.device("cpu"),  # load to CPU
     )
 )
+### 3. Predict function ###
+# Create predict function
+def predict(img) -> Tuple[Dict, float]:
+    """Transforms and performs a prediction on img and returns prediction and time taken.
+
+    Args:
+        img: input image. The Gradio interface in this file is configured with
+            gr.Image(type="pil"), so this is expected to be a PIL image.
+
+    Returns:
+        A tuple (pred_labels_and_probs, pred_time): a dict mapping each name in
+        class_names to its softmax probability as a float, and the elapsed
+        time in seconds rounded to 5 decimal places.
+    """
+    # Start the timer (the measured time covers the transform, the forward pass and building the output dict)
     start_time = timer()
+    # Transform the target image and add a batch dimension at index 0
+    img = effnetb2_transforms(img).unsqueeze(0)
+    # Put model into evaluation mode and turn on inference mode
+    effnetb2.eval()
     with torch.inference_mode():
+        # Pass the transformed image through the model and turn the prediction logits into prediction probabilities (softmax over dim=1)
+        pred_probs = torch.softmax(effnetb2(img), dim=1)
+    # Map each class name to its probability from the first (only) item in the batch (this dict is what the gr.Label output below receives)
+    pred_labels_and_probs = {class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))}
+    # Calculate the prediction time, rounded to 5 decimal places
     pred_time = round(timer() - start_time, 5)
+    # Return the prediction dictionary and prediction time
     return pred_labels_and_probs, pred_time
+### 4. Gradio app ###
+# Create the title, description and article strings passed to gr.Interface below
 title = "FoodVision Mini 🍕🥩🍣"
 description = "An EfficientNetB2 feature extractor computer vision model to classify images of food as pizza, steak or sushi."
 article = "Created at [09. PyTorch Model Deployment](https://www.learnpytorch.io/09_pytorch_model_deployment/)."
+# Build the examples list: one single-element list (a file path) per entry in the "examples/" directory
+example_list = [["examples/" + example] for example in os.listdir("examples")]
+# Create the Gradio demo
+demo = gr.Interface(fn=predict, # function that maps the input to the outputs
+                    inputs=gr.Image(type="pil"), # single image input, requested as a PIL image
+                    outputs=[gr.Label(num_top_classes=3, label="Predictions"), # first output: label component for the prediction dict
+                             gr.Number(label="Prediction time (s)")], # second output: number component for the prediction time (predict returns two values)
+                    # Example inputs, taken from the example_list built above
+                    examples=example_list,
+                    title=title,
+                    description=description,
+                    article=article)
+# Launch the demo!
+demo.launch()

model.py CHANGED Viewed

@@ -1,20 +1,36 @@
 import torchvision
 from torch import nn
-def create_effnetb2_model(num_classes: int):
     weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
     transforms = weights.transforms()
     model = torchvision.models.efficientnet_b2(weights=weights)
-    # Freeze base model
     for param in model.parameters():
         param.requires_grad = False
-    # Change classifier head
     model.classifier = nn.Sequential(
         nn.Dropout(p=0.3, inplace=True),
         nn.Linear(in_features=1408, out_features=num_classes),
     )
-    return model, transforms

+import torch
 import torchvision
 from torch import nn
+def create_effnetb2_model(num_classes:int=3,
+                          seed:int=42):
+    """Creates an EfficientNetB2 feature extractor model and transforms.
+
+    Loads torchvision's EfficientNet-B2 with its DEFAULT pretrained weights,
+    sets requires_grad=False on every parameter of the loaded model, then
+    replaces model.classifier with a new Dropout(p=0.3) + Linear(1408,
+    num_classes) head. The head is created after the freeze loop, so the
+    loop does not touch its parameters.
+
+    Args:
+        num_classes (int, optional): number of classes in the classifier head
+            (out_features of the final Linear layer). Defaults to 3.
+        seed (int, optional): random seed value, passed to torch.manual_seed
+            just before the new classifier head is built. Defaults to 42.
+
+    Returns:
+        model (torch.nn.Module): EffNetB2 feature extractor model.
+        transforms (torchvision.transforms): EffNetB2 image transforms, as
+            returned by weights.transforms().
+    """
+    # Create EffNetB2 pretrained weights, transforms and model
     weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
     transforms = weights.transforms()
     model = torchvision.models.efficientnet_b2(weights=weights)
+    # Freeze all layers in base model (runs before the new head exists, so only the pretrained parameters are frozen)
     for param in model.parameters():
         param.requires_grad = False
+    # Change classifier head, seeding torch's RNG first so the new layer's initial weights are reproducible
+    torch.manual_seed(seed)
     model.classifier = nn.Sequential(
         nn.Dropout(p=0.3, inplace=True),
         nn.Linear(in_features=1408, out_features=num_classes),
     )
+    return model, transforms

requirements.txt CHANGED Viewed

@@ -1,3 +1,3 @@
 torch==1.12.0
 torchvision==0.13.0
-gradio==3.1.4

 torch==1.12.0
 torchvision==0.13.0
+gradio==3.1.4