Add all source files
- ViT_Caltech101_five_epochs.pth +3 -0
- app.py +74 -0
- class_names.txt +101 -0
- examples/image_0012.jpg +0 -0
- examples/image_0014.jpg +0 -0
- examples/image_0036.jpg +0 -0
- examples/image_0171.jpg +0 -0
- examples/image_0225.jpg +0 -0
- model.py +36 -0
- requirements.txt +4 -0
ViT_Caltech101_five_epochs.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0fe3cd9c70bf0532eae9315af3c6db81eea1c05a693bd400fee200a618774497
size 343568662
app.py
ADDED
@@ -0,0 +1,74 @@
import gradio as gr
import torchvision
import torch
from model import create_vit_instance
from pathlib import Path
from PIL import Image
from typing import List, Dict, Tuple
from timeit import default_timer as timer


# Read all available class names
with open('class_names.txt', 'r') as f:
    all_classes = [name.strip() for name in f.readlines()]

demo_vit_model, demo_vit_transforms = create_vit_instance(num_classes=len(all_classes),
                                                           device='cpu')

weights_path = Path("ViT_Caltech101_five_epochs.pth")
demo_vit_model.load_state_dict(torch.load(f=weights_path,
                                          map_location='cpu'))


# Prediction function: returns the class-probability dictionary and the time taken for the prediction
def predict(img: Image.Image,
            model: torch.nn.Module = demo_vit_model,
            transform: torchvision.transforms.Compose = demo_vit_transforms,
            classes: List[str] = all_classes) -> Tuple[Dict, float]:

    pred_prob_dict = dict()
    model = model.to('cpu')
    # Gradio passes a PIL image directly (inputs=gr.Image(type="pil")), so no Image.open is needed
    transformed_image = transform(img)

    start = timer()
    model.eval()
    with torch.inference_mode():
        batch_img = transformed_image.unsqueeze(dim=0).to(device='cpu')
        logit = model(batch_img)
        pred_probs = torch.softmax(input=logit, dim=1)
    end = timer()

    total_time = round(end - start, 4)
    pred_probs = pred_probs[0].tolist()

    # Map each class name to its predicted probability (gr.Label sorts and shows the top classes)
    for idx in range(len(pred_probs)):
        class_name = classes[idx]
        pred_prob_dict[class_name] = pred_probs[idx]

    return (pred_prob_dict, total_time)


title = "ObjectVision"
description = "ViT feature extractor fine-tuned for image classification on the Caltech101 dataset."
samples = [[path] for path in Path("examples").iterdir()]
demo = gr.Interface(fn=predict,
                    title=title,
                    description=description,
                    inputs=gr.Image(type="pil"),
                    examples=samples,
                    outputs=[
                        gr.Label(num_top_classes=5,
                                 label="Model thinks"),
                        gr.Number(label="Prediction time (in seconds)")
                    ])

if __name__ == "__main__":
    demo.launch(debug=True)
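A quick way to sanity-check predict() outside the Gradio UI is to call it directly on one of the example images shipped in this commit. This is a minimal local sketch, not part of the Space itself; it assumes the files above are present in the working directory.

from PIL import Image
from app import predict   # loads the model and weights at import time

img = Image.open("examples/image_0012.jpg")
probs, seconds = predict(img)
top_class = max(probs, key=probs.get)
print(f"Top prediction: {top_class} ({probs[top_class]:.3f}) in {seconds}s")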
class_names.txt
ADDED
@@ -0,0 +1,101 @@
Faces
Faces_easy
Leopards
Motorbikes
accordion
airplanes
anchor
ant
barrel
bass
beaver
binocular
bonsai
brain
brontosaurus
buddha
butterfly
camera
cannon
car_side
ceiling_fan
cellphone
chair
chandelier
cougar_body
cougar_face
crab
crayfish
crocodile
crocodile_head
cup
dalmatian
dollar_bill
dolphin
dragonfly
electric_guitar
elephant
emu
euphonium
ewer
ferry
flamingo
flamingo_head
garfield
gerenuk
gramophone
grand_piano
hawksbill
headphone
hedgehog
helicopter
ibis
inline_skate
joshua_tree
kangaroo
ketch
lamp
laptop
llama
lobster
lotus
mandolin
mayfly
menorah
metronome
minaret
nautilus
octopus
okapi
pagoda
panda
pigeon
pizza
platypus
pyramid
revolver
rhino
rooster
saxophone
schooner
scissors
scorpion
sea_horse
snoopy
soccer_ball
stapler
starfish
stegosaurus
stop_sign
strawberry
sunflower
tick
trilobite
umbrella
watch
water_lilly
wheelchair
wild_cat
windsor_chair
wrench
yin_yang
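Because predict() maps softmax index idx straight to classes[idx], the order of these names has to match the label order used when the classification head was trained. A minimal consistency check, assuming the file layout above and the 101-class head from this commit:

with open("class_names.txt") as f:
    names = [line.strip() for line in f if line.strip()]
assert len(names) == 101, f"expected 101 classes, got {len(names)}"
assert len(names) == len(set(names)), "class names should be unique"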
examples/image_0012.jpg
ADDED
examples/image_0014.jpg
ADDED
examples/image_0036.jpg
ADDED
examples/image_0171.jpg
ADDED
examples/image_0225.jpg
ADDED
model.py
ADDED
@@ -0,0 +1,36 @@
import torch
from torch import nn
from collections import OrderedDict
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from torchvision.models import vit_b_16, ViT_B_16_Weights


def create_vit_instance(num_classes: int = 1000,
                        device: torch.device = 'cpu'):
    # Pretrained ViT-B/16 backbone
    vit_weight = ViT_B_16_Weights.DEFAULT
    vit_model = vit_b_16(weights=vit_weight).to(device)

    # Freeze the backbone so only the new classification head is trainable
    for param in vit_model.parameters():
        param.requires_grad = False

    # Replace the classification head to output the requested number of classes
    vit_model.heads = nn.Sequential(
        OrderedDict([
            ('head', nn.Linear(in_features=768,
                               out_features=num_classes))
        ])
    ).to(device)

    # Custom transform (used instead of vit_weight.transforms()) so that
    # single-channel Caltech101 images are expanded to the 3 channels the model expects
    transform = transforms.Compose([
        transforms.Resize(256, interpolation=InterpolationMode.BILINEAR),
        transforms.CenterCrop(224),
        transforms.Grayscale(num_output_channels=3),  # always output 3 channels
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    return (vit_model, transform)
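A minimal sketch of how the returned model and transform fit together; the dummy single-channel image below is only an assumption for illustration. The Grayscale(num_output_channels=3) step means even a 1-channel input ends up as a (1, 3, 224, 224) batch, and the replaced head produces one logit per class.

import torch
from PIL import Image
from model import create_vit_instance

model, transform = create_vit_instance(num_classes=101, device='cpu')
dummy = Image.new("L", (300, 200))        # single-channel test image
batch = transform(dummy).unsqueeze(0)     # shape: (1, 3, 224, 224)
with torch.inference_mode():
    logits = model(batch)                 # shape: (1, 101)
print(logits.shape)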
requirements.txt
ADDED
@@ -0,0 +1,4 @@
gradio==5.0.2
torch==2.4.0
torchvision==0.19.0