Spaces:

ioanasong
/

ioanasong-vit-MINC-2500

Sleeping

App Files Community

ioanasong committed on Jul 10

Commit

5dd71fd

•

1 Parent(s): e0724b2

added webcam file for huggingface

Browse files

Files changed (1) hide show

app.py +55 -1

app.py CHANGED Viewed

@@ -1,3 +1,57 @@
 import gradio as gr
-gr.load("models/ioanasong/vit-MINC-2500").launch()

+from transformers import ViTFeatureExtractor, ViTForImageClassification
+from PIL import Image
+import torch
 import gradio as gr
+from torch.nn import functional as F
+# gr.load("models/ioanasong/vit-MINC-2500").launch()
+# Load the pre-trained ViT model and feature extractor
+model_name = "ioanasong/vit-MINC-2500"
+model = ViTForImageClassification.from_pretrained(model_name)
+model.eval()
+feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
+# Define the prediction function
+# def predict(image):
+#     print(image)
+#     # Preprocess the image
+#     inputs = feature_extractor(images=image, return_tensors="pt")
+#     # Make prediction
+#     with torch.no_grad():
+#         outputs = model(**inputs)
+#     logits = outputs.logits
+#     # Get predicted label
+#     predicted_class_idx = logits.argmax(-1).item()
+#     predicted_label = model.config.id2label[predicted_class_idx]
+#     return predicted_label
+def predict(image):
+    # Preprocess the image using the feature extractor
+    inputs = feature_extractor(images=image, return_tensors="pt")
+    # Make prediction using the model
+    with torch.no_grad():
+        outputs = model(**inputs)
+    logits = outputs.logits
+    # Compute softmax probabilities
+    probs = F.softmax(logits, dim=-1)[0]
+    # Create a dictionary of label and probability
+    prob_dict = {model.config.id2label[i]: prob.item() for i, prob in enumerate(probs)}
+    return prob_dict
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=predict,
+    inputs=gr.Image(sources=['webcam'], streaming = True),
+    # outputs=gr.Label(num_top_classes=len(model.config.id2label)),
+    outputs=gr.Label(num_top_classes=5),
+    title="ViT Image Classification",
+    description="Capture an image from the camera and classify it using a pre-trained Vision Transformer (ViT) model.",
+)
+# Launch the app
+iface.launch()