Matthijs Hollemans committed
Commit c304fb7 • Parent(s): 8239775

segmentation demo

Files changed (3):
  1. README.md +2 -1
  2. app.py +43 -7
  3. requirements.txt +1 -1
README.md CHANGED
@@ -1,12 +1,13 @@
 ---
 title: MobileViT Deeplab Demo
-emoji: 🚀
+emoji: 🐕
 colorFrom: red
 colorTo: pink
 sdk: gradio
 sdk_version: 3.0.24
 app_file: app.py
 pinned: false
+license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,15 +1,51 @@
+import numpy as np
 import gradio as gr
-from transformers import pipeline
+from PIL import Image
+
+import torch
+from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation
+
+model_checkpoint = "apple/deeplabv3-mobilevit-small"
+feature_extractor = MobileViTFeatureExtractor.from_pretrained(model_checkpoint, do_center_crop=False, size=(512, 512))
+model = MobileViTForSemanticSegmentation.from_pretrained(model_checkpoint).eval()
+
+
+# From https://gist.github.com/kaixin96/457cc3d3be699f1f5b2fd4cdb638d4b4
+palette = np.array([
+    [  0,   0,   0], [128,   0,   0], [  0, 128,   0], [128, 128,   0], [  0,   0, 128],
+    [128,   0, 128], [  0, 128, 128], [128, 128, 128], [ 64,   0,   0], [192,   0,   0],
+    [ 64, 128,   0], [192, 128,   0], [ 64,   0, 128], [192,   0, 128], [ 64, 128, 128],
+    [192, 128, 128], [  0,  64,   0], [128,  64,   0], [  0, 192,   0], [128, 192,   0],
+    [  0,  64, 128]], dtype=np.uint8)
 
-pipeline = pipeline(task="image-classification", model="apple/mobilevit-small")
 
 def predict(image):
-    predictions = pipeline(image)
-    return {p["label"]: p["score"] for p in predictions}
+    with torch.no_grad():
+        inputs = feature_extractor(image, return_tensors="pt")
+        outputs = model(**inputs)
+
+    classes = outputs.logits.argmax(1).squeeze().numpy().astype(np.uint8)
+
+    # Super slow method but it works
+    colored = np.zeros((classes.shape[0], classes.shape[1], 3), dtype=np.uint8)
+    for y in range(classes.shape[0]):
+        for x in range(classes.shape[1]):
+            colored[y, x] = palette[classes[y, x]]
+
+    # TODO: overlay mask on image?
+
+    out_image = Image.fromarray(colored)
+    out_image = out_image.resize((image.shape[1], image.shape[0]), resample=Image.NEAREST)
+    return out_image
+
 
 gr.Interface(
     fn=predict,
-    inputs=gr.inputs.Image(label="Upload image", type="filepath"),
-    outputs=gr.outputs.Label(num_top_classes=5),
-    title="This is a title",
+    inputs=gr.inputs.Image(label="Upload image"),
+    outputs=gr.outputs.Image(),
+    title="Semantic Segmentation with MobileViT and DeepLabV3",
 ).launch()
+
+
+# TODO: combo box with some example images
+# TODO: combo box with classes to show on the output, if none then do argmax
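A possible follow-up to the "Super slow method but it works" comment and the "overlay mask on image?" TODO in the new app.py: the per-pixel double loop collapses into a single NumPy fancy-indexing lookup, and PIL's Image.blend can handle the overlay. A minimal sketch under the commit's own setup (the palette and classes arrays defined above, and an RGB numpy array input, which is the gr.inputs.Image default in Gradio 3.0.x); the helper name colorize_and_overlay and the alpha value are invented here, not part of the commit:

import numpy as np
from PIL import Image

def colorize_and_overlay(image, classes, palette, alpha=0.5):
    # Hypothetical helper, not part of the commit.
    # Fancy indexing: looking up an (H, W) array of class ids in a (21, 3)
    # palette produces the (H, W, 3) uint8 color mask in one vectorized step.
    colored = palette[classes]

    # Upscale the mask back to the input resolution with nearest-neighbor
    # so class boundaries stay crisp.
    mask = Image.fromarray(colored).resize(
        (image.shape[1], image.shape[0]), resample=Image.NEAREST)

    # Alpha-blend the color mask over the original image.
    return Image.blend(Image.fromarray(image).convert("RGB"), mask, alpha)

Inside predict, this would replace everything from the colored loop onward with return colorize_and_overlay(image, classes, palette).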
requirements.txt CHANGED
@@ -1,2 +1,2 @@
-transformers
+git+https://github.com/huggingface/transformers.git
 torch
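Note on the requirements change: swapping the PyPI transformers release for a Git install of the main branch presumably picks up the MobileViT classes used in app.py before they were available in a stable release; once they ship in a tagged version, pinning that release would make the Space build more reproducible.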