Commit f1cff84 by Matthijs Hollemans
Parent(s): 6a36cd0
Commit message: make noice
Files changed:
- .gitattributes  +4 -0
- README.md  +1 -1
- app.py  +76 -10
- cat-3.jpg  +3 -0
- construction-site.jpg  +3 -0
- dog-cat.jpg  +3 -0
- football-match.jpg  +3 -0
.gitattributes CHANGED

@@ -25,3 +25,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cat-3.jpg filter=lfs diff=lfs merge=lfs -text
+construction-site.jpg filter=lfs diff=lfs merge=lfs -text
+dog-cat.jpg filter=lfs diff=lfs merge=lfs -text
+football-match.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED

@@ -2,7 +2,7 @@
 title: MobileViT Deeplab Demo
 emoji: π
 colorFrom: black
-colorTo:
+colorTo: blue
 sdk: gradio
 sdk_version: 3.0.24
 app_file: app.py
app.py CHANGED

@@ -5,11 +5,11 @@ from PIL import Image
 import torch
 from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation
 
+
 model_checkpoint = "apple/deeplabv3-mobilevit-small"
-feature_extractor = MobileViTFeatureExtractor.from_pretrained(model_checkpoint)
+feature_extractor = MobileViTFeatureExtractor.from_pretrained(model_checkpoint)
 model = MobileViTForSemanticSegmentation.from_pretrained(model_checkpoint).eval()
 
-
 palette = np.array(
     [
         [ 0, 0, 0], [192, 0, 0], [ 0, 192, 0], [192, 192, 0],
@@ -21,6 +21,69 @@ palette = np.array(
     ],
     dtype=np.uint8)
 
+labels = [
+    "background",
+    "aeroplane",
+    "bicycle",
+    "bird",
+    "boat",
+    "bottle",
+    "bus",
+    "car",
+    "cat",
+    "chair",
+    "cow",
+    "diningtable",
+    "dog",
+    "horse",
+    "motorbike",
+    "person",
+    "pottedplant",
+    "sheep",
+    "sofa",
+    "train",
+    "tvmonitor",
+]
+
+# Draw the labels. Light colors use black text, dark colors use white text.
+inverted = [ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20 ]
+labels_colored = []
+for i in range(len(labels)):
+    r, g, b = palette[i]
+    label = labels[i]
+    color = "white" if i in inverted else "black"
+    text = "<span style='background-color: rgb(%d, %d, %d); color: %s; padding: 2px 4px;'>%s</span>" % (r, g, b, color, label)
+    labels_colored.append(text)
+labels_text = ", ".join(labels_colored)
+
+title = "Semantic Segmentation with MobileViT and DeepLabV3"
+
+description = """
+The input image is resized and center cropped to 512×512 pixels. The segmentation output is 32×32 pixels.<br>
+This model has been trained on <a href="http://host.robots.ox.ac.uk/pascal/VOC/">Pascal VOC</a>.
+The classes are:
+""" + labels_text + "</p>"
+
+article = """
+<div style='margin:20px auto;'>
+
+<p>Sources:<p>
+
+<p><a href="https://arxiv.org/abs/2110.02178">MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer</a></p>
+
+<p>Original pretrained weights from <a href="https://github.com/apple/ml-cvnets">this GitHub repo</a></p>
+
+<p>Example images from <a href="https://huggingface.co/datasets/mishig/sample_images">this dataset</a><p>
+
+</div>
+"""
+
+examples = [
+    ["cat-3.jpg"],
+    ["construction-site.jpg"],
+    ["dog-cat.jpg"],
+    ["football-match.jpg"],
+]
 
 
 def predict(image):
@@ -35,7 +98,7 @@ def predict(image):
     # Class predictions for each pixel.
     classes = outputs.logits.argmax(1).squeeze().numpy().astype(np.uint8)
 
-    # Super slow method but it works
+    # Super slow method but it works... should probably improve this.
     colored = np.zeros((classes.shape[0], classes.shape[1], 3), dtype=np.uint8)
     for y in range(classes.shape[0]):
         for x in range(classes.shape[1]):
@@ -43,26 +106,29 @@ def predict(image):
 
     # Resize predictions to input size (not original size).
     colored = Image.fromarray(colored)
-    colored = colored.resize((resized.shape[1], resized.shape[0]), resample=Image.NEAREST)
+    colored = colored.resize((resized.shape[1], resized.shape[0]), resample=Image.Resampling.NEAREST)
 
     # Keep everything that is not background.
     mask = (classes != 0) * 255
     mask = Image.fromarray(mask.astype(np.uint8)).convert("RGB")
-    mask = mask.resize((resized.shape[1], resized.shape[0]), resample=Image.NEAREST)
+    mask = mask.resize((resized.shape[1], resized.shape[0]), resample=Image.Resampling.NEAREST)
 
     # Blend with the input image.
     resized = Image.fromarray(resized)
     highlighted = Image.blend(resized, mask, 0.4)
 
+    #colored = colored.resize((256, 256), resample=Image.Resampling.BICUBIC)
+    #highlighted = highlighted.resize((256, 256), resample=Image.Resampling.BICUBIC)
+
    return colored, highlighted
 
 
 gr.Interface(
     fn=predict,
     inputs=gr.inputs.Image(label="Upload image"),
-    outputs=[gr.outputs.Image(label="Classes"), gr.outputs.Image(label="
-    title=
+    outputs=[gr.outputs.Image(label="Classes"), gr.outputs.Image(label="Overlay")],
+    title=title,
+    description=description,
+    article=article,
+    examples=examples,
 ).launch()
-
-
-# TODO: combo box with some example images
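On the Image.NEAREST to Image.Resampling.NEAREST change: Pillow moved the resampling constants into the Image.Resampling enum in version 9.1, and later releases drop the old module-level names. A small compatibility sketch, not part of the commit, for code that has to run on both old and new Pillow:

from PIL import Image

# Prefer the Image.Resampling enum (Pillow >= 9.1); fall back to the legacy
# module-level constant on older Pillow releases.
try:
    NEAREST = Image.Resampling.NEAREST
except AttributeError:
    NEAREST = Image.NEAREST

# e.g. colored = colored.resize((width, height), resample=NEAREST)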
cat-3.jpg  ADDED (Git LFS)
construction-site.jpg  ADDED (Git LFS)
dog-cat.jpg  ADDED (Git LFS)
football-match.jpg  ADDED (Git LFS)
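The four added images feed the `examples` list in app.py. For context, a minimal sketch of the inference path the new description refers to (resize and center crop to 512×512, coarse segmentation logits); the exact shapes are taken from the added description text, not verified here:

import torch
from PIL import Image
from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation

checkpoint = "apple/deeplabv3-mobilevit-small"
feature_extractor = MobileViTFeatureExtractor.from_pretrained(checkpoint)
model = MobileViTForSemanticSegmentation.from_pretrained(checkpoint).eval()

image = Image.open("cat-3.jpg")                                  # one of the added example images
inputs = feature_extractor(images=image, return_tensors="pt")    # resize + center crop
with torch.no_grad():
    outputs = model(**inputs)

# logits: (batch, num_classes, height, width); argmax over the class axis
# gives the per-pixel class map that predict() colorizes in app.py.
classes = outputs.logits.argmax(1).squeeze().numpy()
print(outputs.logits.shape, classes.shape)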