Add files for extra extra small

Browse files

Files changed (7) hide show

MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -0
MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin +2 -2
MobileViT_DeepLabV3.mlpackage/Manifest.json +3 -3
README.md +8 -8
config.json +10 -10
pytorch_model.bin +2 -2
tf_model.h5 +2 -2

MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

The diff for this file is too large to render. See raw diff

MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e50a89dd6be1e3ba7e4df23be4f2d79a081d443c1e498536377d30b8e5fb3a29
-size 25418432

 version https://git-lfs.github.com/spec/v1
+oid sha256:836d1d6b55ee4dabf7cc8fde30c61f3a2e2a4fe770bab04dd4ac4d738dd74d48
+size 7421184

MobileViT_DeepLabV3.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "4D7D9A73-AEEC-412D-A20C-7AA2C0F806EF": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         },
-        "FBABE180-594F-4894-9881-F3B3D807D27D": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         }
     },
-    "rootModelIdentifier": "4D7D9A73-AEEC-412D-A20C-7AA2C0F806EF"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "5D3AF255-5F4D-4EE3-BC12-3B38690DA1DE": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         },
+        "871A3834-A719-4108-9576-7E776094437D": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         }
     },
+    "rootModelIdentifier": "5D3AF255-5F4D-4EE3-BC12-3B38690DA1DE"
 }

README.md CHANGED Viewed

@@ -10,7 +10,7 @@ widget:
   example_title: Cat
 ---
-# MobileViT + DeepLabV3 (small-sized model)
 MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE).
@@ -38,8 +38,8 @@ import requests
 url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 image = Image.open(requests.get(url, stream=True).raw)
-feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small")
-model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")
 inputs = feature_extractor(images=image, return_tensors="pt")
@@ -68,11 +68,11 @@ To obtain the DeepLabV3 model, MobileViT was fine-tuned on the PASCAL VOC datase
 ## Evaluation results
-| Model            | PASCAL VOC mIOU | # params  | URL                                                       |
-|------------------|-----------------|-----------|-----------------------------------------------------------|
-| MobileViT-XXS    | 73.6            | 1.9 M     | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small |
-| MobileViT-XS     | 77.1            | 2.9 M     | https://huggingface.co/apple/deeplabv3-mobilevit-x-small  |
-| **MobileViT-S**  | **79.1**        | **6.4 M** | https://huggingface.co/apple/deeplabv3-mobilevit-small    |
 ### BibTeX entry and citation info

   example_title: Cat
 ---
+# MobileViT + DeepLabV3 (extra extra small-sized model)
 MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE).
 url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 image = Image.open(requests.get(url, stream=True).raw)
+feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-xx-small")
+model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-xx-small")
 inputs = feature_extractor(images=image, return_tensors="pt")
 ## Evaluation results
+| Model             | PASCAL VOC mIOU | # params  | URL                                                       |
+|-------------------|-----------------|-----------|-----------------------------------------------------------|
+| **MobileViT-XXS** | **73.6**        | **1.9 M** | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small |
+| MobileViT-XS      | 77.1            | 2.9 M     | https://huggingface.co/apple/deeplabv3-mobilevit-x-small  |
+| MobileViT-S       | 79.1            | 6.4 M     | https://huggingface.co/apple/deeplabv3-mobilevit-small    |
 ### BibTeX entry and citation info

config.json CHANGED Viewed

@@ -12,13 +12,13 @@
   "attention_probs_dropout_prob": 0.0,
   "classifier_dropout_prob": 0.1,
   "conv_kernel_size": 3,
-  "expand_ratio": 4.0,
   "hidden_act": "silu",
-  "hidden_dropout_prob": 0.1,
   "hidden_sizes": [
-    144,
-    192,
-    240
   ],
   "id2label": {
     "0": "background",
@@ -73,12 +73,12 @@
   "model_type": "mobilevit",
   "neck_hidden_sizes": [
     16,
-    32,
     64,
-    96,
-    128,
-    160,
-    640
   ],
   "num_attention_heads": 4,
   "num_channels": 3,

   "attention_probs_dropout_prob": 0.0,
   "classifier_dropout_prob": 0.1,
   "conv_kernel_size": 3,
+  "expand_ratio": 2.0,
   "hidden_act": "silu",
+  "hidden_dropout_prob": 0.05,
   "hidden_sizes": [
+    64,
+    80,
+    96
   ],
   "id2label": {
     "0": "background",
   "model_type": "mobilevit",
   "neck_hidden_sizes": [
     16,
+    16,
+    24,
+    48,
     64,
+    80,
+    320
   ],
   "num_attention_heads": 4,
   "num_channels": 3,

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e68a534df237d8b89aa9209c815976b4b34f49a4e8107f630fd799697e98291
-size 25615631

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f91dba8e66cf725cc0cd987b9bf47b0e95788bf4050032d55de23217d5ffa60
+size 7572751

tf_model.h5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e14ab532bd4b573c60e4f4c6639de6176db4c35c803cc7c0ba05fdb16e5b3de
-size 25943848

 version https://git-lfs.github.com/spec/v1
+oid sha256:feec578f1413c5f2a9c759d488bc95548b9741a370ed408b2b0f8f5a921394ee
+size 7898720