Jaime García Villena committed on
Commit
7822a12
1 Parent(s): 3627736

Add files for extra extra small

Browse files
MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
The diff for this file is too large to render. See raw diff
 
MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e50a89dd6be1e3ba7e4df23be4f2d79a081d443c1e498536377d30b8e5fb3a29
3
- size 25418432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836d1d6b55ee4dabf7cc8fde30c61f3a2e2a4fe770bab04dd4ac4d738dd74d48
3
+ size 7421184
MobileViT_DeepLabV3.mlpackage/Manifest.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
- "4D7D9A73-AEEC-412D-A20C-7AA2C0F806EF": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Specification",
7
  "name": "model.mlmodel",
8
  "path": "com.apple.CoreML/model.mlmodel"
9
  },
10
- "FBABE180-594F-4894-9881-F3B3D807D27D": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Weights",
13
  "name": "weights",
14
  "path": "com.apple.CoreML/weights"
15
  }
16
  },
17
- "rootModelIdentifier": "4D7D9A73-AEEC-412D-A20C-7AA2C0F806EF"
18
  }
 
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
+ "5D3AF255-5F4D-4EE3-BC12-3B38690DA1DE": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Specification",
7
  "name": "model.mlmodel",
8
  "path": "com.apple.CoreML/model.mlmodel"
9
  },
10
+ "871A3834-A719-4108-9576-7E776094437D": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Weights",
13
  "name": "weights",
14
  "path": "com.apple.CoreML/weights"
15
  }
16
  },
17
+ "rootModelIdentifier": "5D3AF255-5F4D-4EE3-BC12-3B38690DA1DE"
18
  }
README.md CHANGED
@@ -10,7 +10,7 @@ widget:
10
  example_title: Cat
11
  ---
12
 
13
- # MobileViT + DeepLabV3 (small-sized model)
14
 
15
  MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE).
16
 
@@ -38,8 +38,8 @@ import requests
38
  url = "http://images.cocodataset.org/val2017/000000039769.jpg"
39
  image = Image.open(requests.get(url, stream=True).raw)
40
 
41
- feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small")
42
- model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")
43
 
44
  inputs = feature_extractor(images=image, return_tensors="pt")
45
 
@@ -68,11 +68,11 @@ To obtain the DeepLabV3 model, MobileViT was fine-tuned on the PASCAL VOC datase
68
 
69
  ## Evaluation results
70
 
71
- | Model | PASCAL VOC mIOU | # params | URL |
72
- |------------------|-----------------|-----------|-----------------------------------------------------------|
73
- | MobileViT-XXS | 73.6 | 1.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small |
74
- | MobileViT-XS | 77.1 | 2.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-x-small |
75
- | **MobileViT-S** | **79.1** | **6.4 M** | https://huggingface.co/apple/deeplabv3-mobilevit-small |
76
 
77
  ### BibTeX entry and citation info
78
 
 
10
  example_title: Cat
11
  ---
12
 
13
+ # MobileViT + DeepLabV3 (extra extra small-sized model)
14
 
15
  MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE).
16
 
 
38
  url = "http://images.cocodataset.org/val2017/000000039769.jpg"
39
  image = Image.open(requests.get(url, stream=True).raw)
40
 
41
+ feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-xx-small")
42
+ model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-xx-small")
43
 
44
  inputs = feature_extractor(images=image, return_tensors="pt")
45
 
 
68
 
69
  ## Evaluation results
70
 
71
+ | Model | PASCAL VOC mIOU | # params | URL |
72
+ |-------------------|-----------------|-----------|-----------------------------------------------------------|
73
+ | **MobileViT-XXS** | **73.6** | **1.9 M** | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small |
74
+ | MobileViT-XS | 77.1 | 2.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-x-small |
75
+ | MobileViT-S | 79.1 | 6.4 M | https://huggingface.co/apple/deeplabv3-mobilevit-small |
76
 
77
  ### BibTeX entry and citation info
78
 
config.json CHANGED
@@ -12,13 +12,13 @@
12
  "attention_probs_dropout_prob": 0.0,
13
  "classifier_dropout_prob": 0.1,
14
  "conv_kernel_size": 3,
15
- "expand_ratio": 4.0,
16
  "hidden_act": "silu",
17
- "hidden_dropout_prob": 0.1,
18
  "hidden_sizes": [
19
- 144,
20
- 192,
21
- 240
22
  ],
23
  "id2label": {
24
  "0": "background",
@@ -73,12 +73,12 @@
73
  "model_type": "mobilevit",
74
  "neck_hidden_sizes": [
75
  16,
76
- 32,
 
 
77
  64,
78
- 96,
79
- 128,
80
- 160,
81
- 640
82
  ],
83
  "num_attention_heads": 4,
84
  "num_channels": 3,
 
12
  "attention_probs_dropout_prob": 0.0,
13
  "classifier_dropout_prob": 0.1,
14
  "conv_kernel_size": 3,
15
+ "expand_ratio": 2.0,
16
  "hidden_act": "silu",
17
+ "hidden_dropout_prob": 0.05,
18
  "hidden_sizes": [
19
+ 64,
20
+ 80,
21
+ 96
22
  ],
23
  "id2label": {
24
  "0": "background",
 
73
  "model_type": "mobilevit",
74
  "neck_hidden_sizes": [
75
  16,
76
+ 16,
77
+ 24,
78
+ 48,
79
  64,
80
+ 80,
81
+ 320
 
 
82
  ],
83
  "num_attention_heads": 4,
84
  "num_channels": 3,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e68a534df237d8b89aa9209c815976b4b34f49a4e8107f630fd799697e98291
3
- size 25615631
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f91dba8e66cf725cc0cd987b9bf47b0e95788bf4050032d55de23217d5ffa60
3
+ size 7572751
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e14ab532bd4b573c60e4f4c6639de6176db4c35c803cc7c0ba05fdb16e5b3de
3
- size 25943848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feec578f1413c5f2a9c759d488bc95548b9741a370ed408b2b0f8f5a921394ee
3
+ size 7898720