Jaime García Villena
committed on
Commit
•
7822a12
1
Parent(s):
3627736
Add files for extra small
Browse files
MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:836d1d6b55ee4dabf7cc8fde30c61f3a2e2a4fe770bab04dd4ac4d738dd74d48
|
3 |
+
size 7421184
|
MobileViT_DeepLabV3.mlpackage/Manifest.json
CHANGED
@@ -1,18 +1,18 @@
|
|
1 |
{
|
2 |
"fileFormatVersion": "1.0.0",
|
3 |
"itemInfoEntries": {
|
4 |
-
"
|
5 |
"author": "com.apple.CoreML",
|
6 |
"description": "CoreML Model Specification",
|
7 |
"name": "model.mlmodel",
|
8 |
"path": "com.apple.CoreML/model.mlmodel"
|
9 |
},
|
10 |
-
"
|
11 |
"author": "com.apple.CoreML",
|
12 |
"description": "CoreML Model Weights",
|
13 |
"name": "weights",
|
14 |
"path": "com.apple.CoreML/weights"
|
15 |
}
|
16 |
},
|
17 |
-
"rootModelIdentifier": "
|
18 |
}
|
|
|
1 |
{
|
2 |
"fileFormatVersion": "1.0.0",
|
3 |
"itemInfoEntries": {
|
4 |
+
"5D3AF255-5F4D-4EE3-BC12-3B38690DA1DE": {
|
5 |
"author": "com.apple.CoreML",
|
6 |
"description": "CoreML Model Specification",
|
7 |
"name": "model.mlmodel",
|
8 |
"path": "com.apple.CoreML/model.mlmodel"
|
9 |
},
|
10 |
+
"871A3834-A719-4108-9576-7E776094437D": {
|
11 |
"author": "com.apple.CoreML",
|
12 |
"description": "CoreML Model Weights",
|
13 |
"name": "weights",
|
14 |
"path": "com.apple.CoreML/weights"
|
15 |
}
|
16 |
},
|
17 |
+
"rootModelIdentifier": "5D3AF255-5F4D-4EE3-BC12-3B38690DA1DE"
|
18 |
}
|
README.md
CHANGED
@@ -10,7 +10,7 @@ widget:
|
|
10 |
example_title: Cat
|
11 |
---
|
12 |
|
13 |
-
# MobileViT + DeepLabV3 (small-sized model)
|
14 |
|
15 |
MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE).
|
16 |
|
@@ -38,8 +38,8 @@ import requests
|
|
38 |
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
39 |
image = Image.open(requests.get(url, stream=True).raw)
|
40 |
|
41 |
-
feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small")
|
42 |
-
model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")
|
43 |
|
44 |
inputs = feature_extractor(images=image, return_tensors="pt")
|
45 |
|
@@ -68,11 +68,11 @@ To obtain the DeepLabV3 model, MobileViT was fine-tuned on the PASCAL VOC datase
|
|
68 |
|
69 |
## Evaluation results
|
70 |
|
71 |
-
| Model
|
72 |
-
|
73 |
-
| MobileViT-XXS
|
74 |
-
| MobileViT-XS
|
75 |
-
|
|
76 |
|
77 |
### BibTeX entry and citation info
|
78 |
|
|
|
10 |
example_title: Cat
|
11 |
---
|
12 |
|
13 |
+
# MobileViT + DeepLabV3 (extra extra small-sized model)
|
14 |
|
15 |
MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE).
|
16 |
|
|
|
38 |
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
39 |
image = Image.open(requests.get(url, stream=True).raw)
|
40 |
|
41 |
+
feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-xx-small")
|
42 |
+
model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-xx-small")
|
43 |
|
44 |
inputs = feature_extractor(images=image, return_tensors="pt")
|
45 |
|
|
|
68 |
|
69 |
## Evaluation results
|
70 |
|
71 |
+
| Model | PASCAL VOC mIOU | # params | URL |
|
72 |
+
|-------------------|-----------------|-----------|-----------------------------------------------------------|
|
73 |
+
| **MobileViT-XXS** | **73.6** | **1.9 M** | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small |
|
74 |
+
| MobileViT-XS | 77.1 | 2.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-x-small |
|
75 |
+
| MobileViT-S | 79.1 | 6.4 M | https://huggingface.co/apple/deeplabv3-mobilevit-small |
|
76 |
|
77 |
### BibTeX entry and citation info
|
78 |
|
config.json
CHANGED
@@ -12,13 +12,13 @@
|
|
12 |
"attention_probs_dropout_prob": 0.0,
|
13 |
"classifier_dropout_prob": 0.1,
|
14 |
"conv_kernel_size": 3,
|
15 |
-
"expand_ratio":
|
16 |
"hidden_act": "silu",
|
17 |
-
"hidden_dropout_prob": 0.
|
18 |
"hidden_sizes": [
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
],
|
23 |
"id2label": {
|
24 |
"0": "background",
|
@@ -73,12 +73,12 @@
|
|
73 |
"model_type": "mobilevit",
|
74 |
"neck_hidden_sizes": [
|
75 |
16,
|
76 |
-
|
|
|
|
|
77 |
64,
|
78 |
-
|
79 |
-
|
80 |
-
160,
|
81 |
-
640
|
82 |
],
|
83 |
"num_attention_heads": 4,
|
84 |
"num_channels": 3,
|
|
|
12 |
"attention_probs_dropout_prob": 0.0,
|
13 |
"classifier_dropout_prob": 0.1,
|
14 |
"conv_kernel_size": 3,
|
15 |
+
"expand_ratio": 2.0,
|
16 |
"hidden_act": "silu",
|
17 |
+
"hidden_dropout_prob": 0.05,
|
18 |
"hidden_sizes": [
|
19 |
+
64,
|
20 |
+
80,
|
21 |
+
96
|
22 |
],
|
23 |
"id2label": {
|
24 |
"0": "background",
|
|
|
73 |
"model_type": "mobilevit",
|
74 |
"neck_hidden_sizes": [
|
75 |
16,
|
76 |
+
16,
|
77 |
+
24,
|
78 |
+
48,
|
79 |
64,
|
80 |
+
80,
|
81 |
+
320
|
|
|
|
|
82 |
],
|
83 |
"num_attention_heads": 4,
|
84 |
"num_channels": 3,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f91dba8e66cf725cc0cd987b9bf47b0e95788bf4050032d55de23217d5ffa60
|
3 |
+
size 7572751
|
tf_model.h5
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feec578f1413c5f2a9c759d488bc95548b9741a370ed408b2b0f8f5a921394ee
|
3 |
+
size 7898720
|