niobures committed
Commit 772fbdb · verified · 1 parent: 651b148

DeepLab (code, models, paper)

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +4 -0
  2. DeepLab. A Deep Dive into Advanced Visual Processing.pdf +3 -0
  3. DeepLab. Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.pdf +3 -0
  4. code/Deeplab_Tensorflow.zip +3 -0
  5. code/deeplab-pytorch.zip +3 -0
  6. code/deeplab2.zip +3 -0
  7. code/deeplab_v3.zip +3 -0
  8. code/deeplabv3-plus-pytorch.zip +3 -0
  9. code/deeplabv3.zip +3 -0
  10. code/deeplabv3plus-pytorch.zip +3 -0
  11. code/keras-deeplab-v3-plus.zip +3 -0
  12. code/pytorch-deeplab-xception.zip +3 -0
  13. code/semantic-segmentation-codebase.zip +3 -0
  14. code/tensorflow-deeplab-resnet.zip +3 -0
  15. models/deeplab_v3/.gitattributes +35 -0
  16. models/deeplab_v3/checkpoints/train/checkpoint +2 -0
  17. models/deeplab_v3/checkpoints/train/data.json +19 -0
  18. models/deeplab_v3/checkpoints/train/events.out.tfevents.1516966190.DIGITS-1 +3 -0
  19. models/deeplab_v3/checkpoints/train/model.ckpt.data-00000-of-00001 +3 -0
  20. models/deeplab_v3/checkpoints/train/model.ckpt.index +0 -0
  21. models/deeplab_v3/checkpoints/train/model.ckpt.meta +3 -0
  22. models/deeplab_v3/source.txt +2 -0
  23. models/deeplabv3-mobilevit-small (apple)/.gitattributes +27 -0
  24. models/deeplabv3-mobilevit-small (apple)/LICENSE +88 -0
  25. models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  26. models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  27. models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json +18 -0
  28. models/deeplabv3-mobilevit-small (apple)/README.md +86 -0
  29. models/deeplabv3-mobilevit-small (apple)/config.json +91 -0
  30. models/deeplabv3-mobilevit-small (apple)/preprocessor_config.json +9 -0
  31. models/deeplabv3-mobilevit-small (apple)/pytorch_model.bin +3 -0
  32. models/deeplabv3-mobilevit-small (apple)/source.txt +1 -0
  33. models/deeplabv3-mobilevit-small (apple)/tf_model.h5 +3 -0
  34. models/deeplabv3-mobilevit-small/.gitattributes +35 -0
  35. models/deeplabv3-mobilevit-small/README.md +9 -0
  36. models/deeplabv3-mobilevit-small/config.json +91 -0
  37. models/deeplabv3-mobilevit-small/onnx/model.onnx +3 -0
  38. models/deeplabv3-mobilevit-small/onnx/model_fp16.onnx +3 -0
  39. models/deeplabv3-mobilevit-small/onnx/model_quantized.onnx +3 -0
  40. models/deeplabv3-mobilevit-small/preprocessor_config.json +18 -0
  41. models/deeplabv3-mobilevit-small/quant_config.json +34 -0
  42. models/deeplabv3-mobilevit-small/source.txt +1 -0
  43. models/deeplabv3-mobilevit-x-small (apple)/.gitattributes +27 -0
  44. models/deeplabv3-mobilevit-x-small (apple)/LICENSE +88 -0
  45. models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  46. models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  47. models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json +18 -0
  48. models/deeplabv3-mobilevit-x-small (apple)/README.md +86 -0
  49. models/deeplabv3-mobilevit-x-small (apple)/config.json +91 -0
  50. models/deeplabv3-mobilevit-x-small (apple)/preprocessor_config.json +9 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ DeepLab.[[:space:]]A[[:space:]]Deep[[:space:]]Dive[[:space:]]into[[:space:]]Advanced[[:space:]]Visual[[:space:]]Processing.pdf filter=lfs diff=lfs merge=lfs -text
+ DeepLab.[[:space:]]Semantic[[:space:]]Image[[:space:]]Segmentation[[:space:]]with[[:space:]]Deep[[:space:]]Convolutional[[:space:]]Nets,[[:space:]]Atrous[[:space:]]Convolution,[[:space:]]and[[:space:]]Fully[[:space:]]Connected[[:space:]]CRFs.pdf filter=lfs diff=lfs merge=lfs -text
+ models/deeplab_v3/checkpoints/train/model.ckpt.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+ models/deeplab_v3/checkpoints/train/model.ckpt.meta filter=lfs diff=lfs merge=lfs -text
DeepLab. A Deep Dive into Advanced Visual Processing.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a94cca8e020fe74d1a046e2328469f2cb25a5ef36e2610337382cda664e4ad1c
+ size 293640
DeepLab. Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b80b304869e8dbd0ce9505b4f874ca4b18dca354967348cd104aff62a6dcd25d
+ size 6101934
code/Deeplab_Tensorflow.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3827ddebd58f923638182c0f4b82a9313a8820adeec030c1ae47428722e4888a
+ size 5200061
code/deeplab-pytorch.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fed9aad9947755c49bb4d974a1ba91041f7e92b01167c7f29d0361fe8d3bbe70
+ size 139271448
code/deeplab2.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d32b82b601b3ce06961f6eca24647477cecc546689d5c9109c83e126db3f6571
+ size 17433472
code/deeplab_v3.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64b2385a33d66b29bb5169186d5a2d9419604a6abce588f9aa8c67be696facbf
+ size 987849
code/deeplabv3-plus-pytorch.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb84a431024cd16187b88d127f29dcb409911c602e7ea17ef4e273147a7167a8
+ size 66513814
code/deeplabv3.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:12a4c05b38b8284fd7a9dd4257c7b6c9d40e398024feab02192ed34472c79ee0
+ size 541821384
code/deeplabv3plus-pytorch.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8626774f15d83cf780f7dc78cbe38d0bbd0f21ed7943bd22fa57ebdcff7ce88e
+ size 437976
code/keras-deeplab-v3-plus.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7bf8d61aa1155aad56a23d5e4006bbeeb8e458d4fddc9409318ab0515d13b2a
+ size 8187786
code/pytorch-deeplab-xception.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1ac9b5e36df312eccabce385efb0a1123271a5493d570c995fb0068c995445e
+ size 1569855
code/semantic-segmentation-codebase.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f2630452a10b7adeb3efc19788a017add2de3fe244cebf867e639e3d0e18c4b7
+ size 180146
code/tensorflow-deeplab-resnet.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6140b810eb4211204f30dae17019a296f25fa10cac852eae8baf7af27a2047b
+ size 2996246
models/deeplab_v3/.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/deeplab_v3/checkpoints/train/checkpoint ADDED
@@ -0,0 +1,2 @@
+ model_checkpoint_path: "model.ckpt"
+ all_model_checkpoint_paths: "model.ckpt"
models/deeplab_v3/checkpoints/train/data.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "accumulated_validation_miou": 0,
+ "batch_norm_decay": 0.997,
+ "batch_norm_epsilon": 1e-05,
+ "batch_size": 16,
+ "current_best_val_loss": "0.294389428197",
+ "gpu_id": 1,
+ "l2_regularizer": 0.0001,
+ "multi_grid": [
+ 1,
+ 2,
+ 4
+ ],
+ "number_of_classes": 21,
+ "output_stride": 16,
+ "starting_learning_rate": 1e-05,
+ "resnet_model": "resnet_v2_50",
+ "crop_size": 512
+ }
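The data.json above records the training hyperparameters for this deeplab_v3 checkpoint. As a minimal sketch (assuming only the file layout committed here), it can be read back in Python:

```python
import json

# Load the training hyperparameters saved alongside the deeplab_v3 checkpoint.
with open("models/deeplab_v3/checkpoints/train/data.json") as f:
    config = json.load(f)

# e.g. the atrous multi-grid rates and output stride used for training
print(config["multi_grid"], config["output_stride"])  # [1, 2, 4] 16
```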
models/deeplab_v3/checkpoints/train/events.out.tfevents.1516966190.DIGITS-1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3733c938c519b433537ce40c3be607b6a67978f855f6dceb5fcb684a6a7b55b0
+ size 55620454
models/deeplab_v3/checkpoints/train/model.ckpt.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f77391ecec1ab80a707e2a06da9742cff0f6e4c321f9f6965a49079f96f91e44
+ size 469051740
models/deeplab_v3/checkpoints/train/model.ckpt.index ADDED
Binary file (29.1 kB)
models/deeplab_v3/checkpoints/train/model.ckpt.meta ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fdd13d4322781ff1034bc7fbf98ad80e52d68eec8e8d3707a252402b54760122
+ size 5094997
models/deeplab_v3/source.txt ADDED
@@ -0,0 +1,2 @@
+ https://github.com/sthalles/deeplab_v3
+ https://www.dropbox.com/scl/fo/bf33snucsueb0pu4bwu74/AMkUv-quobAUFB_nz6LSQTg
models/deeplabv3-mobilevit-small (apple)/.gitattributes ADDED
@@ -0,0 +1,27 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/deeplabv3-mobilevit-small (apple)/LICENSE ADDED
@@ -0,0 +1,88 @@
+ Disclaimer: IMPORTANT: This Apple Machine Learning Research Model is
+ specifically developed and released by Apple Inc. ("Apple") for the sole purpose
+ of scientific research of artificial intelligence and machine-learning
+ technology. “Apple Machine Learning Research Model” means the model, including
+ but not limited to algorithms, formulas, trained model weights, parameters,
+ configurations, checkpoints, and any related materials (including
+ documentation).
+
+ This Apple Machine Learning Research Model is provided to You by
+ Apple in consideration of your agreement to the following terms, and your use,
+ modification, creation of Model Derivatives, and or redistribution of the Apple
+ Machine Learning Research Model constitutes acceptance of this Agreement. If You
+ do not agree with these terms, please do not use, modify, create Model
+ Derivatives of, or distribute this Apple Machine Learning Research Model or
+ Model Derivatives.
+
+ * License Scope: In consideration of your agreement to abide by the following
+ terms, and subject to these terms, Apple hereby grants you a personal,
+ non-exclusive, worldwide, non-transferable, royalty-free, revocable, and
+ limited license, to use, copy, modify, distribute, and create Model
+ Derivatives (defined below) of the Apple Machine Learning Research Model
+ exclusively for Research Purposes. You agree that any Model Derivatives You
+ may create or that may be created for You will be limited to Research Purposes
+ as well. “Research Purposes” means non-commercial scientific research and
+ academic development activities, such as experimentation, analysis, testing
+ conducted by You with the sole intent to advance scientific knowledge and
+ research. “Research Purposes” does not include any commercial exploitation,
+ product development or use in any commercial product or service.
+
+ * Distribution of Apple Machine Learning Research Model and Model Derivatives:
+ If you choose to redistribute Apple Machine Learning Research Model or its
+ Model Derivatives, you must provide a copy of this Agreement to such third
+ party, and ensure that the following attribution notice be provided: “Apple
+ Machine Learning Research Model is licensed under the Apple Machine Learning
+ Research Model License Agreement.” Additionally, all Model Derivatives must
+ clearly be identified as such, including disclosure of modifications and
+ changes made to the Apple Machine Learning Research Model. The name,
+ trademarks, service marks or logos of Apple may not be used to endorse or
+ promote Model Derivatives or the relationship between You and Apple. “Model
+ Derivatives” means any models or any other artifacts created by modifications,
+ improvements, adaptations, alterations to the architecture, algorithm or
+ training processes of the Apple Machine Learning Research Model, or by any
+ retraining, fine-tuning of the Apple Machine Learning Research Model.
+
+ * No Other License: Except as expressly stated in this notice, no other rights
+ or licenses, express or implied, are granted by Apple herein, including but
+ not limited to any patent, trademark, and similar intellectual property rights
+ worldwide that may be infringed by the Apple Machine Learning Research Model,
+ the Model Derivatives or by other works in which the Apple Machine Learning
+ Research Model may be incorporated.
+
+ * Compliance with Laws: Your use of Apple Machine Learning Research Model must
+ be in compliance with all applicable laws and regulations.
+
+ * Term and Termination: The term of this Agreement will begin upon your
+ acceptance of this Agreement or use of the Apple Machine Learning Research
+ Model and will continue until terminated in accordance with the following
+ terms. Apple may terminate this Agreement at any time if You are in breach of
+ any term or condition of this Agreement. Upon termination of this Agreement,
+ You must cease to use all Apple Machine Learning Research Models and Model
+ Derivatives and permanently delete any copy thereof. Sections 3, 6 and 7 will
+ survive termination.
+
+ * Disclaimer and Limitation of Liability: This Apple Machine Learning Research
+ Model and any outputs generated by the Apple Machine Learning Research Model
+ are provided on an “AS IS” basis. APPLE MAKES NO WARRANTIES, EXPRESS OR
+ IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
+ NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE,
+ REGARDING THE APPLE MACHINE LEARNING RESEARCH MODEL OR OUTPUTS GENERATED BY
+ THE APPLE MACHINE LEARNING RESEARCH MODEL. You are solely responsible for
+ determining the appropriateness of using or redistributing the Apple Machine
+ Learning Research Model and any outputs of the Apple Machine Learning Research
+ Model and assume any risks associated with Your use of the Apple Machine
+ Learning Research Model and any output and results. IN NO EVENT SHALL APPLE BE
+ LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+ IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF
+ THE APPLE MACHINE LEARNING RESEARCH MODEL AND ANY OUTPUTS OF THE APPLE MACHINE
+ LEARNING RESEARCH MODEL, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT,
+ TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS
+ BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ * Governing Law: This Agreement will be governed by and construed under the laws
+ of the State of California without regard to its choice of law principles. The
+ Convention on Contracts for the International Sale of Goods shall not apply to
+ the Agreement except that the arbitration clause and any arbitration hereunder
+ shall be governed by the Federal Arbitration Act, Chapters 1 and 2.
+
+ Copyright (C) 2025 Apple Inc. All Rights Reserved.
models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f34ba0fd085efa3e1ea9c2217bd201c5b28b4a458748553f7a4ccfed1274b56
+ size 147826
models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e50a89dd6be1e3ba7e4df23be4f2d79a081d443c1e498536377d30b8e5fb3a29
+ size 25418432
models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
+ {
+ "fileFormatVersion": "1.0.0",
+ "itemInfoEntries": {
+ "4D7D9A73-AEEC-412D-A20C-7AA2C0F806EF": {
+ "author": "com.apple.CoreML",
+ "description": "CoreML Model Specification",
+ "name": "model.mlmodel",
+ "path": "com.apple.CoreML/model.mlmodel"
+ },
+ "FBABE180-594F-4894-9881-F3B3D807D27D": {
+ "author": "com.apple.CoreML",
+ "description": "CoreML Model Weights",
+ "name": "weights",
+ "path": "com.apple.CoreML/weights"
+ }
+ },
+ "rootModelIdentifier": "4D7D9A73-AEEC-412D-A20C-7AA2C0F806EF"
+ }
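The manifest above only indexes the two items inside the `.mlpackage`. As a hedged sketch (assuming `coremltools` is installed; actual prediction requires macOS, so the model load is skipped here), the committed package can be inspected from Python:

```python
import coremltools as ct

# Read the Core ML package metadata without compiling it for prediction
# (skip_model_load=True avoids the macOS-only runtime).
mlmodel = ct.models.MLModel(
    "models/deeplabv3-mobilevit-small (apple)/MobileViT_DeepLabV3.mlpackage",
    skip_model_load=True,
)
spec = mlmodel.get_spec()
print(spec.description.input, spec.description.output)
```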
models/deeplabv3-mobilevit-small (apple)/README.md ADDED
@@ -0,0 +1,86 @@
+ ---
+ license: apple-amlr
+ tags:
+ - vision
+ - image-segmentation
+ datasets:
+ - pascal-voc
+ widget:
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat-2.jpg
+ example_title: Cat
+ ---
+
+ # MobileViT + DeepLabV3 (small-sized model)
+
+ MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE).
+
+ Disclaimer: The team releasing MobileViT did not write a model card for this model so this model card has been written by the Hugging Face team.
+
+ ## Model description
+
+ MobileViT is a light-weight, low latency convolutional neural network that combines MobileNetV2-style layers with a new block that replaces local processing in convolutions with global processing using transformers. As with ViT (Vision Transformer), the image data is converted into flattened patches before it is processed by the transformer layers. Afterwards, the patches are "unflattened" back into feature maps. This allows the MobileViT-block to be placed anywhere inside a CNN. MobileViT does not require any positional embeddings.
+
+ The model in this repo adds a [DeepLabV3](https://arxiv.org/abs/1706.05587) head to the MobileViT backbone for semantic segmentation.
+
+ ## Intended uses & limitations
+
+ You can use the raw model for semantic segmentation. See the [model hub](https://huggingface.co/models?search=mobilevit) to look for fine-tuned versions on a task that interests you.
+
+ ### How to use
+
+ Here is how to use this model:
+
+ ```python
+ from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation
+ from PIL import Image
+ import requests
+
+ url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+ image = Image.open(requests.get(url, stream=True).raw)
+
+ feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small")
+ model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")
+
+ inputs = feature_extractor(images=image, return_tensors="pt")
+
+ outputs = model(**inputs)
+ logits = outputs.logits
+ predicted_mask = logits.argmax(1).squeeze(0)
+ ```
+
+ Currently, both the feature extractor and model support PyTorch.
+
+ ## Training data
+
+ The MobileViT + DeepLabV3 model was pretrained on [ImageNet-1k](https://huggingface.co/datasets/imagenet-1k), a dataset consisting of 1 million images and 1,000 classes, and then fine-tuned on the [PASCAL VOC2012](http://host.robots.ox.ac.uk/pascal/VOC/) dataset.
+
+ ## Training procedure
+
+ ### Preprocessing
+
+ At inference time, images are center-cropped at 512x512. Pixels are normalized to the range [0, 1]. Images are expected to be in BGR pixel order, not RGB.
+
+ ### Pretraining
+
+ The MobileViT networks are trained from scratch for 300 epochs on ImageNet-1k on 8 NVIDIA GPUs with an effective batch size of 1024 and learning rate warmup for 3k steps, followed by cosine annealing. Also used were label smoothing cross-entropy loss and L2 weight decay. Training resolution varies from 160x160 to 320x320, using multi-scale sampling.
+
+ To obtain the DeepLabV3 model, MobileViT was fine-tuned on the PASCAL VOC dataset using 4 NVIDIA A100 GPUs.
+
+ ## Evaluation results
+
+ | Model | PASCAL VOC mIOU | # params | URL |
+ |------------------|-----------------|-----------|-----------------------------------------------------------|
+ | MobileViT-XXS | 73.6 | 1.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small |
+ | MobileViT-XS | 77.1 | 2.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-x-small |
+ | **MobileViT-S** | **79.1** | **6.4 M** | https://huggingface.co/apple/deeplabv3-mobilevit-small |
+
+ ### BibTeX entry and citation info
+
+ ```bibtex
+ @inproceedings{vision-transformer,
+ title = {MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer},
+ author = {Sachin Mehta and Mohammad Rastegari},
+ year = {2022},
+ URL = {https://arxiv.org/abs/2110.02178}
+ }
+ ```
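The README's snippet stops at a low-resolution class-id mask. As a hedged follow-up sketch (continuing from the `image`, `logits`, and `model` variables in the snippet above; the interpolation step is an addition here, not part of the model card):

```python
import torch

# Upsample the logits to the input resolution before taking the argmax;
# PIL's image.size is (width, height), so reverse it for torch's (H, W).
upsampled = torch.nn.functional.interpolate(
    logits, size=image.size[::-1], mode="bilinear", align_corners=False
)
mask = upsampled.argmax(dim=1).squeeze(0)  # (height, width) class ids

# Map the PASCAL VOC class ids found in the mask back to label names.
id2label = model.config.id2label
print(sorted({id2label[int(c)] for c in mask.unique()}))
```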
models/deeplabv3-mobilevit-small (apple)/config.json ADDED
@@ -0,0 +1,91 @@
+ {
+ "architectures": [
+ "MobileViTForSemanticSegmentation"
+ ],
+ "aspp_dropout_prob": 0.1,
+ "aspp_out_channels": 256,
+ "atrous_rates": [
+ 6,
+ 12,
+ 18
+ ],
+ "attention_probs_dropout_prob": 0.0,
+ "classifier_dropout_prob": 0.1,
+ "conv_kernel_size": 3,
+ "expand_ratio": 4.0,
+ "hidden_act": "silu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_sizes": [
+ 144,
+ 192,
+ 240
+ ],
+ "id2label": {
+ "0": "background",
+ "1": "aeroplane",
+ "2": "bicycle",
+ "3": "bird",
+ "4": "boat",
+ "5": "bottle",
+ "6": "bus",
+ "7": "car",
+ "8": "cat",
+ "9": "chair",
+ "10": "cow",
+ "11": "diningtable",
+ "12": "dog",
+ "13": "horse",
+ "14": "motorbike",
+ "15": "person",
+ "16": "pottedplant",
+ "17": "sheep",
+ "18": "sofa",
+ "19": "train",
+ "20": "tvmonitor"
+ },
+ "image_size": 512,
+ "initializer_range": 0.02,
+ "label2id": {
+ "aeroplane": 1,
+ "background": 0,
+ "bicycle": 2,
+ "bird": 3,
+ "boat": 4,
+ "bottle": 5,
+ "bus": 6,
+ "car": 7,
+ "cat": 8,
+ "chair": 9,
+ "cow": 10,
+ "diningtable": 11,
+ "dog": 12,
+ "horse": 13,
+ "motorbike": 14,
+ "person": 15,
+ "pottedplant": 16,
+ "sheep": 17,
+ "sofa": 18,
+ "train": 19,
+ "tvmonitor": 20
+ },
+ "layer_norm_eps": 1e-05,
+ "mlp_ratio": 2.0,
+ "model_type": "mobilevit",
+ "neck_hidden_sizes": [
+ 16,
+ 32,
+ 64,
+ 96,
+ 128,
+ 160,
+ 640
+ ],
+ "num_attention_heads": 4,
+ "num_channels": 3,
+ "output_stride": 16,
+ "patch_size": 2,
+ "qkv_bias": true,
+ "semantic_loss_ignore_index": 255,
+ "torch_dtype": "float32",
+ "transformers_version": "4.20.0.dev0"
+ }
models/deeplabv3-mobilevit-small (apple)/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "crop_size": 512,
+ "do_center_crop": true,
+ "do_flip_channels": true,
+ "do_resize": true,
+ "feature_extractor_type": "MobileViTFeatureExtractor",
+ "resample": 2,
+ "size": 544
+ }
models/deeplabv3-mobilevit-small (apple)/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e68a534df237d8b89aa9209c815976b4b34f49a4e8107f630fd799697e98291
+ size 25615631
models/deeplabv3-mobilevit-small (apple)/source.txt ADDED
@@ -0,0 +1 @@
+ https://huggingface.co/apple/deeplabv3-mobilevit-small
models/deeplabv3-mobilevit-small (apple)/tf_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e14ab532bd4b573c60e4f4c6639de6176db4c35c803cc7c0ba05fdb16e5b3de
+ size 25943848
models/deeplabv3-mobilevit-small/.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/deeplabv3-mobilevit-small/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ base_model: apple/deeplabv3-mobilevit-small
+ library_name: transformers.js
+ pipeline_tag: image-segmentation
+ ---
+
+ https://huggingface.co/apple/deeplabv3-mobilevit-small with ONNX weights to be compatible with Transformers.js.
+
+ Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
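This repo targets Transformers.js, but the exported graph can also be exercised directly with `onnxruntime` in Python. A hedged sketch; the input name `pixel_values` and the 1x3x512x512 float32 shape are assumptions based on the Optimum export convention and the config.json below, which is why the input names are printed first:

```python
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("models/deeplabv3-mobilevit-small/onnx/model.onnx")
print([i.name for i in session.get_inputs()])  # verify the assumed input name

pixel_values = np.random.rand(1, 3, 512, 512).astype(np.float32)  # placeholder input
logits = session.run(None, {"pixel_values": pixel_values})[0]
print(logits.shape)  # expected (1, 21, 32, 32) at output_stride 16
```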
models/deeplabv3-mobilevit-small/config.json ADDED
@@ -0,0 +1,91 @@
+ {
+ "_name_or_path": "apple/deeplabv3-mobilevit-small",
+ "architectures": [
+ "MobileViTForSemanticSegmentation"
+ ],
+ "aspp_dropout_prob": 0.1,
+ "aspp_out_channels": 256,
+ "atrous_rates": [
+ 6,
+ 12,
+ 18
+ ],
+ "attention_probs_dropout_prob": 0.0,
+ "classifier_dropout_prob": 0.1,
+ "conv_kernel_size": 3,
+ "expand_ratio": 4.0,
+ "hidden_act": "silu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_sizes": [
+ 144,
+ 192,
+ 240
+ ],
+ "id2label": {
+ "0": "background",
+ "1": "aeroplane",
+ "2": "bicycle",
+ "3": "bird",
+ "4": "boat",
+ "5": "bottle",
+ "6": "bus",
+ "7": "car",
+ "8": "cat",
+ "9": "chair",
+ "10": "cow",
+ "11": "diningtable",
+ "12": "dog",
+ "13": "horse",
+ "14": "motorbike",
+ "15": "person",
+ "16": "pottedplant",
+ "17": "sheep",
+ "18": "sofa",
+ "19": "train",
+ "20": "tvmonitor"
+ },
+ "image_size": 512,
+ "initializer_range": 0.02,
+ "label2id": {
+ "aeroplane": 1,
+ "background": 0,
+ "bicycle": 2,
+ "bird": 3,
+ "boat": 4,
+ "bottle": 5,
+ "bus": 6,
+ "car": 7,
+ "cat": 8,
+ "chair": 9,
+ "cow": 10,
+ "diningtable": 11,
+ "dog": 12,
+ "horse": 13,
+ "motorbike": 14,
+ "person": 15,
+ "pottedplant": 16,
+ "sheep": 17,
+ "sofa": 18,
+ "train": 19,
+ "tvmonitor": 20
+ },
+ "layer_norm_eps": 1e-05,
+ "mlp_ratio": 2.0,
+ "model_type": "mobilevit",
+ "neck_hidden_sizes": [
+ 16,
+ 32,
+ 64,
+ 96,
+ 128,
+ 160,
+ 640
+ ],
+ "num_attention_heads": 4,
+ "num_channels": 3,
+ "output_stride": 16,
+ "patch_size": 2,
+ "qkv_bias": true,
+ "semantic_loss_ignore_index": 255,
+ "transformers_version": "4.30.2"
+ }
models/deeplabv3-mobilevit-small/onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9ebc9436f8387fac8595e99566c3b4eae2802bd614924468d7a3d0948e4dbd7
+ size 25725066
models/deeplabv3-mobilevit-small/onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ef568c975bfcaee201535f4b27855a370cd88891000f5d5573b686d69caa49b
+ size 13127014
models/deeplabv3-mobilevit-small/onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb155ceb71ffad0b6785f8101cfa177e4dba18b906511044348c4dd094117598
+ size 7095228
models/deeplabv3-mobilevit-small/preprocessor_config.json ADDED
@@ -0,0 +1,18 @@
+ {
+ "crop_size": {
+ "height": 512,
+ "width": 512
+ },
+ "do_center_crop": true,
+ "do_flip_channel_order": true,
+ "do_flip_channels": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "feature_extractor_type": "MobileViTFeatureExtractor",
+ "image_processor_type": "MobileViTFeatureExtractor",
+ "resample": 2,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "shortest_edge": 544
+ }
+ }
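For reference, a minimal NumPy/PIL sketch of the pipeline this config describes: resize the shortest edge to 544 with bilinear resampling (`resample: 2`), center-crop to 512x512, rescale by 1/255 ≈ 0.0039, and flip RGB to BGR (`do_flip_channel_order`). In practice `MobileViTFeatureExtractor` does all of this for you:

```python
import numpy as np
from PIL import Image

def preprocess(img: Image.Image) -> np.ndarray:
    # Resize so the shortest edge is 544 (bilinear, matching resample=2).
    w, h = img.size
    scale = 544 / min(w, h)
    img = img.resize((round(w * scale), round(h * scale)), Image.BILINEAR)
    # Center-crop to 512x512.
    w, h = img.size
    left, top = (w - 512) // 2, (h - 512) // 2
    img = img.crop((left, top, left + 512, top + 512))
    # Rescale to [0, 1] and flip RGB -> BGR.
    arr = np.asarray(img).astype(np.float32) / 255.0
    arr = arr[..., ::-1]
    # Channels-first with a batch axis: (1, 3, 512, 512).
    return arr.transpose(2, 0, 1)[None]
```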
models/deeplabv3-mobilevit-small/quant_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "per_channel": true,
+ "reduce_range": true,
+ "per_model_config": {
+ "model": {
+ "op_types": [
+ "Add",
+ "Gather",
+ "Softmax",
+ "GlobalAveragePool",
+ "Transpose",
+ "Relu",
+ "Concat",
+ "ReduceMean",
+ "Resize",
+ "Cast",
+ "Shape",
+ "Div",
+ "Constant",
+ "Slice",
+ "Pow",
+ "Sqrt",
+ "Reshape",
+ "Unsqueeze",
+ "MatMul",
+ "Conv",
+ "Mul",
+ "Sub",
+ "Sigmoid"
+ ],
+ "weight_type": "QUInt8"
+ }
+ }
+ }
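These settings mirror `onnxruntime`'s dynamic-quantization options (`per_channel`, `reduce_range`, `QUInt8` weights). A hedged sketch of how `model_quantized.onnx` could be reproduced; whether this exact call generated the committed file is an assumption:

```python
from onnxruntime.quantization import QuantType, quantize_dynamic

# Dynamically quantize the fp32 export with the settings from quant_config.json.
quantize_dynamic(
    "models/deeplabv3-mobilevit-small/onnx/model.onnx",
    "models/deeplabv3-mobilevit-small/onnx/model_quantized.onnx",
    per_channel=True,
    reduce_range=True,
    weight_type=QuantType.QUInt8,  # matches "weight_type" above
)
```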
models/deeplabv3-mobilevit-small/source.txt ADDED
@@ -0,0 +1 @@
+ https://huggingface.co/Xenova/deeplabv3-mobilevit-small
models/deeplabv3-mobilevit-x-small (apple)/.gitattributes ADDED
@@ -0,0 +1,27 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/deeplabv3-mobilevit-x-small (apple)/LICENSE ADDED
@@ -0,0 +1,88 @@
+ Disclaimer: IMPORTANT: This Apple Machine Learning Research Model is
+ specifically developed and released by Apple Inc. ("Apple") for the sole purpose
+ of scientific research of artificial intelligence and machine-learning
+ technology. “Apple Machine Learning Research Model” means the model, including
+ but not limited to algorithms, formulas, trained model weights, parameters,
+ configurations, checkpoints, and any related materials (including
+ documentation).
+
+ This Apple Machine Learning Research Model is provided to You by
+ Apple in consideration of your agreement to the following terms, and your use,
+ modification, creation of Model Derivatives, and or redistribution of the Apple
+ Machine Learning Research Model constitutes acceptance of this Agreement. If You
+ do not agree with these terms, please do not use, modify, create Model
+ Derivatives of, or distribute this Apple Machine Learning Research Model or
+ Model Derivatives.
+
+ * License Scope: In consideration of your agreement to abide by the following
+ terms, and subject to these terms, Apple hereby grants you a personal,
+ non-exclusive, worldwide, non-transferable, royalty-free, revocable, and
+ limited license, to use, copy, modify, distribute, and create Model
+ Derivatives (defined below) of the Apple Machine Learning Research Model
+ exclusively for Research Purposes. You agree that any Model Derivatives You
+ may create or that may be created for You will be limited to Research Purposes
+ as well. “Research Purposes” means non-commercial scientific research and
+ academic development activities, such as experimentation, analysis, testing
+ conducted by You with the sole intent to advance scientific knowledge and
+ research. “Research Purposes” does not include any commercial exploitation,
+ product development or use in any commercial product or service.
+
+ * Distribution of Apple Machine Learning Research Model and Model Derivatives:
+ If you choose to redistribute Apple Machine Learning Research Model or its
+ Model Derivatives, you must provide a copy of this Agreement to such third
+ party, and ensure that the following attribution notice be provided: “Apple
+ Machine Learning Research Model is licensed under the Apple Machine Learning
+ Research Model License Agreement.” Additionally, all Model Derivatives must
+ clearly be identified as such, including disclosure of modifications and
+ changes made to the Apple Machine Learning Research Model. The name,
+ trademarks, service marks or logos of Apple may not be used to endorse or
+ promote Model Derivatives or the relationship between You and Apple. “Model
+ Derivatives” means any models or any other artifacts created by modifications,
+ improvements, adaptations, alterations to the architecture, algorithm or
+ training processes of the Apple Machine Learning Research Model, or by any
+ retraining, fine-tuning of the Apple Machine Learning Research Model.
+
+ * No Other License: Except as expressly stated in this notice, no other rights
+ or licenses, express or implied, are granted by Apple herein, including but
+ not limited to any patent, trademark, and similar intellectual property rights
+ worldwide that may be infringed by the Apple Machine Learning Research Model,
+ the Model Derivatives or by other works in which the Apple Machine Learning
+ Research Model may be incorporated.
+
+ * Compliance with Laws: Your use of Apple Machine Learning Research Model must
+ be in compliance with all applicable laws and regulations.
+
+ * Term and Termination: The term of this Agreement will begin upon your
+ acceptance of this Agreement or use of the Apple Machine Learning Research
+ Model and will continue until terminated in accordance with the following
+ terms. Apple may terminate this Agreement at any time if You are in breach of
+ any term or condition of this Agreement. Upon termination of this Agreement,
+ You must cease to use all Apple Machine Learning Research Models and Model
+ Derivatives and permanently delete any copy thereof. Sections 3, 6 and 7 will
+ survive termination.
+
+ * Disclaimer and Limitation of Liability: This Apple Machine Learning Research
+ Model and any outputs generated by the Apple Machine Learning Research Model
+ are provided on an “AS IS” basis. APPLE MAKES NO WARRANTIES, EXPRESS OR
+ IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
+ NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE,
+ REGARDING THE APPLE MACHINE LEARNING RESEARCH MODEL OR OUTPUTS GENERATED BY
+ THE APPLE MACHINE LEARNING RESEARCH MODEL. You are solely responsible for
+ determining the appropriateness of using or redistributing the Apple Machine
+ Learning Research Model and any outputs of the Apple Machine Learning Research
+ Model and assume any risks associated with Your use of the Apple Machine
+ Learning Research Model and any output and results. IN NO EVENT SHALL APPLE BE
+ LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+ IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF
+ THE APPLE MACHINE LEARNING RESEARCH MODEL AND ANY OUTPUTS OF THE APPLE MACHINE
+ LEARNING RESEARCH MODEL, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT,
+ TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS
+ BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ * Governing Law: This Agreement will be governed by and construed under the laws
+ of the State of California without regard to its choice of law principles. The
+ Convention on Contracts for the International Sale of Goods shall not apply to
+ the Agreement except that the arbitration clause and any arbitration hereunder
+ shall be governed by the Federal Arbitration Act, Chapters 1 and 2.
+
+ Copyright (C) 2025 Apple Inc. All Rights Reserved.
models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:956d0bcc1ee6a542a38da38b41c336c67133d1a0d042cc25e2d5c614b8204a2e
+ size 147391
models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4a953a528a0d96f99cb90985edb269343c3388448f085cbdb674b73a8e801bf5
+ size 11770752
models/deeplabv3-mobilevit-x-small (apple)/MobileViT_DeepLabV3.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
+ {
+ "fileFormatVersion": "1.0.0",
+ "itemInfoEntries": {
+ "68772B66-C952-4603-A56D-D7A693B54D42": {
+ "author": "com.apple.CoreML",
+ "description": "CoreML Model Weights",
+ "name": "weights",
+ "path": "com.apple.CoreML/weights"
+ },
+ "A74A3117-90A0-44A8-A884-981A9F31DC56": {
+ "author": "com.apple.CoreML",
+ "description": "CoreML Model Specification",
+ "name": "model.mlmodel",
+ "path": "com.apple.CoreML/model.mlmodel"
+ }
+ },
+ "rootModelIdentifier": "A74A3117-90A0-44A8-A884-981A9F31DC56"
+ }
models/deeplabv3-mobilevit-x-small (apple)/README.md ADDED
@@ -0,0 +1,86 @@
+ ---
+ license: other
+ tags:
+ - vision
+ - image-segmentation
+ datasets:
+ - pascal-voc
+ widget:
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat-2.jpg
+ example_title: Cat
+ ---
+
+ # MobileViT + DeepLabV3 (extra small-sized model)
+
+ MobileViT model pre-trained on PASCAL VOC at resolution 512x512. It was introduced in [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari, and first released in [this repository](https://github.com/apple/ml-cvnets). The license used is [Apple sample code license](https://github.com/apple/ml-cvnets/blob/main/LICENSE).
+
+ Disclaimer: The team releasing MobileViT did not write a model card for this model so this model card has been written by the Hugging Face team.
+
+ ## Model description
+
+ MobileViT is a light-weight, low latency convolutional neural network that combines MobileNetV2-style layers with a new block that replaces local processing in convolutions with global processing using transformers. As with ViT (Vision Transformer), the image data is converted into flattened patches before it is processed by the transformer layers. Afterwards, the patches are "unflattened" back into feature maps. This allows the MobileViT-block to be placed anywhere inside a CNN. MobileViT does not require any positional embeddings.
+
+ The model in this repo adds a [DeepLabV3](https://arxiv.org/abs/1706.05587) head to the MobileViT backbone for semantic segmentation.
+
+ ## Intended uses & limitations
+
+ You can use the raw model for semantic segmentation. See the [model hub](https://huggingface.co/models?search=mobilevit) to look for fine-tuned versions on a task that interests you.
+
+ ### How to use
+
+ Here is how to use this model:
+
+ ```python
+ from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation
+ from PIL import Image
+ import requests
+
+ url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+ image = Image.open(requests.get(url, stream=True).raw)
+
+ feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-x-small")
+ model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-x-small")
+
+ inputs = feature_extractor(images=image, return_tensors="pt")
+
+ outputs = model(**inputs)
+ logits = outputs.logits
+ predicted_mask = logits.argmax(1).squeeze(0)
+ ```
+
+ Currently, both the feature extractor and model support PyTorch.
+
+ ## Training data
+
+ The MobileViT + DeepLabV3 model was pretrained on [ImageNet-1k](https://huggingface.co/datasets/imagenet-1k), a dataset consisting of 1 million images and 1,000 classes, and then fine-tuned on the [PASCAL VOC2012](http://host.robots.ox.ac.uk/pascal/VOC/) dataset.
+
+ ## Training procedure
+
+ ### Preprocessing
+
+ At inference time, images are center-cropped at 512x512. Pixels are normalized to the range [0, 1]. Images are expected to be in BGR pixel order, not RGB.
+
+ ### Pretraining
+
+ The MobileViT networks are trained from scratch for 300 epochs on ImageNet-1k on 8 NVIDIA GPUs with an effective batch size of 1024 and learning rate warmup for 3k steps, followed by cosine annealing. Also used were label smoothing cross-entropy loss and L2 weight decay. Training resolution varies from 160x160 to 320x320, using multi-scale sampling.
+
+ To obtain the DeepLabV3 model, MobileViT was fine-tuned on the PASCAL VOC dataset using 4 NVIDIA A100 GPUs.
+
+ ## Evaluation results
+
+ | Model | PASCAL VOC mIOU | # params | URL |
+ |------------------|-----------------|-----------|-----------------------------------------------------------|
+ | MobileViT-XXS | 73.6 | 1.9 M | https://huggingface.co/apple/deeplabv3-mobilevit-xx-small |
+ | **MobileViT-XS** | **77.1** | **2.9 M** | https://huggingface.co/apple/deeplabv3-mobilevit-x-small |
+ | MobileViT-S | 79.1 | 6.4 M | https://huggingface.co/apple/deeplabv3-mobilevit-small |
+
+ ### BibTeX entry and citation info
+
+ ```bibtex
+ @inproceedings{vision-transformer,
+ title = {MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer},
+ author = {Sachin Mehta and Mohammad Rastegari},
+ year = {2022},
+ URL = {https://arxiv.org/abs/2110.02178}
+ }
+ ```
models/deeplabv3-mobilevit-x-small (apple)/config.json ADDED
@@ -0,0 +1,91 @@
+ {
+ "architectures": [
+ "MobileViTForSemanticSegmentation"
+ ],
+ "aspp_dropout_prob": 0.1,
+ "aspp_out_channels": 256,
+ "atrous_rates": [
+ 6,
+ 12,
+ 18
+ ],
+ "attention_probs_dropout_prob": 0.0,
+ "classifier_dropout_prob": 0.1,
+ "conv_kernel_size": 3,
+ "expand_ratio": 4.0,
+ "hidden_act": "silu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_sizes": [
+ 96,
+ 120,
+ 144
+ ],
+ "id2label": {
+ "0": "background",
+ "1": "aeroplane",
+ "2": "bicycle",
+ "3": "bird",
+ "4": "boat",
+ "5": "bottle",
+ "6": "bus",
+ "7": "car",
+ "8": "cat",
+ "9": "chair",
+ "10": "cow",
+ "11": "diningtable",
+ "12": "dog",
+ "13": "horse",
+ "14": "motorbike",
+ "15": "person",
+ "16": "pottedplant",
+ "17": "sheep",
+ "18": "sofa",
+ "19": "train",
+ "20": "tvmonitor"
+ },
+ "image_size": 512,
+ "initializer_range": 0.02,
+ "label2id": {
+ "aeroplane": 1,
+ "background": 0,
+ "bicycle": 2,
+ "bird": 3,
+ "boat": 4,
+ "bottle": 5,
+ "bus": 6,
+ "car": 7,
+ "cat": 8,
+ "chair": 9,
+ "cow": 10,
+ "diningtable": 11,
+ "dog": 12,
+ "horse": 13,
+ "motorbike": 14,
+ "person": 15,
+ "pottedplant": 16,
+ "sheep": 17,
+ "sofa": 18,
+ "train": 19,
+ "tvmonitor": 20
+ },
+ "layer_norm_eps": 1e-05,
+ "mlp_ratio": 2.0,
+ "model_type": "mobilevit",
+ "neck_hidden_sizes": [
+ 16,
+ 32,
+ 48,
+ 64,
+ 80,
+ 96,
+ 384
+ ],
+ "num_attention_heads": 4,
+ "num_channels": 3,
+ "output_stride": 16,
+ "patch_size": 2,
+ "qkv_bias": true,
+ "semantic_loss_ignore_index": 255,
+ "torch_dtype": "float32",
+ "transformers_version": "4.20.0.dev0"
+ }
models/deeplabv3-mobilevit-x-small (apple)/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "crop_size": 512,
+ "do_center_crop": true,
+ "do_flip_channels": true,
+ "do_resize": true,
+ "feature_extractor_type": "MobileViTFeatureExtractor",
+ "resample": 2,
+ "size": 544
+ }