rwightman HF staff committed on
Commit
df7520b
1 Parent(s): ce178da

Update model config and README

Browse files
Files changed (2) hide show
  1. README.md +21 -17
  2. model.safetensors +3 -0
README.md CHANGED
@@ -2,7 +2,7 @@
2
  tags:
3
  - image-classification
4
  - timm
5
- library_tag: timm
6
  license: apache-2.0
7
  datasets:
8
  - imagenet-1k
@@ -14,7 +14,7 @@ A timm specific MaxxViT-V2 (w/ a MLP Log-CPB (continuous log-coordinate relative
14
 
15
  ImageNet-12k pretraining and ImageNet-1k fine-tuning performed on 8x GPU [Lambda Labs](https://lambdalabs.com/) cloud instances.
16
 
17
- ### Model Variants in [maxxvit.py](https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/maxxvit.py)
18
 
19
  MaxxViT covers a number of related model architectures that share a common structure including:
20
  - CoAtNet - Combining MBConv (depthwise-separable) convolutional blocks in early stages with self-attention transformer blocks in later stages.
@@ -47,8 +47,9 @@ from urllib.request import urlopen
47
  from PIL import Image
48
  import timm
49
 
50
- img = Image.open(
51
- urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
 
52
 
53
  model = timm.create_model('maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k', pretrained=True)
54
  model = model.eval()
@@ -68,8 +69,9 @@ from urllib.request import urlopen
68
  from PIL import Image
69
  import timm
70
 
71
- img = Image.open(
72
- urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
 
73
 
74
  model = timm.create_model(
75
  'maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k',
@@ -86,12 +88,13 @@ output = model(transforms(img).unsqueeze(0)) # unsqueeze single image into batc
86
 
87
  for o in output:
88
  # print shape of each feature map in output
89
- # e.g.:
90
- # torch.Size([1, 128, 192, 192])
91
- # torch.Size([1, 128, 96, 96])
92
- # torch.Size([1, 256, 48, 48])
93
- # torch.Size([1, 512, 24, 24])
94
- # torch.Size([1, 1024, 12, 12])
 
95
  print(o.shape)
96
  ```
97
 
@@ -101,8 +104,9 @@ from urllib.request import urlopen
101
  from PIL import Image
102
  import timm
103
 
104
- img = Image.open(
105
- urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
 
106
 
107
  model = timm.create_model(
108
  'maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k',
@@ -120,10 +124,10 @@ output = model(transforms(img).unsqueeze(0)) # output is (batch_size, num_featu
120
  # or equivalently (without needing to set num_classes=0)
121
 
122
  output = model.forward_features(transforms(img).unsqueeze(0))
123
- # output is unpooled (ie.e a (batch_size, num_features, H, W) tensor
124
 
125
  output = model.forward_head(output, pre_logits=True)
126
- # output is (batch_size, num_features) tensor
127
  ```
128
 
129
  ## Model Comparison
@@ -231,7 +235,7 @@ output = model.forward_head(output, pre_logits=True)
231
  publisher = {GitHub},
232
  journal = {GitHub repository},
233
  doi = {10.5281/zenodo.4414861},
234
- howpublished = {\url{https://github.com/rwightman/pytorch-image-models}}
235
  }
236
  ```
237
  ```bibtex
2
  tags:
3
  - image-classification
4
  - timm
5
+ library_name: timm
6
  license: apache-2.0
7
  datasets:
8
  - imagenet-1k
14
 
15
  ImageNet-12k pretraining and ImageNet-1k fine-tuning performed on 8x GPU [Lambda Labs](https://lambdalabs.com/) cloud instances.
16
 
17
+ ### Model Variants in [maxxvit.py](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/maxxvit.py)
18
 
19
  MaxxViT covers a number of related model architectures that share a common structure including:
20
  - CoAtNet - Combining MBConv (depthwise-separable) convolutional blocks in early stages with self-attention transformer blocks in later stages.
47
  from PIL import Image
48
  import timm
49
 
50
+ img = Image.open(urlopen(
51
+ 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
52
+ ))
53
 
54
  model = timm.create_model('maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k', pretrained=True)
55
  model = model.eval()
69
  from PIL import Image
70
  import timm
71
 
72
+ img = Image.open(urlopen(
73
+ 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
74
+ ))
75
 
76
  model = timm.create_model(
77
  'maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k',
88
 
89
  for o in output:
90
  # print shape of each feature map in output
91
+ # e.g.:
92
+ # torch.Size([1, 128, 112, 112])
93
+ # torch.Size([1, 128, 56, 56])
94
+ # torch.Size([1, 256, 28, 28])
95
+ # torch.Size([1, 512, 14, 14])
96
+ # torch.Size([1, 1024, 7, 7])
97
+
98
  print(o.shape)
99
  ```
100
 
104
  from PIL import Image
105
  import timm
106
 
107
+ img = Image.open(urlopen(
108
+ 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
109
+ ))
110
 
111
  model = timm.create_model(
112
  'maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k',
124
  # or equivalently (without needing to set num_classes=0)
125
 
126
  output = model.forward_features(transforms(img).unsqueeze(0))
127
+ # output is unpooled, a (1, 1024, 7, 7) shaped tensor
128
 
129
  output = model.forward_head(output, pre_logits=True)
130
+ # output is a (1, num_features) shaped tensor
131
  ```
132
 
133
  ## Model Comparison
235
  publisher = {GitHub},
236
  journal = {GitHub repository},
237
  doi = {10.5281/zenodo.4414861},
238
+ howpublished = {\url{https://github.com/huggingface/pytorch-image-models}}
239
  }
240
  ```
241
  ```bibtex
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c9e591e228878cb29d33e438c1e18e664b3c8a9cbdad6c88924c42fcafbff1a
3
+ size 464437480