Someshfengde committed
Commit: 38a21f4
Parent(s): e42285e

Upload folder using huggingface_hub

Files changed:
- .gitattributes +0 -1
- README.md +144 -0
- SnakeCLEF2024_TestMetadata.csv +0 -0
- config.json +37 -0
- model.safetensors +3 -0
- pytorch_model.bin +3 -0
- script.py +96 -0
.gitattributes CHANGED
```diff
@@ -25,7 +25,6 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
```
README.md ADDED
@@ -0,0 +1,144 @@
---
license: mit
library_name: timm
tags:
- image-classification
- timm
datasets:
- imagenet-1k
---
# Model card for swinv2_tiny_window16_256.ms_in1k

A Swin Transformer V2 image classification model. Pretrained on ImageNet-1k by paper authors.


## Model Details
- **Model Type:** Image classification / feature backbone
- **Model Stats:**
  - Params (M): 28.3
  - GMACs: 6.7
  - Activations (M): 39.0
  - Image size: 256 x 256
- **Papers:**
  - Swin Transformer V2: Scaling Up Capacity and Resolution: https://arxiv.org/abs/2111.09883
- **Original:** https://github.com/microsoft/Swin-Transformer
- **Dataset:** ImageNet-1k
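The parameter count above can be sanity-checked straight from the instantiated architecture; a minimal sketch (assumes `timm` is installed, no weights download needed):

```python
import timm

# build the architecture only; the parameter count does not depend on weights
model = timm.create_model('swinv2_tiny_window16_256', pretrained=False)

n_params = sum(p.numel() for p in model.parameters())
print(f"Params (M): {n_params / 1e6:.1f}")  # expected ~28.3
```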
## Model Usage
### Image Classification
```python
from urllib.request import urlopen
from PIL import Image
import timm
import torch

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

model = timm.create_model('swinv2_tiny_window16_256.ms_in1k', pretrained=True)
model = model.eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

output = model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=1) * 100, k=5)
```
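To turn the result into something readable, the indices and percentages can be printed directly; a short usage sketch (mapping indices to ImageNet class names is omitted here):

```python
# print each of the top-5 predictions as "index: probability%"
for prob, idx in zip(top5_probabilities[0], top5_class_indices[0]):
    print(f"class {idx.item():>4}: {prob.item():5.2f}%")
```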
### Feature Map Extraction
```python
from urllib.request import urlopen
from PIL import Image
import timm

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

model = timm.create_model(
    'swinv2_tiny_window16_256.ms_in1k',
    pretrained=True,
    features_only=True,
)
model = model.eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

output = model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1

for o in output:
    # print shape of each feature map in output
    # e.g. for swin_base_patch4_window7_224 (NHWC output)
    #  torch.Size([1, 56, 56, 128])
    #  torch.Size([1, 28, 28, 256])
    #  torch.Size([1, 14, 14, 512])
    #  torch.Size([1, 7, 7, 1024])
    # e.g. for swinv2_cr_small_ns_224 (NCHW output)
    #  torch.Size([1, 96, 56, 56])
    #  torch.Size([1, 192, 28, 28])
    #  torch.Size([1, 384, 14, 14])
    #  torch.Size([1, 768, 7, 7])
    print(o.shape)
```
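As the comments note, swin / swinv2 feature maps come out NHWC. For downstream ops that expect channels-first tensors, a permute is the usual fix; a small sketch reusing `output` from the block above:

```python
# NHWC -> NCHW for channels-first consumers (e.g. nn.Conv2d)
feat_nchw = output[-1].permute(0, 3, 1, 2).contiguous()
print(feat_nchw.shape)  # torch.Size([1, 768, 8, 8]) for this model at 256x256
```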
### Image Embeddings
```python
from urllib.request import urlopen
from PIL import Image
import timm

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

model = timm.create_model(
    'swinv2_tiny_window16_256.ms_in1k',
    pretrained=True,
    num_classes=0,  # remove classifier nn.Linear
)
model = model.eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

output = model(transforms(img).unsqueeze(0))  # output is (batch_size, num_features) shaped tensor

# or equivalently (without needing to set num_classes=0)

output = model.forward_features(transforms(img).unsqueeze(0))
# output is unpooled, i.e. a (batch_size, H, W, num_features) tensor for swin / swinv2
# or a (batch_size, num_features, H, W) tensor for swinv2_cr

output = model.forward_head(output, pre_logits=True)
# output is a (batch_size, num_features) tensor
```
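A common use of these pooled embeddings is image similarity; a minimal sketch, where `img2` is a hypothetical second PIL image and `model` / `transforms` are reused from the block above:

```python
import torch.nn.functional as F

emb1 = model(transforms(img).unsqueeze(0))   # (1, num_features), since num_classes=0
emb2 = model(transforms(img2).unsqueeze(0))  # img2: hypothetical second image

# cosine similarity in embedding space, in [-1, 1]
print(F.cosine_similarity(emb1, emb2).item())
```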
## Model Comparison
Explore the dataset and runtime metrics of this model in timm [model results](https://github.com/huggingface/pytorch-image-models/tree/main/results).


## Citation
```bibtex
@inproceedings{liu2021swinv2,
  title={Swin Transformer V2: Scaling Up Capacity and Resolution},
  author={Ze Liu and Han Hu and Yutong Lin and Zhuliang Yao and Zhenda Xie and Yixuan Wei and Jia Ning and Yue Cao and Zheng Zhang and Li Dong and Furu Wei and Baining Guo},
  booktitle={International Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2022}
}
```
```bibtex
@misc{rw2019timm,
  author = {Ross Wightman},
  title = {PyTorch Image Models},
  year = {2019},
  publisher = {GitHub},
  journal = {GitHub repository},
  doi = {10.5281/zenodo.4414861},
  howpublished = {\url{https://github.com/huggingface/pytorch-image-models}}
}
```
SnakeCLEF2024_TestMetadata.csv ADDED
The diff for this file is too large to render. See raw diff.
config.json ADDED
@@ -0,0 +1,37 @@
```json
{
  "architecture": "swinv2_tiny_window16_256",
  "num_classes": 1000,
  "num_features": 768,
  "global_pool": "avg",
  "pretrained_cfg": {
    "tag": "ms_in1k",
    "custom_load": false,
    "input_size": [3, 256, 256],
    "fixed_input_size": true,
    "interpolation": "bicubic",
    "crop_pct": 0.9,
    "crop_mode": "center",
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225],
    "num_classes": 1000,
    "pool_size": [8, 8],
    "first_conv": "patch_embed.proj",
    "classifier": "head.fc",
    "license": "mit"
  }
}
```
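The `pretrained_cfg` block above is what drives timm's preprocessing; a minimal sketch showing that `resolve_model_data_config` surfaces these same values (assumes `timm` is installed):

```python
import timm

model = timm.create_model('swinv2_tiny_window16_256.ms_in1k', pretrained=False)
data_config = timm.data.resolve_model_data_config(model)

# mirrors pretrained_cfg: input_size (3, 256, 256), bicubic, crop_pct 0.9
print(data_config['input_size'], data_config['interpolation'], data_config['crop_pct'])
print(data_config['mean'], data_config['std'])
```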
model.safetensors ADDED
@@ -0,0 +1,3 @@
```
version https://git-lfs.github.com/spec/v1
oid sha256:fc77e7d880e04279619716383a676d8ab39866d3848981c740ac774c0fc2adf2
size 118653926
```
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
```
version https://git-lfs.github.com/spec/v1
oid sha256:5431a638f8d44f1ea1d67cd65f9d46e05c28eb34b35d697b9739df9beb7fc4d2
size 118714633
```
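Both weight files are stored as Git LFS pointers; after download, a checkpoint can be verified against the `oid` above. A small sketch (reads the whole file into memory, which is fine at ~119 MB):

```python
import hashlib

# hash the downloaded checkpoint and compare with the pointer's oid
with open("pytorch_model.bin", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

print(digest)  # should match 5431a638f8d44f1ea1d67cd65f9d46e05c28eb34b35d697b9739df9beb7fc4d2
```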
script.py ADDED
@@ -0,0 +1,96 @@
```python
import os
import zipfile

import numpy as np
import pandas as pd
import timm
import torch
import torchvision.transforms as T
from PIL import Image
from tqdm import tqdm


def is_gpu_available():
    """Check if a CUDA-capable GPU is available."""
    return torch.cuda.is_available()


class PytorchWorker:
    """Run inference using a PyTorch (timm) model."""

    def __init__(self, model_path: str, model_name: str, number_of_categories: int = 1604):

        def _load_model(model_name, model_path):
            print("Setting up Pytorch Model")
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            print(f"Using device: {self.device}")

            # build the architecture, then load the fine-tuned checkpoint onto the chosen device
            model = timm.create_model(model_name, num_classes=number_of_categories, pretrained=False)
            model_ckpt = torch.load(model_path, map_location=self.device)
            model.load_state_dict(model_ckpt)

            return model.to(self.device).eval()

        self.model = _load_model(model_name, model_path)

        # inference-time preprocessing: resize to 299x299 and scale pixels to [-1, 1]
        self.transforms = T.Compose([T.Resize((299, 299)),
                                     T.ToTensor(),
                                     T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])

    def predict_image(self, image: Image.Image) -> list:
        """Run inference on a single image.

        :param image: Input PIL image.
        :return: Nested list of logits with shape (1, number_of_categories).
        """
        with torch.no_grad():
            logits = self.model(self.transforms(image).unsqueeze(0).to(self.device))

        return logits.tolist()


def make_submission(test_metadata, model_path, model_name, output_csv_path="./submission.csv", images_root_path="/tmp/data/private_testset"):
    """Make a submission CSV with the given model and test metadata."""

    model = PytorchWorker(model_path, model_name)

    predictions = []

    for _, row in tqdm(test_metadata.iterrows(), total=len(test_metadata)):
        image_path = os.path.join(images_root_path, row.image_path)
        test_image = Image.open(image_path).convert("RGB")

        logits = model.predict_image(test_image)
        predictions.append(np.argmax(logits))

    test_metadata["class_id"] = predictions

    # keep a single prediction per observation (first image wins)
    user_pred_df = test_metadata.drop_duplicates("observation_id", keep="first")
    user_pred_df[["observation_id", "class_id"]].to_csv(output_csv_path, index=None)


if __name__ == "__main__":

    with zipfile.ZipFile("/tmp/data/private_testset.zip", "r") as zip_ref:
        zip_ref.extractall("/tmp/data")

    MODEL_PATH = "pytorch_model.bin"
    MODEL_NAME = "swinv2_tiny_window16_256.ms_in1k"

    metadata_file_path = "./SnakeCLEF2024_TestMetadata.csv"
    test_metadata = pd.read_csv(metadata_file_path)

    make_submission(
        test_metadata=test_metadata,
        model_path=MODEL_PATH,
        model_name=MODEL_NAME,
    )
```
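For testing outside the competition container, the same entry point can be driven with local paths; a hypothetical invocation (the `./data/test_images` root is an assumption, not part of this upload):

```python
import pandas as pd

from script import make_submission  # the __main__ guard keeps the import side-effect free

test_metadata = pd.read_csv("./SnakeCLEF2024_TestMetadata.csv")

make_submission(
    test_metadata=test_metadata,
    model_path="pytorch_model.bin",
    model_name="swinv2_tiny_window16_256.ms_in1k",
    output_csv_path="./submission.csv",
    images_root_path="./data/test_images",  # hypothetical local image root
)
```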