Someshfengde committed
Commit 38a21f4
1 Parent(s): e42285e

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -25,7 +25,6 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,144 @@
+ ---
+ license: mit
+ library_name: timm
+ tags:
+ - image-classification
+ - timm
+ datasets:
+ - imagenet-1k
+ ---
+ # Model card for swinv2_tiny_window16_256.ms_in1k
+
+ A Swin Transformer V2 image classification model. Pretrained on ImageNet-1k by the paper authors.
+
+
+ ## Model Details
+ - **Model Type:** Image classification / feature backbone
+ - **Model Stats:**
+   - Params (M): 28.3
+   - GMACs: 6.7
+   - Activations (M): 39.0
+   - Image size: 256 x 256
+ - **Papers:**
+   - Swin Transformer V2: Scaling Up Capacity and Resolution: https://arxiv.org/abs/2111.09883
+ - **Original:** https://github.com/microsoft/Swin-Transformer
+ - **Dataset:** ImageNet-1k
+
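+ As a quick sanity check, the stats above can be reproduced from the model itself (a minimal sketch; assumes `timm` is installed and the pretrained weights can be downloaded):
+ ```python
+ import timm
+
+ model = timm.create_model('swinv2_tiny_window16_256.ms_in1k', pretrained=True)
+
+ # parameter count in millions (should print ~28.3)
+ print(sum(p.numel() for p in model.parameters()) / 1e6)
+
+ # preprocessing resolved from the pretrained config: input size, interpolation, mean/std
+ print(timm.data.resolve_model_data_config(model))
+ ```
+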
+ ## Model Usage
+ ### Image Classification
+ ```python
+ from urllib.request import urlopen
+ from PIL import Image
+ import timm
+ import torch
+
+ img = Image.open(urlopen(
+     'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
+ ))
+
+ model = timm.create_model('swinv2_tiny_window16_256.ms_in1k', pretrained=True)
+ model = model.eval()
+
+ # get model specific transforms (normalization, resize)
+ data_config = timm.data.resolve_model_data_config(model)
+ transforms = timm.data.create_transform(**data_config, is_training=False)
+
+ output = model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1
+
+ top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=1) * 100, k=5)
+ ```
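+
+ Gradient tracking isn't needed for pure inference; wrapping the forward pass in `torch.inference_mode()` saves memory and time (a minimal sketch under the same setup as above):
+ ```python
+ with torch.inference_mode():  # disable autograd bookkeeping for inference
+     output = model(transforms(img).unsqueeze(0))
+
+ top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=1) * 100, k=5)
+ ```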
+
+ ### Feature Map Extraction
+ ```python
+ from urllib.request import urlopen
+ from PIL import Image
+ import timm
+
+ img = Image.open(urlopen(
+     'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
+ ))
+
+ model = timm.create_model(
+     'swinv2_tiny_window16_256.ms_in1k',
+     pretrained=True,
+     features_only=True,
+ )
+ model = model.eval()
+
+ # get model specific transforms (normalization, resize)
+ data_config = timm.data.resolve_model_data_config(model)
+ transforms = timm.data.create_transform(**data_config, is_training=False)
+
+ output = model(transforms(img).unsqueeze(0))  # unsqueeze single image into batch of 1
+
+ for o in output:
+     # print shape of each feature map in output
+     # e.g. for swin_base_patch4_window7_224 (NHWC output)
+     #   torch.Size([1, 56, 56, 128])
+     #   torch.Size([1, 28, 28, 256])
+     #   torch.Size([1, 14, 14, 512])
+     #   torch.Size([1, 7, 7, 1024])
+     # e.g. for swinv2_cr_small_ns_224 (NCHW output)
+     #   torch.Size([1, 96, 56, 56])
+     #   torch.Size([1, 192, 28, 28])
+     #   torch.Size([1, 384, 14, 14])
+     #   torch.Size([1, 768, 7, 7])
+     print(o.shape)
+ ```
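+
+ If only some stages are needed, `features_only` also accepts `out_indices`, and `model.feature_info` describes each returned stage (a minimal sketch; the chosen indices are illustrative):
+ ```python
+ import timm
+
+ model = timm.create_model(
+     'swinv2_tiny_window16_256.ms_in1k',
+     pretrained=True,
+     features_only=True,
+     out_indices=(2, 3),  # keep only the last two stages
+ )
+
+ print(model.feature_info.channels())   # channels per returned feature map
+ print(model.feature_info.reduction())  # downsampling factor per feature map
+ ```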
+
+ ### Image Embeddings
+ ```python
+ from urllib.request import urlopen
+ from PIL import Image
+ import timm
+
+ img = Image.open(urlopen(
+     'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
+ ))
+
+ model = timm.create_model(
+     'swinv2_tiny_window16_256.ms_in1k',
+     pretrained=True,
+     num_classes=0,  # remove classifier nn.Linear
+ )
+ model = model.eval()
+
+ # get model specific transforms (normalization, resize)
+ data_config = timm.data.resolve_model_data_config(model)
+ transforms = timm.data.create_transform(**data_config, is_training=False)
+
+ output = model(transforms(img).unsqueeze(0))  # output is (batch_size, num_features) shaped tensor
+
+ # or equivalently (without needing to set num_classes=0)
+
+ output = model.forward_features(transforms(img).unsqueeze(0))
+ # output is unpooled, i.e. a (batch_size, H, W, num_features) tensor for swin / swinv2,
+ # or (batch_size, num_features, H, W) for swinv2_cr
+
+ output = model.forward_head(output, pre_logits=True)
+ # output is a (batch_size, num_features) tensor
+ ```
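+
+ The pooled embedding can be compared across images, e.g. for retrieval (a minimal sketch; assumes a second image `img2` loaded the same way as `img` above):
+ ```python
+ import torch.nn.functional as F
+
+ emb1 = model.forward_head(model.forward_features(transforms(img).unsqueeze(0)), pre_logits=True)
+ emb2 = model.forward_head(model.forward_features(transforms(img2).unsqueeze(0)), pre_logits=True)
+
+ # cosine similarity between the two (batch_size, num_features) embeddings
+ print(F.cosine_similarity(emb1, emb2))
+ ```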
+
+ ## Model Comparison
+ Explore the dataset and runtime metrics of this model in timm [model results](https://github.com/huggingface/pytorch-image-models/tree/main/results).
+
+
+ ## Citation
+ ```bibtex
+ @inproceedings{liu2021swinv2,
+   title={Swin Transformer V2: Scaling Up Capacity and Resolution},
+   author={Ze Liu and Han Hu and Yutong Lin and Zhuliang Yao and Zhenda Xie and Yixuan Wei and Jia Ning and Yue Cao and Zheng Zhang and Li Dong and Furu Wei and Baining Guo},
+   booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+   year={2022}
+ }
+ ```
+ ```bibtex
+ @misc{rw2019timm,
+   author = {Ross Wightman},
+   title = {PyTorch Image Models},
+   year = {2019},
+   publisher = {GitHub},
+   journal = {GitHub repository},
+   doi = {10.5281/zenodo.4414861},
+   howpublished = {\url{https://github.com/huggingface/pytorch-image-models}}
+ }
+ ```
SnakeCLEF2024_TestMetadata.csv ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "architecture": "swinv2_tiny_window16_256",
+   "num_classes": 1000,
+   "num_features": 768,
+   "global_pool": "avg",
+   "pretrained_cfg": {
+     "tag": "ms_in1k",
+     "custom_load": false,
+     "input_size": [
+       3,
+       256,
+       256
+     ],
+     "fixed_input_size": true,
+     "interpolation": "bicubic",
+     "crop_pct": 0.9,
+     "crop_mode": "center",
+     "mean": [
+       0.485,
+       0.456,
+       0.406
+     ],
+     "std": [
+       0.229,
+       0.224,
+       0.225
+     ],
+     "num_classes": 1000,
+     "pool_size": [
+       8,
+       8
+     ],
+     "first_conv": "patch_embed.proj",
+     "classifier": "head.fc",
+     "license": "mit"
+   }
+ }
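
The `pretrained_cfg` above drives eval preprocessing: resize the shorter side by `crop_pct`, center-crop to `input_size`, then normalize with `mean`/`std`. A roughly equivalent torchvision pipeline (a minimal sketch of standard timm eval preprocessing, not the exact implementation):

```python
import math

import torchvision.transforms as T
from torchvision.transforms import InterpolationMode

img_size, crop_pct = 256, 0.9
scale_size = int(math.floor(img_size / crop_pct))  # 284

eval_transform = T.Compose([
    T.Resize(scale_size, interpolation=InterpolationMode.BICUBIC),  # shorter side -> 284
    T.CenterCrop(img_size),                                         # 256 x 256 center crop
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
```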
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc77e7d880e04279619716383a676d8ab39866d3848981c740ac774c0fc2adf2
+ size 118653926
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5431a638f8d44f1ea1d67cd65f9d46e05c28eb34b35d697b9739df9beb7fc4d2
+ size 118714633
script.py ADDED
@@ -0,0 +1,96 @@
+ import os
+
+ import numpy as np
+ import pandas as pd
+ import timm
+ import torch
+ import torchvision.transforms as T
+ from PIL import Image
+ from tqdm import tqdm
+
+
+ def is_gpu_available():
+     """Check whether a CUDA-capable GPU is available."""
+     return torch.cuda.is_available()
+
+
+
+ class PytorchWorker:
+     """Run inference using PyTorch."""
+
+     def __init__(self, model_path: str, model_name: str, number_of_categories: int = 1604):
+
+         def _load_model(model_name, model_path):
+
+             print("Setting up PyTorch model")
+             self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+             print(f"Using device: {self.device}")
+
+             model = timm.create_model(model_name, num_classes=number_of_categories, pretrained=False)
+
+             # map_location handles both CPU-only and GPU machines
+             model_ckpt = torch.load(model_path, map_location=self.device)
+             model.load_state_dict(model_ckpt)
+
+             return model.to(self.device).eval()
+
+         self.model = _load_model(model_name, model_path)
+
+         # NOTE: these transforms must match the preprocessing used when the
+         # checkpoint was fine-tuned; they differ from the pretrained_cfg above
+         self.transforms = T.Compose([
+             T.Resize((299, 299)),
+             T.ToTensor(),
+             T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
+         ])
+
+     def predict_image(self, image: Image.Image) -> list:
+         """Run inference on a single image.
+
+         :param image: Input image as a PIL Image.
+         :return: A nested list of logits.
+         """
+
+         logits = self.model(self.transforms(image).unsqueeze(0).to(self.device))
+
+         return logits.tolist()
+
+
+ def make_submission(test_metadata, model_path, model_name, output_csv_path="./submission.csv", images_root_path="/tmp/data/private_testset"):
+     """Make a submission CSV for the given test metadata and model checkpoint."""
+
+     model = PytorchWorker(model_path, model_name)
+
+     predictions = []
+
+     for _, row in tqdm(test_metadata.iterrows(), total=len(test_metadata)):
+         image_path = os.path.join(images_root_path, row.image_path)
+
+         test_image = Image.open(image_path).convert("RGB")
+
+         logits = model.predict_image(test_image)
+
+         predictions.append(np.argmax(logits))
+
+     test_metadata["class_id"] = predictions
+
+     # keep one prediction per observation
+     user_pred_df = test_metadata.drop_duplicates("observation_id", keep="first")
+     user_pred_df[["observation_id", "class_id"]].to_csv(output_csv_path, index=False)
+
+
+ if __name__ == "__main__":
+
+     import zipfile
+
+     with zipfile.ZipFile("/tmp/data/private_testset.zip", 'r') as zip_ref:
+         zip_ref.extractall("/tmp/data")
+
+     MODEL_PATH = "pytorch_model.bin"
+     MODEL_NAME = "swinv2_tiny_window16_256.ms_in1k"
+
+     metadata_file_path = "./SnakeCLEF2024_TestMetadata.csv"
+     test_metadata = pd.read_csv(metadata_file_path)
+
+     make_submission(
+         test_metadata=test_metadata,
+         model_path=MODEL_PATH,
+         model_name=MODEL_NAME,
+     )
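
The per-image loop in `make_submission` is simple but leaves GPU throughput on the table. A batched variant could look like this (a hypothetical sketch reusing the worker's own transforms, model, and device; `predict_batch` is not part of the original script):

```python
import torch

def predict_batch(worker, images):
    """Run inference on a list of PIL images in one forward pass."""
    batch = torch.stack([worker.transforms(im) for im in images]).to(worker.device)
    with torch.inference_mode():  # no autograd overhead for pure inference
        logits = worker.model(batch)
    return logits.argmax(dim=1).tolist()
```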