RitwikGupta
committed on
Commit
•
5107f82
1
Parent(s):
d1302ce
Add weights and configs
Browse files
- hiera/configs/hiera_base_1e-4_hyper_224_448.yaml +63 -0
- hiera/configs/hiera_base_1e-4_nonxl_224.yaml +63 -0
- hiera/configs/hiera_baseplus_1e-4_nonxl_224.yaml +63 -0
- hiera/weights/hiera-b-hyper-224-448-top1.ckpt +3 -0
- hiera/weights/hiera-b-nonxl-224-top1.ckpt +3 -0
- hiera/weights/hiera-bplus-nonxl-224-top1.ckpt +3 -0
- swin-b/configs/swin_v2_base_1e-4_nonxl_256.yaml +69 -0
- swin-b/configs/swin_v2_base_1e-4_xl_512_256_context_2chips.yaml +75 -0
- swin-b/configs/swin_v2_base_1e-5_hyper-1layer_512_256.yaml +71 -0
- swin-b/configs/swin_v2_base_1e-5_hyper_256_256.yaml +71 -0
- swin-b/configs/swin_v2_base_1e-5_hyper_512_256.yaml +71 -0
- swin-b/configs/swin_v2_base_1e-5_mamba_512_256.yaml +75 -0
- swin-b/weights/swin-b-hyper-1layer-512-256-top1.ckpt +3 -0
- swin-b/weights/swin-b-hyper-256-256-top1.ckpt +3 -0
- swin-b/weights/swin-b-hyper-512-256-top1.ckpt +3 -0
- swin-b/weights/swin-b-mamba-512-256-top1.ckpt +3 -0
- swin-b/weights/swin-b-nonxl-256-top1.ckpt +3 -0
- swin-l/configs/swin_v2_large_1e-5_hyper-1layer_512_256.yaml +71 -0
- swin-l/configs/swin_v2_large_1e-5_hyper_256_256.yaml +71 -0
- swin-l/configs/swin_v2_large_1e-5_hyper_512_256.yaml +71 -0
- swin-l/configs/swin_v2_large_1e-5_mamba_512_256.yaml +75 -0
- swin-l/configs/swin_v2_large_1e-5_nonxl_256.yaml +70 -0
- swin-l/configs/swin_v2_large_1e-5_xl_512_256_context_2chips.yaml +75 -0
- swin-l/weights/swin-l-hyper-1layer-512-256-top1.ckpt +3 -0
- swin-l/weights/swin-l-hyper-256-256-top1.ckpt +3 -0
- swin-l/weights/swin-l-hyper-512-256-top1.ckpt +3 -0
- swin-l/weights/swin-l-mamba-512-256-top1.ckpt +3 -0
- swin-l/weights/swin-l-nonxl-256-top1.ckpt +3 -0
- swin-s/configs/swin_v2_small_1e-4_hyper-1layer_512_256.yaml +71 -0
- swin-s/configs/swin_v2_small_1e-4_hyper_256_256.yaml +71 -0
- swin-s/configs/swin_v2_small_1e-4_hyper_512_256.yaml +71 -0
- swin-s/configs/swin_v2_small_1e-4_nonxl_256.yaml +69 -0
- swin-s/configs/swin_v2_small_1e-4_xl_512_256_context_2chips.yaml +75 -0
- swin-s/weights/swin-s-hyper-1layer-512-256-top1.ckpt +3 -0
- swin-s/weights/swin-s-hyper-256-256-top1.ckpt +3 -0
- swin-s/weights/swin-s-hyper-512-256-top1.ckpt +3 -0
- swin-s/weights/swin-s-nonxl-256-top1.ckpt +3 -0
- swin-s/weights/swin-s-xl-512-256-top1.ckpt +3 -0
- swin-t/configs/swin_v2_tiny_1e-4_hyper-1layer_512_256.yaml +71 -0
- swin-t/configs/swin_v2_tiny_1e-4_hyper_256_256.yaml +71 -0
- swin-t/configs/swin_v2_tiny_1e-4_hyper_512_256.yaml +71 -0
- swin-t/configs/swin_v2_tiny_1e-4_mamba_512_256.yaml +75 -0
- swin-t/configs/swin_v2_tiny_1e-4_nonxl_256.yaml +69 -0
- swin-t/configs/swin_v2_tiny_1e-4_xl_512_256_context_2chips.yaml +75 -0
- swin-t/weights/swin-t-hyper-1layer-512-256-top1.ckpt +3 -0
- swin-t/weights/swin-t-hyper-256-256-top1.ckpt +3 -0
- swin-t/weights/swin-t-hyper-512-256-top1.ckpt +3 -0
- swin-t/weights/swin-t-mamba-512-256-top1.ckpt +3 -0
- swin-t/weights/swin-t-nonxl-256-top1.ckpt +3 -0
hiera/configs/hiera_base_1e-4_hyper_224_448.yaml
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_configs:
|
2 |
+
- config/base.yaml
|
3 |
+
- config/cluster_paths_inat.yaml
|
4 |
+
name: hiera_base_1e-4_hyper_224_448
|
5 |
+
data:
|
6 |
+
dataset: inaturalist
|
7 |
+
crop_size: 448
|
8 |
+
val_crop_size: 448
|
9 |
+
batch_size: ${train.batch_size}
|
10 |
+
val_batch_size: ${train.val_batch_size}
|
11 |
+
num_workers: 1
|
12 |
+
num_classes: 284
|
13 |
+
interpolation: bilinear
|
14 |
+
test_crop: false
|
15 |
+
aug:
|
16 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
17 |
+
color_jitter: 0.4
|
18 |
+
reprob: 0.0
|
19 |
+
remode: pixel
|
20 |
+
recount: 1
|
21 |
+
mixup: 0.0
|
22 |
+
cutmix: 0.0
|
23 |
+
label_smoothing: 0.3
|
24 |
+
random_resized_crop: False
|
25 |
+
mean: [0.485, 0.456, 0.406]
|
26 |
+
std: [0.229, 0.224, 0.225]
|
27 |
+
supercategories:
|
28 |
+
- Reptilia
|
29 |
+
model:
|
30 |
+
name: EncoderDecoder
|
31 |
+
num_classes: ${data.num_classes}
|
32 |
+
mlp_ratio: 4
|
33 |
+
backbone_class: get_hiera_model
|
34 |
+
backbone:
|
35 |
+
img_size: 224
|
36 |
+
input_dim: 3
|
37 |
+
cls_head: xl
|
38 |
+
context:
|
39 |
+
classification_mode: on
|
40 |
+
tiling: naive_two_stream
|
41 |
+
n_layer: 2
|
42 |
+
optimizer:
|
43 |
+
name: adamw
|
44 |
+
base_lr: 1e-4
|
45 |
+
classifier_ratio: 1.0
|
46 |
+
warmup_epochs: 0
|
47 |
+
train:
|
48 |
+
epochs: 100
|
49 |
+
batch_size: 16
|
50 |
+
val_batch_size: 1
|
51 |
+
freeze_epochs: 0
|
52 |
+
freeze_bn: false
|
53 |
+
test_every: 1
|
54 |
+
test_reset: true
|
55 |
+
clip_grad: 5.0
|
56 |
+
losses:
|
57 |
+
losses:
|
58 |
+
- name: cls
|
59 |
+
type: CrossEntropy
|
60 |
+
params:
|
61 |
+
field: label
|
62 |
+
weight: 1.0
|
63 |
+
display: on
|
hiera/configs/hiera_base_1e-4_nonxl_224.yaml
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_configs:
|
2 |
+
- config/base.yaml
|
3 |
+
- config/cluster_paths_inat.yaml
|
4 |
+
name: hiera_base_1e-4_nonxl_224
|
5 |
+
data:
|
6 |
+
dataset: inaturalist
|
7 |
+
crop_size: 224
|
8 |
+
val_crop_size: 224
|
9 |
+
batch_size: ${train.batch_size}
|
10 |
+
val_batch_size: ${train.val_batch_size}
|
11 |
+
num_workers: 1
|
12 |
+
num_classes: 284
|
13 |
+
interpolation: bilinear
|
14 |
+
test_crop: false
|
15 |
+
aug:
|
16 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
17 |
+
color_jitter: 0.4
|
18 |
+
reprob: 0.0
|
19 |
+
remode: pixel
|
20 |
+
recount: 1
|
21 |
+
mixup: 0.0
|
22 |
+
cutmix: 0.0
|
23 |
+
label_smoothing: 0.3
|
24 |
+
random_resized_crop: False
|
25 |
+
mean: [0.485, 0.456, 0.406]
|
26 |
+
std: [0.229, 0.224, 0.225]
|
27 |
+
supercategories:
|
28 |
+
- Reptilia
|
29 |
+
model:
|
30 |
+
name: EncoderDecoder
|
31 |
+
num_classes: ${data.num_classes}
|
32 |
+
mlp_ratio: 4
|
33 |
+
backbone_class: get_hiera_model
|
34 |
+
backbone:
|
35 |
+
img_size: 224
|
36 |
+
input_dim: 3
|
37 |
+
cls_head: naive
|
38 |
+
context:
|
39 |
+
classification_mode: on
|
40 |
+
tiling: naive_two_stream
|
41 |
+
n_layer: 2
|
42 |
+
optimizer:
|
43 |
+
name: adamw
|
44 |
+
base_lr: 1e-4
|
45 |
+
classifier_ratio: 1.0
|
46 |
+
warmup_epochs: 0
|
47 |
+
train:
|
48 |
+
epochs: 100
|
49 |
+
batch_size: 16
|
50 |
+
val_batch_size: 1
|
51 |
+
freeze_epochs: 0
|
52 |
+
freeze_bn: false
|
53 |
+
test_every: 1
|
54 |
+
test_reset: true
|
55 |
+
clip_grad: 5.0
|
56 |
+
losses:
|
57 |
+
losses:
|
58 |
+
- name: cls
|
59 |
+
type: CrossEntropy
|
60 |
+
params:
|
61 |
+
field: label
|
62 |
+
weight: 1.0
|
63 |
+
display: on
|
hiera/configs/hiera_baseplus_1e-4_nonxl_224.yaml
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_configs:
|
2 |
+
- config/base.yaml
|
3 |
+
- config/cluster_paths_inat.yaml
|
4 |
+
name: hiera_baseplus_1e-4_nonxl_224
|
5 |
+
data:
|
6 |
+
dataset: inaturalist
|
7 |
+
crop_size: 224
|
8 |
+
val_crop_size: 224
|
9 |
+
batch_size: ${train.batch_size}
|
10 |
+
val_batch_size: ${train.val_batch_size}
|
11 |
+
num_workers: 1
|
12 |
+
num_classes: 284
|
13 |
+
interpolation: bilinear
|
14 |
+
test_crop: false
|
15 |
+
aug:
|
16 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
17 |
+
color_jitter: 0.4
|
18 |
+
reprob: 0.0
|
19 |
+
remode: pixel
|
20 |
+
recount: 1
|
21 |
+
mixup: 0.0
|
22 |
+
cutmix: 0.0
|
23 |
+
label_smoothing: 0.3
|
24 |
+
random_resized_crop: False
|
25 |
+
mean: [0.485, 0.456, 0.406]
|
26 |
+
std: [0.229, 0.224, 0.225]
|
27 |
+
supercategories:
|
28 |
+
- Reptilia
|
29 |
+
model:
|
30 |
+
name: EncoderDecoder
|
31 |
+
num_classes: ${data.num_classes}
|
32 |
+
mlp_ratio: 4
|
33 |
+
backbone_class: get_hiera_model_base_plus
|
34 |
+
backbone:
|
35 |
+
img_size: 224
|
36 |
+
input_dim: 3
|
37 |
+
cls_head: naive
|
38 |
+
context:
|
39 |
+
classification_mode: on
|
40 |
+
tiling: naive_two_stream
|
41 |
+
n_layer: 2
|
42 |
+
optimizer:
|
43 |
+
name: adamw
|
44 |
+
base_lr: 1e-4
|
45 |
+
classifier_ratio: 1.0
|
46 |
+
warmup_epochs: 0
|
47 |
+
train:
|
48 |
+
epochs: 100
|
49 |
+
batch_size: 16
|
50 |
+
val_batch_size: 1
|
51 |
+
freeze_epochs: 0
|
52 |
+
freeze_bn: false
|
53 |
+
test_every: 1
|
54 |
+
test_reset: true
|
55 |
+
clip_grad: 5.0
|
56 |
+
losses:
|
57 |
+
losses:
|
58 |
+
- name: cls
|
59 |
+
type: CrossEntropy
|
60 |
+
params:
|
61 |
+
field: label
|
62 |
+
weight: 1.0
|
63 |
+
display: on
|
hiera/weights/hiera-b-hyper-224-448-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf7c07c912366183353161ecf2bb1ebe5bb4b003add32940aa3624c21965c453
|
3 |
+
size 281939643
|
hiera/weights/hiera-b-nonxl-224-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff2e83379a7ecb4d068999831f9dbefcaad6082145ffb66ec05a4f4c36d107bc
|
3 |
+
size 244166884
|
hiera/weights/hiera-bplus-nonxl-224-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:012cc98fd445a02d2a8902fec1b8486263f5fcaf5807953aacc8e88b6e683201
|
3 |
+
size 322400822
|
swin-b/configs/swin_v2_base_1e-4_nonxl_256.yaml
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_configs:
|
2 |
+
- config/base.yaml
|
3 |
+
- config/paths.yaml
|
4 |
+
name: swin_v2_base_1e-4_nonxl_256
|
5 |
+
data:
|
6 |
+
dataset: inaturalist
|
7 |
+
crop_size: 256
|
8 |
+
val_crop_size: 256
|
9 |
+
batch_size: ${train.batch_size}
|
10 |
+
val_batch_size: ${train.val_batch_size}
|
11 |
+
num_workers: 1
|
12 |
+
num_classes: 284
|
13 |
+
interpolation: bilinear
|
14 |
+
test_crop: False
|
15 |
+
aug:
|
16 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
17 |
+
color_jitter: 0.4
|
18 |
+
reprob: 0.0
|
19 |
+
remode: pixel
|
20 |
+
recount: 1
|
21 |
+
mixup: 0.0
|
22 |
+
cutmix: 0.0
|
23 |
+
label_smoothing: 0.3
|
24 |
+
random_resized_crop: False
|
25 |
+
mean: [0.485, 0.456, 0.406]
|
26 |
+
std: [0.229, 0.224, 0.225]
|
27 |
+
supercategories:
|
28 |
+
- Reptilia
|
29 |
+
model:
|
30 |
+
name: EncoderDecoderV2
|
31 |
+
num_classes: ${data.num_classes}
|
32 |
+
mlp_ratio: 4
|
33 |
+
backbone_class: swinv2_base_window16_256_timm
|
34 |
+
backbone:
|
35 |
+
img_size: 256
|
36 |
+
input_dim: 3
|
37 |
+
use_vanilla_backward: False
|
38 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
39 |
+
upsample: False
|
40 |
+
cls_head: naive
|
41 |
+
xl_context:
|
42 |
+
skip_connection: off
|
43 |
+
enabled: off
|
44 |
+
classification_mode: on
|
45 |
+
tiling: naive_two_stream
|
46 |
+
n_layer: 2
|
47 |
+
resume: ''
|
48 |
+
optimizer:
|
49 |
+
name: adamw
|
50 |
+
base_lr: 1e-4
|
51 |
+
classifier_ratio: 1.0
|
52 |
+
warmup_epochs: 0
|
53 |
+
train:
|
54 |
+
epochs: 100
|
55 |
+
batch_size: 60
|
56 |
+
val_batch_size: 1
|
57 |
+
freeze_epochs: 0
|
58 |
+
test_every: 1
|
59 |
+
test_reset: True
|
60 |
+
clip_grad: 5.0
|
61 |
+
val: False
|
62 |
+
losses:
|
63 |
+
losses:
|
64 |
+
- name: cls
|
65 |
+
type: CrossEntropy
|
66 |
+
params:
|
67 |
+
field: label
|
68 |
+
weight: 1.0
|
69 |
+
display: on
|
swin-b/configs/swin_v2_base_1e-4_xl_512_256_context_2chips.yaml
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/cluster_paths_inat.yaml
|
5 |
+
name: swin_v2_base_1e-4_xl_512_256_context_2chips
|
6 |
+
fsdp: off
|
7 |
+
fp16: off
|
8 |
+
data:
|
9 |
+
dataset: inaturalist
|
10 |
+
crop_size: 512
|
11 |
+
val_crop_size: 512
|
12 |
+
batch_size: ${train.batch_size}
|
13 |
+
val_batch_size: ${train.val_batch_size}
|
14 |
+
num_workers: 1
|
15 |
+
num_classes: 284
|
16 |
+
interpolation: bilinear
|
17 |
+
test_crop: False
|
18 |
+
aug:
|
19 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
20 |
+
color_jitter: 0.4
|
21 |
+
reprob: 0.0
|
22 |
+
remode: pixel
|
23 |
+
recount: 1
|
24 |
+
mixup: 0.0
|
25 |
+
cutmix: 0.0
|
26 |
+
label_smoothing: 0.3
|
27 |
+
random_resized_crop: False
|
28 |
+
mean: [0.485, 0.456, 0.406]
|
29 |
+
std: [0.229, 0.224, 0.225]
|
30 |
+
supercategories:
|
31 |
+
- Reptilia
|
32 |
+
model:
|
33 |
+
name: EncoderDecoderV2
|
34 |
+
num_classes: ${data.num_classes}
|
35 |
+
mlp_ratio: 4
|
36 |
+
backbone_class: swinv2_base_window16_256_timm
|
37 |
+
backbone:
|
38 |
+
img_size: 256
|
39 |
+
input_dim: 3
|
40 |
+
use_vanilla_backward: False
|
41 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
42 |
+
upsample: False
|
43 |
+
cls_head: xl
|
44 |
+
xl_context:
|
45 |
+
skip_connection: off
|
46 |
+
enabled: off
|
47 |
+
hidden_size: 768
|
48 |
+
classification_mode: off
|
49 |
+
in_context_patches: 128
|
50 |
+
tiling: naive_two_stream
|
51 |
+
n_layer: 2
|
52 |
+
mem_chip: 2
|
53 |
+
resume: ''
|
54 |
+
optimizer:
|
55 |
+
name: adamw
|
56 |
+
lr: 1e-4
|
57 |
+
classifier_ratio: 1.0
|
58 |
+
warmup_epochs: 0
|
59 |
+
train:
|
60 |
+
epochs: 100
|
61 |
+
batch_size: 14
|
62 |
+
val_batch_size: 1
|
63 |
+
freeze_epochs: 0
|
64 |
+
test_every: 1
|
65 |
+
test_reset: True
|
66 |
+
clip_grad: 5.0
|
67 |
+
val: False
|
68 |
+
losses:
|
69 |
+
losses:
|
70 |
+
- name: cls
|
71 |
+
type: CrossEntropy
|
72 |
+
params:
|
73 |
+
field: label
|
74 |
+
weight: 1.0
|
75 |
+
display: on
|
swin-b/configs/swin_v2_base_1e-5_hyper-1layer_512_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_base_1e-5_hyper-1layer_512_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 512
|
10 |
+
val_crop_size: 512
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_base_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 1
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-5
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 14
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-b/configs/swin_v2_base_1e-5_hyper_256_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_base_1e-5_hyper_256_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 256
|
10 |
+
val_crop_size: 256
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_base_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 2
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-5
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 14
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-b/configs/swin_v2_base_1e-5_hyper_512_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_base_1e-5_hyper_512_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 512
|
10 |
+
val_crop_size: 512
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_base_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 2
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-5
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 14
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-b/configs/swin_v2_base_1e-5_mamba_512_256.yaml
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_base_1e-5_mamba_512_256
|
6 |
+
fsdp: off
|
7 |
+
fp16: off
|
8 |
+
data:
|
9 |
+
dataset: inaturalist
|
10 |
+
crop_size: 512
|
11 |
+
val_crop_size: 512
|
12 |
+
batch_size: ${train.batch_size}
|
13 |
+
val_batch_size: ${train.val_batch_size}
|
14 |
+
num_workers: 1
|
15 |
+
num_classes: 284
|
16 |
+
interpolation: bilinear
|
17 |
+
test_crop: False
|
18 |
+
aug:
|
19 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
20 |
+
color_jitter: 0.4
|
21 |
+
reprob: 0.0
|
22 |
+
remode: pixel
|
23 |
+
recount: 1
|
24 |
+
mixup: 0.0
|
25 |
+
cutmix: 0.0
|
26 |
+
label_smoothing: 0.3
|
27 |
+
random_resized_crop: False
|
28 |
+
mean: [0.485, 0.456, 0.406]
|
29 |
+
std: [0.229, 0.224, 0.225]
|
30 |
+
supercategories:
|
31 |
+
- Reptilia
|
32 |
+
model:
|
33 |
+
name: EncoderDecoderV2
|
34 |
+
num_classes: ${data.num_classes}
|
35 |
+
mlp_ratio: 4
|
36 |
+
backbone_class: swinv2_base_window16_256_timm
|
37 |
+
backbone:
|
38 |
+
img_size: 256
|
39 |
+
input_dim: 3
|
40 |
+
use_vanilla_backward: False
|
41 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
42 |
+
upsample: False
|
43 |
+
cls_head: xl
|
44 |
+
xl_context:
|
45 |
+
skip_connection: off
|
46 |
+
enabled: off
|
47 |
+
hidden_size: 768
|
48 |
+
classification_mode: off
|
49 |
+
attention_method: mamba
|
50 |
+
tiling: naive_two_stream
|
51 |
+
n_layer: 4
|
52 |
+
resume: ''
|
53 |
+
optimizer:
|
54 |
+
name: adamw
|
55 |
+
base_lr: 1e-5
|
56 |
+
lr: 1e-5
|
57 |
+
classifier_ratio: 1.0
|
58 |
+
warmup_epochs: 0
|
59 |
+
train:
|
60 |
+
epochs: 100
|
61 |
+
batch_size: 16
|
62 |
+
val_batch_size: 1
|
63 |
+
freeze_epochs: 0
|
64 |
+
test_every: 1
|
65 |
+
test_reset: True
|
66 |
+
clip_grad: 5.0
|
67 |
+
val: False
|
68 |
+
losses:
|
69 |
+
losses:
|
70 |
+
- name: cls
|
71 |
+
type: CrossEntropy
|
72 |
+
params:
|
73 |
+
field: label
|
74 |
+
weight: 1.0
|
75 |
+
display: on
|
swin-b/weights/swin-b-hyper-1layer-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f78ad762b9e6862c262402f128d9e3638279ac3fb2b09ff3a862123291857cdc
|
3 |
+
size 389571612
|
swin-b/weights/swin-b-hyper-256-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51d688d4f7eebba35371b5b55f77b52e9bac671a8e6a458422beaffea0f679b8
|
3 |
+
size 427339589
|
swin-b/weights/swin-b-hyper-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1b655efd85c4ffd6605bbb5e40edd0c27810e7daf52fd71d213858417f09d59
|
3 |
+
size 427339589
|
swin-b/weights/swin-b-mamba-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5dde760dd280ca06440ce561969c8f9af795fae5b11a0ff8ed955b997c40d18
|
3 |
+
size 412174185
|
swin-b/weights/swin-b-nonxl-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5acf64cb87dea60dd49618bfc58987a217ba45fa8c3369b1667a8ba4f1ad6d1d
|
3 |
+
size 369244131
|
swin-l/configs/swin_v2_large_1e-5_hyper-1layer_512_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_large_1e-5_hyper-1layer_512_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 512
|
10 |
+
val_crop_size: 512
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_large_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 1
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-5
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 8
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-l/configs/swin_v2_large_1e-5_hyper_256_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_large_1e-5_hyper_256_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 256
|
10 |
+
val_crop_size: 256
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_large_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 2
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-5
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 32
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-l/configs/swin_v2_large_1e-5_hyper_512_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_large_1e-5_hyper_512_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 512
|
10 |
+
val_crop_size: 512
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_large_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 2
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-5
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 8
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-l/configs/swin_v2_large_1e-5_mamba_512_256.yaml
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_large_1e-5_mamba_512_256
|
6 |
+
fsdp: off
|
7 |
+
fp16: off
|
8 |
+
data:
|
9 |
+
dataset: inaturalist
|
10 |
+
crop_size: 512
|
11 |
+
val_crop_size: 512
|
12 |
+
batch_size: ${train.batch_size}
|
13 |
+
val_batch_size: ${train.val_batch_size}
|
14 |
+
num_workers: 1
|
15 |
+
num_classes: 284
|
16 |
+
interpolation: bilinear
|
17 |
+
test_crop: False
|
18 |
+
aug:
|
19 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
20 |
+
color_jitter: 0.4
|
21 |
+
reprob: 0.0
|
22 |
+
remode: pixel
|
23 |
+
recount: 1
|
24 |
+
mixup: 0.0
|
25 |
+
cutmix: 0.0
|
26 |
+
label_smoothing: 0.3
|
27 |
+
random_resized_crop: False
|
28 |
+
mean: [0.485, 0.456, 0.406]
|
29 |
+
std: [0.229, 0.224, 0.225]
|
30 |
+
supercategories:
|
31 |
+
- Reptilia
|
32 |
+
model:
|
33 |
+
name: EncoderDecoderV2
|
34 |
+
num_classes: ${data.num_classes}
|
35 |
+
mlp_ratio: 4
|
36 |
+
backbone_class: swinv2_large_window16_256_timm
|
37 |
+
backbone:
|
38 |
+
img_size: 256
|
39 |
+
input_dim: 3
|
40 |
+
use_vanilla_backward: False
|
41 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_large_patch4_window16_256.pth
|
42 |
+
upsample: False
|
43 |
+
cls_head: xl
|
44 |
+
xl_context:
|
45 |
+
skip_connection: off
|
46 |
+
enabled: off
|
47 |
+
hidden_size: 768
|
48 |
+
classification_mode: off
|
49 |
+
attention_method: mamba
|
50 |
+
tiling: naive_two_stream
|
51 |
+
n_layer: 4
|
52 |
+
resume: ''
|
53 |
+
optimizer:
|
54 |
+
name: adamw
|
55 |
+
base_lr: 1e-5
|
56 |
+
lr: 1e-5
|
57 |
+
classifier_ratio: 1.0
|
58 |
+
warmup_epochs: 0
|
59 |
+
train:
|
60 |
+
epochs: 100
|
61 |
+
batch_size: 8
|
62 |
+
val_batch_size: 1
|
63 |
+
freeze_epochs: 0
|
64 |
+
test_every: 1
|
65 |
+
test_reset: True
|
66 |
+
clip_grad: 5.0
|
67 |
+
val: False
|
68 |
+
losses:
|
69 |
+
losses:
|
70 |
+
- name: cls
|
71 |
+
type: CrossEntropy
|
72 |
+
params:
|
73 |
+
field: label
|
74 |
+
weight: 1.0
|
75 |
+
display: on
|
swin-l/configs/swin_v2_large_1e-5_nonxl_256.yaml
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_configs:
|
2 |
+
- config/base.yaml
|
3 |
+
- config/paths.yaml
|
4 |
+
name: swin_v2_large_1e-5_nonxl_256
|
5 |
+
fsdp: off
|
6 |
+
data:
|
7 |
+
dataset: inaturalist
|
8 |
+
crop_size: 256
|
9 |
+
val_crop_size: 256
|
10 |
+
batch_size: ${train.batch_size}
|
11 |
+
val_batch_size: ${train.val_batch_size}
|
12 |
+
num_workers: 1
|
13 |
+
num_classes: 284
|
14 |
+
interpolation: bilinear
|
15 |
+
test_crop: False
|
16 |
+
aug:
|
17 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
18 |
+
color_jitter: 0.4
|
19 |
+
reprob: 0.0
|
20 |
+
remode: pixel
|
21 |
+
recount: 1
|
22 |
+
mixup: 0.0
|
23 |
+
cutmix: 0.0
|
24 |
+
label_smoothing: 0.3
|
25 |
+
random_resized_crop: False
|
26 |
+
mean: [0.485, 0.456, 0.406]
|
27 |
+
std: [0.229, 0.224, 0.225]
|
28 |
+
supercategories:
|
29 |
+
- Reptilia
|
30 |
+
model:
|
31 |
+
name: EncoderDecoderV2
|
32 |
+
num_classes: ${data.num_classes}
|
33 |
+
mlp_ratio: 4
|
34 |
+
backbone_class: swinv2_large_window16_256_timm
|
35 |
+
backbone:
|
36 |
+
img_size: 256
|
37 |
+
input_dim: 3
|
38 |
+
use_vanilla_backward: False
|
39 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
40 |
+
upsample: False
|
41 |
+
cls_head: naive
|
42 |
+
xl_context:
|
43 |
+
skip_connection: off
|
44 |
+
enabled: off
|
45 |
+
classification_mode: on
|
46 |
+
tiling: naive_two_stream
|
47 |
+
n_layer: 2
|
48 |
+
resume: ''
|
49 |
+
optimizer:
|
50 |
+
name: adamw
|
51 |
+
base_lr: 1e-5
|
52 |
+
classifier_ratio: 1.0
|
53 |
+
warmup_epochs: 0
|
54 |
+
train:
|
55 |
+
epochs: 100
|
56 |
+
batch_size: 2
|
57 |
+
val_batch_size: 1
|
58 |
+
freeze_epochs: 0
|
59 |
+
test_every: 1
|
60 |
+
test_reset: True
|
61 |
+
clip_grad: 5.0
|
62 |
+
val: False
|
63 |
+
losses:
|
64 |
+
losses:
|
65 |
+
- name: cls
|
66 |
+
type: CrossEntropy
|
67 |
+
params:
|
68 |
+
field: label
|
69 |
+
weight: 1.0
|
70 |
+
display: on
|
swin-l/configs/swin_v2_large_1e-5_xl_512_256_context_2chips.yaml
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/cluster_paths_inat.yaml
|
5 |
+
name: swin_v2_large_1e-5_xl_512_256_context_2chips
|
6 |
+
fsdp: off
|
7 |
+
fp16: off
|
8 |
+
data:
|
9 |
+
dataset: inaturalist
|
10 |
+
crop_size: 512
|
11 |
+
val_crop_size: 512
|
12 |
+
batch_size: ${train.batch_size}
|
13 |
+
val_batch_size: ${train.val_batch_size}
|
14 |
+
num_workers: 1
|
15 |
+
num_classes: 284
|
16 |
+
interpolation: bilinear
|
17 |
+
test_crop: False
|
18 |
+
aug:
|
19 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
20 |
+
color_jitter: 0.4
|
21 |
+
reprob: 0.0
|
22 |
+
remode: pixel
|
23 |
+
recount: 1
|
24 |
+
mixup: 0.0
|
25 |
+
cutmix: 0.0
|
26 |
+
label_smoothing: 0.3
|
27 |
+
random_resized_crop: False
|
28 |
+
mean: [0.485, 0.456, 0.406]
|
29 |
+
std: [0.229, 0.224, 0.225]
|
30 |
+
supercategories:
|
31 |
+
- Reptilia
|
32 |
+
model:
|
33 |
+
name: EncoderDecoderV2
|
34 |
+
num_classes: ${data.num_classes}
|
35 |
+
mlp_ratio: 4
|
36 |
+
backbone_class: swinv2_large_window16_256_timm
|
37 |
+
backbone:
|
38 |
+
img_size: 256
|
39 |
+
input_dim: 3
|
40 |
+
use_vanilla_backward: False
|
41 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
42 |
+
upsample: False
|
43 |
+
cls_head: xl
|
44 |
+
xl_context:
|
45 |
+
skip_connection: off
|
46 |
+
enabled: off
|
47 |
+
hidden_size: 768
|
48 |
+
classification_mode: off
|
49 |
+
in_context_patches: 128
|
50 |
+
tiling: naive_two_stream
|
51 |
+
n_layer: 2
|
52 |
+
mem_chip: 2
|
53 |
+
resume: ''
|
54 |
+
optimizer:
|
55 |
+
name: adamw
|
56 |
+
lr: 1e-5
|
57 |
+
classifier_ratio: 1.0
|
58 |
+
warmup_epochs: 0
|
59 |
+
train:
|
60 |
+
epochs: 100
|
61 |
+
batch_size: 8
|
62 |
+
val_batch_size: 1
|
63 |
+
freeze_epochs: 0
|
64 |
+
test_every: 1
|
65 |
+
test_reset: True
|
66 |
+
clip_grad: 5.0
|
67 |
+
val: False
|
68 |
+
losses:
|
69 |
+
losses:
|
70 |
+
- name: cls
|
71 |
+
type: CrossEntropy
|
72 |
+
params:
|
73 |
+
field: label
|
74 |
+
weight: 1.0
|
75 |
+
display: on
|
swin-l/weights/swin-l-hyper-1layer-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f6c66c2cb5b50e5624317f5ed9b38c95d2242b05ef69e7b90d97125bf760a12
|
3 |
+
size 824377372
|
swin-l/weights/swin-l-hyper-256-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:138028b3ebe1136e54b0cf9e2465a723a4cc80c852bad1e96d82ba9e225e78b2
|
3 |
+
size 862144325
|
swin-l/weights/swin-l-hyper-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:741ade1df8e6af59fff9ab7b10a87243f88365fdf16c612d2698b5712d8b7477
|
3 |
+
size 862144325
|
swin-l/weights/swin-l-mamba-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aecf734bc26d77012e3b7148f9396bc9ca5e7c5d225e3136589170453f19a9c1
|
3 |
+
size 846978921
|
swin-l/weights/swin-l-nonxl-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53b5e9cfdb075bc422128002318f86a494235754e8e34ab4589f5fd40c2fb9c5
|
3 |
+
size 825798627
|
swin-s/configs/swin_v2_small_1e-4_hyper-1layer_512_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_small_1e-4_hyper-1layer_512_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 512
|
10 |
+
val_crop_size: 256
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_small_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_small_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 1
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-4
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 20
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-s/configs/swin_v2_small_1e-4_hyper_256_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_small_1e-4_hyper_256_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 256
|
10 |
+
val_crop_size: 256
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_small_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_small_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 2
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-4
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 80
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-s/configs/swin_v2_small_1e-4_hyper_512_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_small_1e-4_hyper_512_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 512
|
10 |
+
val_crop_size: 256
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_small_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_small_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 2
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-4
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 20
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-s/configs/swin_v2_small_1e-4_nonxl_256.yaml
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_configs:
|
2 |
+
- config/base.yaml
|
3 |
+
- config/paths.yaml
|
4 |
+
name: swin_v2_small_1e-4_nonxl_256
|
5 |
+
data:
|
6 |
+
dataset: inaturalist
|
7 |
+
crop_size: 256
|
8 |
+
val_crop_size: 256
|
9 |
+
batch_size: ${train.batch_size}
|
10 |
+
val_batch_size: ${train.val_batch_size}
|
11 |
+
num_workers: 1
|
12 |
+
num_classes: 284
|
13 |
+
interpolation: bilinear
|
14 |
+
test_crop: False
|
15 |
+
aug:
|
16 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
17 |
+
color_jitter: 0.4
|
18 |
+
reprob: 0.0
|
19 |
+
remode: pixel
|
20 |
+
recount: 1
|
21 |
+
mixup: 0.0
|
22 |
+
cutmix: 0.0
|
23 |
+
label_smoothing: 0.3
|
24 |
+
random_resized_crop: False
|
25 |
+
mean: [0.485, 0.456, 0.406]
|
26 |
+
std: [0.229, 0.224, 0.225]
|
27 |
+
supercategories:
|
28 |
+
- Reptilia
|
29 |
+
model:
|
30 |
+
name: EncoderDecoderV2
|
31 |
+
num_classes: ${data.num_classes}
|
32 |
+
mlp_ratio: 4
|
33 |
+
backbone_class: swinv2_small_window16_256_timm
|
34 |
+
backbone:
|
35 |
+
img_size: 256
|
36 |
+
input_dim: 3
|
37 |
+
use_vanilla_backward: False
|
38 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_small_patch4_window16_256.pth
|
39 |
+
upsample: False
|
40 |
+
cls_head: naive
|
41 |
+
xl_context:
|
42 |
+
skip_connection: off
|
43 |
+
enabled: off
|
44 |
+
classification_mode: on
|
45 |
+
tiling: naive_two_stream
|
46 |
+
n_layer: 2
|
47 |
+
resume: ''
|
48 |
+
optimizer:
|
49 |
+
name: adamw
|
50 |
+
base_lr: 1e-4
|
51 |
+
classifier_ratio: 1.0
|
52 |
+
warmup_epochs: 0
|
53 |
+
train:
|
54 |
+
epochs: 100
|
55 |
+
batch_size: 80
|
56 |
+
val_batch_size: 1
|
57 |
+
freeze_epochs: 0
|
58 |
+
test_every: 1
|
59 |
+
test_reset: True
|
60 |
+
clip_grad: 5.0
|
61 |
+
val: False
|
62 |
+
losses:
|
63 |
+
losses:
|
64 |
+
- name: cls
|
65 |
+
type: CrossEntropy
|
66 |
+
params:
|
67 |
+
field: label
|
68 |
+
weight: 1.0
|
69 |
+
display: on
|
swin-s/configs/swin_v2_small_1e-4_xl_512_256_context_2chips.yaml
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/cluster_paths_inat.yaml
|
5 |
+
name: swin_v2_small_1e-4_xl_512_256_context_2chips
|
6 |
+
fsdp: off
|
7 |
+
fp16: off
|
8 |
+
data:
|
9 |
+
dataset: inaturalist
|
10 |
+
crop_size: 512
|
11 |
+
val_crop_size: 512
|
12 |
+
batch_size: ${train.batch_size}
|
13 |
+
val_batch_size: ${train.val_batch_size}
|
14 |
+
num_workers: 1
|
15 |
+
num_classes: 284
|
16 |
+
interpolation: bilinear
|
17 |
+
test_crop: False
|
18 |
+
aug:
|
19 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
20 |
+
color_jitter: 0.4
|
21 |
+
reprob: 0.0
|
22 |
+
remode: pixel
|
23 |
+
recount: 1
|
24 |
+
mixup: 0.0
|
25 |
+
cutmix: 0.0
|
26 |
+
label_smoothing: 0.3
|
27 |
+
random_resized_crop: False
|
28 |
+
mean: [0.485, 0.456, 0.406]
|
29 |
+
std: [0.229, 0.224, 0.225]
|
30 |
+
supercategories:
|
31 |
+
- Reptilia
|
32 |
+
model:
|
33 |
+
name: EncoderDecoderV2
|
34 |
+
num_classes: ${data.num_classes}
|
35 |
+
mlp_ratio: 4
|
36 |
+
backbone_class: swinv2_small_window16_256_timm
|
37 |
+
backbone:
|
38 |
+
img_size: 256
|
39 |
+
input_dim: 3
|
40 |
+
use_vanilla_backward: False
|
41 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
|
42 |
+
upsample: False
|
43 |
+
cls_head: xl
|
44 |
+
xl_context:
|
45 |
+
skip_connection: off
|
46 |
+
enabled: off
|
47 |
+
hidden_size: 768
|
48 |
+
classification_mode: off
|
49 |
+
in_context_patches: 128
|
50 |
+
tiling: naive_two_stream
|
51 |
+
n_layer: 2
|
52 |
+
mem_chip: 2
|
53 |
+
resume: ''
|
54 |
+
optimizer:
|
55 |
+
name: adamw
|
56 |
+
lr: 1e-4
|
57 |
+
classifier_ratio: 1.0
|
58 |
+
warmup_epochs: 0
|
59 |
+
train:
|
60 |
+
epochs: 100
|
61 |
+
batch_size: 20
|
62 |
+
val_batch_size: 1
|
63 |
+
freeze_epochs: 0
|
64 |
+
test_every: 1
|
65 |
+
test_reset: True
|
66 |
+
clip_grad: 5.0
|
67 |
+
val: False
|
68 |
+
losses:
|
69 |
+
losses:
|
70 |
+
- name: cls
|
71 |
+
type: CrossEntropy
|
72 |
+
params:
|
73 |
+
field: label
|
74 |
+
weight: 1.0
|
75 |
+
display: on
|
swin-s/weights/swin-s-hyper-1layer-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:281efaeaa28fcb2209f89dd1aa073acb65e7caf319091b7e9ea00876b819992a
|
3 |
+
size 237049820
|
swin-s/weights/swin-s-hyper-256-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a91cd7dee38c2ff336b0303250a568d9c61ed0101ef4c2b788457d1beb5781a6
|
3 |
+
size 274817797
|
swin-s/weights/swin-s-hyper-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b78ca95892d7e1e3e1cc2719d476bd06db961ba4935ae01cf4556192d4f5b82
|
3 |
+
size 274817797
|
swin-s/weights/swin-s-nonxl-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b045b5f3e67f4345a693d7f3f5654fc2d3a89d1d2943b1169f280b363537851a
|
3 |
+
size 208993187
|
swin-s/weights/swin-s-xl-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fcee8718f03ec05e2f85627509303581309b197a9f5b5096eea62eb2d61f67e
|
3 |
+
size 274816453
|
swin-t/configs/swin_v2_tiny_1e-4_hyper-1layer_512_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_tiny_1e-4_hyper-1layer_512_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 512
|
10 |
+
val_crop_size: 256
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_tiny_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 1
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-4
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 32
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-t/configs/swin_v2_tiny_1e-4_hyper_256_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_tiny_1e-4_hyper_256_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 256
|
10 |
+
val_crop_size: 256
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_tiny_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 2
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-4
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 120
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-t/configs/swin_v2_tiny_1e-4_hyper_512_256.yaml
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_tiny_1e-4_hyper_512_256
|
6 |
+
fsdp: off
|
7 |
+
data:
|
8 |
+
dataset: inaturalist
|
9 |
+
crop_size: 512
|
10 |
+
val_crop_size: 256
|
11 |
+
batch_size: ${train.batch_size}
|
12 |
+
val_batch_size: ${train.val_batch_size}
|
13 |
+
num_workers: 1
|
14 |
+
num_classes: 284
|
15 |
+
interpolation: bilinear
|
16 |
+
test_crop: False
|
17 |
+
aug:
|
18 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
19 |
+
color_jitter: 0.4
|
20 |
+
reprob: 0.0
|
21 |
+
remode: pixel
|
22 |
+
recount: 1
|
23 |
+
mixup: 0.0
|
24 |
+
cutmix: 0.0
|
25 |
+
label_smoothing: 0.3
|
26 |
+
random_resized_crop: False
|
27 |
+
mean: [0.485, 0.456, 0.406]
|
28 |
+
std: [0.229, 0.224, 0.225]
|
29 |
+
supercategories:
|
30 |
+
- Reptilia
|
31 |
+
model:
|
32 |
+
name: EncoderDecoderV2
|
33 |
+
num_classes: ${data.num_classes}
|
34 |
+
mlp_ratio: 4
|
35 |
+
backbone_class: swinv2_tiny_window16_256_timm
|
36 |
+
backbone:
|
37 |
+
img_size: 256
|
38 |
+
input_dim: 3
|
39 |
+
use_vanilla_backward: False
|
40 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
|
41 |
+
upsample: False
|
42 |
+
cls_head: xl
|
43 |
+
xl_context:
|
44 |
+
skip_connection: off
|
45 |
+
enabled: off
|
46 |
+
classification_mode: on
|
47 |
+
tiling: naive_two_stream
|
48 |
+
n_layer: 2
|
49 |
+
resume: ''
|
50 |
+
optimizer:
|
51 |
+
name: adamw
|
52 |
+
base_lr: 1e-4
|
53 |
+
classifier_ratio: 1.0
|
54 |
+
warmup_epochs: 0
|
55 |
+
train:
|
56 |
+
epochs: 100
|
57 |
+
batch_size: 32
|
58 |
+
val_batch_size: 1
|
59 |
+
freeze_epochs: 0
|
60 |
+
test_every: 1
|
61 |
+
test_reset: True
|
62 |
+
clip_grad: 5.0
|
63 |
+
val: False
|
64 |
+
losses:
|
65 |
+
losses:
|
66 |
+
- name: cls
|
67 |
+
type: CrossEntropy
|
68 |
+
params:
|
69 |
+
field: label
|
70 |
+
weight: 1.0
|
71 |
+
display: on
|
swin-t/configs/swin_v2_tiny_1e-4_mamba_512_256.yaml
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/paths.yaml
|
5 |
+
name: swin_v2_tiny_1e-4_mamba_512_256
|
6 |
+
fsdp: off
|
7 |
+
fp16: off
|
8 |
+
data:
|
9 |
+
dataset: inaturalist
|
10 |
+
crop_size: 512
|
11 |
+
val_crop_size: 512
|
12 |
+
batch_size: ${train.batch_size}
|
13 |
+
val_batch_size: ${train.val_batch_size}
|
14 |
+
num_workers: 1
|
15 |
+
num_classes: 284
|
16 |
+
interpolation: bilinear
|
17 |
+
test_crop: False
|
18 |
+
aug:
|
19 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
20 |
+
color_jitter: 0.4
|
21 |
+
reprob: 0.0
|
22 |
+
remode: pixel
|
23 |
+
recount: 1
|
24 |
+
mixup: 0.0
|
25 |
+
cutmix: 0.0
|
26 |
+
label_smoothing: 0.3
|
27 |
+
random_resized_crop: False
|
28 |
+
mean: [0.485, 0.456, 0.406]
|
29 |
+
std: [0.229, 0.224, 0.225]
|
30 |
+
supercategories:
|
31 |
+
- Reptilia
|
32 |
+
model:
|
33 |
+
name: EncoderDecoderV2
|
34 |
+
num_classes: ${data.num_classes}
|
35 |
+
mlp_ratio: 4
|
36 |
+
backbone_class: swinv2_tiny_window16_256_timm
|
37 |
+
backbone:
|
38 |
+
img_size: 256
|
39 |
+
input_dim: 3
|
40 |
+
use_vanilla_backward: False
|
41 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
|
42 |
+
upsample: False
|
43 |
+
cls_head: xl
|
44 |
+
xl_context:
|
45 |
+
skip_connection: off
|
46 |
+
enabled: off
|
47 |
+
hidden_size: 768
|
48 |
+
classification_mode: off
|
49 |
+
attention_method: mamba
|
50 |
+
tiling: naive_two_stream
|
51 |
+
n_layer: 4
|
52 |
+
resume: ''
|
53 |
+
optimizer:
|
54 |
+
name: adamw
|
55 |
+
base_lr: 1e-4
|
56 |
+
lr: 1e-4
|
57 |
+
classifier_ratio: 1.0
|
58 |
+
warmup_epochs: 0
|
59 |
+
train:
|
60 |
+
epochs: 100
|
61 |
+
batch_size: 32
|
62 |
+
val_batch_size: 1
|
63 |
+
freeze_epochs: 0
|
64 |
+
test_every: 1
|
65 |
+
test_reset: True
|
66 |
+
clip_grad: 5.0
|
67 |
+
val: False
|
68 |
+
losses:
|
69 |
+
losses:
|
70 |
+
- name: cls
|
71 |
+
type: CrossEntropy
|
72 |
+
params:
|
73 |
+
field: label
|
74 |
+
weight: 1.0
|
75 |
+
display: on
|
swin-t/configs/swin_v2_tiny_1e-4_nonxl_256.yaml
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_configs:
|
2 |
+
- config/base.yaml
|
3 |
+
- config/paths.yaml
|
4 |
+
name: swin_v2_tiny_1e-4_nonxl_256
|
5 |
+
data:
|
6 |
+
dataset: inaturalist
|
7 |
+
crop_size: 256
|
8 |
+
val_crop_size: 256
|
9 |
+
batch_size: ${train.batch_size}
|
10 |
+
val_batch_size: ${train.val_batch_size}
|
11 |
+
num_workers: 1
|
12 |
+
num_classes: 284
|
13 |
+
interpolation: bilinear
|
14 |
+
test_crop: False
|
15 |
+
aug:
|
16 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
17 |
+
color_jitter: 0.4
|
18 |
+
reprob: 0.0
|
19 |
+
remode: pixel
|
20 |
+
recount: 1
|
21 |
+
mixup: 0.0
|
22 |
+
cutmix: 0.0
|
23 |
+
label_smoothing: 0.3
|
24 |
+
random_resized_crop: False
|
25 |
+
mean: [0.485, 0.456, 0.406]
|
26 |
+
std: [0.229, 0.224, 0.225]
|
27 |
+
supercategories:
|
28 |
+
- Reptilia
|
29 |
+
model:
|
30 |
+
name: EncoderDecoderV2
|
31 |
+
num_classes: ${data.num_classes}
|
32 |
+
mlp_ratio: 4
|
33 |
+
backbone_class: swinv2_tiny_window16_256_timm
|
34 |
+
backbone:
|
35 |
+
img_size: 256
|
36 |
+
input_dim: 3
|
37 |
+
use_vanilla_backward: False
|
38 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
|
39 |
+
upsample: False
|
40 |
+
cls_head: naive
|
41 |
+
xl_context:
|
42 |
+
skip_connection: off
|
43 |
+
enabled: off
|
44 |
+
classification_mode: on
|
45 |
+
tiling: naive_two_stream
|
46 |
+
n_layer: 2
|
47 |
+
resume: ''
|
48 |
+
optimizer:
|
49 |
+
name: adamw
|
50 |
+
base_lr: 1e-4
|
51 |
+
classifier_ratio: 1.0
|
52 |
+
warmup_epochs: 0
|
53 |
+
train:
|
54 |
+
epochs: 100
|
55 |
+
batch_size: 120
|
56 |
+
val_batch_size: 1
|
57 |
+
freeze_epochs: 0
|
58 |
+
test_every: 1
|
59 |
+
test_reset: True
|
60 |
+
clip_grad: 5.0
|
61 |
+
val: False
|
62 |
+
losses:
|
63 |
+
losses:
|
64 |
+
- name: cls
|
65 |
+
type: CrossEntropy
|
66 |
+
params:
|
67 |
+
field: label
|
68 |
+
weight: 1.0
|
69 |
+
display: on
|
swin-t/configs/swin_v2_tiny_1e-4_xl_512_256_context_2chips.yaml
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
base_configs:
|
3 |
+
- config/base.yaml
|
4 |
+
- config/cluster_paths_inat.yaml
|
5 |
+
name: swin_v2_tiny_1e-4_xl_512_256_context_2chips
|
6 |
+
fsdp: off
|
7 |
+
fp16: off
|
8 |
+
data:
|
9 |
+
dataset: inaturalist
|
10 |
+
crop_size: 512
|
11 |
+
val_crop_size: 512
|
12 |
+
batch_size: ${train.batch_size}
|
13 |
+
val_batch_size: ${train.val_batch_size}
|
14 |
+
num_workers: 1
|
15 |
+
num_classes: 284
|
16 |
+
interpolation: bilinear
|
17 |
+
test_crop: False
|
18 |
+
aug:
|
19 |
+
auto_augment: rand-m9-mstd0.5-inc1
|
20 |
+
color_jitter: 0.4
|
21 |
+
reprob: 0.0
|
22 |
+
remode: pixel
|
23 |
+
recount: 1
|
24 |
+
mixup: 0.0
|
25 |
+
cutmix: 0.0
|
26 |
+
label_smoothing: 0.3
|
27 |
+
random_resized_crop: False
|
28 |
+
mean: [0.485, 0.456, 0.406]
|
29 |
+
std: [0.229, 0.224, 0.225]
|
30 |
+
supercategories:
|
31 |
+
- Reptilia
|
32 |
+
model:
|
33 |
+
name: EncoderDecoderV2
|
34 |
+
num_classes: ${data.num_classes}
|
35 |
+
mlp_ratio: 4
|
36 |
+
backbone_class: swinv2_tiny_window16_256_timm
|
37 |
+
backbone:
|
38 |
+
img_size: 256
|
39 |
+
input_dim: 3
|
40 |
+
use_vanilla_backward: False
|
41 |
+
pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
|
42 |
+
upsample: False
|
43 |
+
cls_head: xl
|
44 |
+
xl_context:
|
45 |
+
skip_connection: off
|
46 |
+
enabled: off
|
47 |
+
hidden_size: 768
|
48 |
+
classification_mode: off
|
49 |
+
in_context_patches: 128
|
50 |
+
tiling: naive_two_stream
|
51 |
+
n_layer: 2
|
52 |
+
mem_chip: 2
|
53 |
+
resume: ''
|
54 |
+
optimizer:
|
55 |
+
name: adamw
|
56 |
+
lr: 1e-4
|
57 |
+
classifier_ratio: 1.0
|
58 |
+
warmup_epochs: 0
|
59 |
+
train:
|
60 |
+
epochs: 100
|
61 |
+
batch_size: 30
|
62 |
+
val_batch_size: 1
|
63 |
+
freeze_epochs: 0
|
64 |
+
test_every: 1
|
65 |
+
test_reset: True
|
66 |
+
clip_grad: 5.0
|
67 |
+
val: False
|
68 |
+
losses:
|
69 |
+
losses:
|
70 |
+
- name: cls
|
71 |
+
type: CrossEntropy
|
72 |
+
params:
|
73 |
+
field: label
|
74 |
+
weight: 1.0
|
75 |
+
display: on
|
swin-t/weights/swin-t-hyper-1layer-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ba925dd7f4d0ce068de1d500d3628ae96a9f38311c5c0dd5302221bd345e8bb
|
3 |
+
size 151428048
|
swin-t/weights/swin-t-hyper-256-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95db1742267db95e0d0b24337f3ddb14787675b8b4ec5456b98a85837ebe576a
|
3 |
+
size 189197453
|
swin-t/weights/swin-t-hyper-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c71ac4871b2989d159418e8de49430e9a162e163727fb196e73f3b4739e65811
|
3 |
+
size 189197453
|
swin-t/weights/swin-t-mamba-512-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ad07cf9a5ddc2865bdc6a88aaebc205b4ec14dac308dda4c59ac82bc2143104
|
3 |
+
size 174032049
|
swin-t/weights/swin-t-nonxl-256-top1.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e12d61f071a805dd57b299f4dd7df9ada90c1b2d243f8f7f5780345320f4c6c
|
3 |
+
size 123373595
|