RitwikGupta committed on
Commit
5107f82
1 Parent(s): d1302ce

Add weights and configs

Browse files
Files changed (49) hide show
  1. hiera/configs/hiera_base_1e-4_hyper_224_448.yaml +63 -0
  2. hiera/configs/hiera_base_1e-4_nonxl_224.yaml +63 -0
  3. hiera/configs/hiera_baseplus_1e-4_nonxl_224.yaml +63 -0
  4. hiera/weights/hiera-b-hyper-224-448-top1.ckpt +3 -0
  5. hiera/weights/hiera-b-nonxl-224-top1.ckpt +3 -0
  6. hiera/weights/hiera-bplus-nonxl-224-top1.ckpt +3 -0
  7. swin-b/configs/swin_v2_base_1e-4_nonxl_256.yaml +69 -0
  8. swin-b/configs/swin_v2_base_1e-4_xl_512_256_context_2chips.yaml +75 -0
  9. swin-b/configs/swin_v2_base_1e-5_hyper-1layer_512_256.yaml +71 -0
  10. swin-b/configs/swin_v2_base_1e-5_hyper_256_256.yaml +71 -0
  11. swin-b/configs/swin_v2_base_1e-5_hyper_512_256.yaml +71 -0
  12. swin-b/configs/swin_v2_base_1e-5_mamba_512_256.yaml +75 -0
  13. swin-b/weights/swin-b-hyper-1layer-512-256-top1.ckpt +3 -0
  14. swin-b/weights/swin-b-hyper-256-256-top1.ckpt +3 -0
  15. swin-b/weights/swin-b-hyper-512-256-top1.ckpt +3 -0
  16. swin-b/weights/swin-b-mamba-512-256-top1.ckpt +3 -0
  17. swin-b/weights/swin-b-nonxl-256-top1.ckpt +3 -0
  18. swin-l/configs/swin_v2_large_1e-5_hyper-1layer_512_256.yaml +71 -0
  19. swin-l/configs/swin_v2_large_1e-5_hyper_256_256.yaml +71 -0
  20. swin-l/configs/swin_v2_large_1e-5_hyper_512_256.yaml +71 -0
  21. swin-l/configs/swin_v2_large_1e-5_mamba_512_256.yaml +75 -0
  22. swin-l/configs/swin_v2_large_1e-5_nonxl_256.yaml +70 -0
  23. swin-l/configs/swin_v2_large_1e-5_xl_512_256_context_2chips.yaml +75 -0
  24. swin-l/weights/swin-l-hyper-1layer-512-256-top1.ckpt +3 -0
  25. swin-l/weights/swin-l-hyper-256-256-top1.ckpt +3 -0
  26. swin-l/weights/swin-l-hyper-512-256-top1.ckpt +3 -0
  27. swin-l/weights/swin-l-mamba-512-256-top1.ckpt +3 -0
  28. swin-l/weights/swin-l-nonxl-256-top1.ckpt +3 -0
  29. swin-s/configs/swin_v2_small_1e-4_hyper-1layer_512_256.yaml +71 -0
  30. swin-s/configs/swin_v2_small_1e-4_hyper_256_256.yaml +71 -0
  31. swin-s/configs/swin_v2_small_1e-4_hyper_512_256.yaml +71 -0
  32. swin-s/configs/swin_v2_small_1e-4_nonxl_256.yaml +69 -0
  33. swin-s/configs/swin_v2_small_1e-4_xl_512_256_context_2chips.yaml +75 -0
  34. swin-s/weights/swin-s-hyper-1layer-512-256-top1.ckpt +3 -0
  35. swin-s/weights/swin-s-hyper-256-256-top1.ckpt +3 -0
  36. swin-s/weights/swin-s-hyper-512-256-top1.ckpt +3 -0
  37. swin-s/weights/swin-s-nonxl-256-top1.ckpt +3 -0
  38. swin-s/weights/swin-s-xl-512-256-top1.ckpt +3 -0
  39. swin-t/configs/swin_v2_tiny_1e-4_hyper-1layer_512_256.yaml +71 -0
  40. swin-t/configs/swin_v2_tiny_1e-4_hyper_256_256.yaml +71 -0
  41. swin-t/configs/swin_v2_tiny_1e-4_hyper_512_256.yaml +71 -0
  42. swin-t/configs/swin_v2_tiny_1e-4_mamba_512_256.yaml +75 -0
  43. swin-t/configs/swin_v2_tiny_1e-4_nonxl_256.yaml +69 -0
  44. swin-t/configs/swin_v2_tiny_1e-4_xl_512_256_context_2chips.yaml +75 -0
  45. swin-t/weights/swin-t-hyper-1layer-512-256-top1.ckpt +3 -0
  46. swin-t/weights/swin-t-hyper-256-256-top1.ckpt +3 -0
  47. swin-t/weights/swin-t-hyper-512-256-top1.ckpt +3 -0
  48. swin-t/weights/swin-t-mamba-512-256-top1.ckpt +3 -0
  49. swin-t/weights/swin-t-nonxl-256-top1.ckpt +3 -0
hiera/configs/hiera_base_1e-4_hyper_224_448.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_configs:
2
+ - config/base.yaml
3
+ - config/cluster_paths_inat.yaml
4
+ name: hiera_base_1e-4_hyper_224_448
5
+ data:
6
+ dataset: inaturalist
7
+ crop_size: 448
8
+ val_crop_size: 448
9
+ batch_size: ${train.batch_size}
10
+ val_batch_size: ${train.val_batch_size}
11
+ num_workers: 1
12
+ num_classes: 284
13
+ interpolation: bilinear
14
+ test_crop: false
15
+ aug:
16
+ auto_augment: rand-m9-mstd0.5-inc1
17
+ color_jitter: 0.4
18
+ reprob: 0.0
19
+ remode: pixel
20
+ recount: 1
21
+ mixup: 0.0
22
+ cutmix: 0.0
23
+ label_smoothing: 0.3
24
+ random_resized_crop: False
25
+ mean: [0.485, 0.456, 0.406]
26
+ std: [0.229, 0.224, 0.225]
27
+ supercategories:
28
+ - Reptilia
29
+ model:
30
+ name: EncoderDecoder
31
+ num_classes: ${data.num_classes}
32
+ mlp_ratio: 4
33
+ backbone_class: get_hiera_model
34
+ backbone:
35
+ img_size: 224
36
+ input_dim: 3
37
+ cls_head: xl
38
+ context:
39
+ classification_mode: on
40
+ tiling: naive_two_stream
41
+ n_layer: 2
42
+ optimizer:
43
+ name: adamw
44
+ base_lr: 1e-4
45
+ classifier_ratio: 1.0
46
+ warmup_epochs: 0
47
+ train:
48
+ epochs: 100
49
+ batch_size: 16
50
+ val_batch_size: 1
51
+ freeze_epochs: 0
52
+ freeze_bn: false
53
+ test_every: 1
54
+ test_reset: true
55
+ clip_grad: 5.0
56
+ losses:
57
+ losses:
58
+ - name: cls
59
+ type: CrossEntropy
60
+ params:
61
+ field: label
62
+ weight: 1.0
63
+ display: on
hiera/configs/hiera_base_1e-4_nonxl_224.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_configs:
2
+ - config/base.yaml
3
+ - config/cluster_paths_inat.yaml
4
+ name: hiera_base_1e-4_nonxl_224
5
+ data:
6
+ dataset: inaturalist
7
+ crop_size: 224
8
+ val_crop_size: 224
9
+ batch_size: ${train.batch_size}
10
+ val_batch_size: ${train.val_batch_size}
11
+ num_workers: 1
12
+ num_classes: 284
13
+ interpolation: bilinear
14
+ test_crop: false
15
+ aug:
16
+ auto_augment: rand-m9-mstd0.5-inc1
17
+ color_jitter: 0.4
18
+ reprob: 0.0
19
+ remode: pixel
20
+ recount: 1
21
+ mixup: 0.0
22
+ cutmix: 0.0
23
+ label_smoothing: 0.3
24
+ random_resized_crop: False
25
+ mean: [0.485, 0.456, 0.406]
26
+ std: [0.229, 0.224, 0.225]
27
+ supercategories:
28
+ - Reptilia
29
+ model:
30
+ name: EncoderDecoder
31
+ num_classes: ${data.num_classes}
32
+ mlp_ratio: 4
33
+ backbone_class: get_hiera_model
34
+ backbone:
35
+ img_size: 224
36
+ input_dim: 3
37
+ cls_head: naive
38
+ context:
39
+ classification_mode: on
40
+ tiling: naive_two_stream
41
+ n_layer: 2
42
+ optimizer:
43
+ name: adamw
44
+ base_lr: 1e-4
45
+ classifier_ratio: 1.0
46
+ warmup_epochs: 0
47
+ train:
48
+ epochs: 100
49
+ batch_size: 16
50
+ val_batch_size: 1
51
+ freeze_epochs: 0
52
+ freeze_bn: false
53
+ test_every: 1
54
+ test_reset: true
55
+ clip_grad: 5.0
56
+ losses:
57
+ losses:
58
+ - name: cls
59
+ type: CrossEntropy
60
+ params:
61
+ field: label
62
+ weight: 1.0
63
+ display: on
hiera/configs/hiera_baseplus_1e-4_nonxl_224.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_configs:
2
+ - config/base.yaml
3
+ - config/cluster_paths_inat.yaml
4
+ name: hiera_baseplus_1e-4_nonxl_224
5
+ data:
6
+ dataset: inaturalist
7
+ crop_size: 224
8
+ val_crop_size: 224
9
+ batch_size: ${train.batch_size}
10
+ val_batch_size: ${train.val_batch_size}
11
+ num_workers: 1
12
+ num_classes: 284
13
+ interpolation: bilinear
14
+ test_crop: false
15
+ aug:
16
+ auto_augment: rand-m9-mstd0.5-inc1
17
+ color_jitter: 0.4
18
+ reprob: 0.0
19
+ remode: pixel
20
+ recount: 1
21
+ mixup: 0.0
22
+ cutmix: 0.0
23
+ label_smoothing: 0.3
24
+ random_resized_crop: False
25
+ mean: [0.485, 0.456, 0.406]
26
+ std: [0.229, 0.224, 0.225]
27
+ supercategories:
28
+ - Reptilia
29
+ model:
30
+ name: EncoderDecoder
31
+ num_classes: ${data.num_classes}
32
+ mlp_ratio: 4
33
+ backbone_class: get_hiera_model_base_plus
34
+ backbone:
35
+ img_size: 224
36
+ input_dim: 3
37
+ cls_head: naive
38
+ context:
39
+ classification_mode: on
40
+ tiling: naive_two_stream
41
+ n_layer: 2
42
+ optimizer:
43
+ name: adamw
44
+ base_lr: 1e-4
45
+ classifier_ratio: 1.0
46
+ warmup_epochs: 0
47
+ train:
48
+ epochs: 100
49
+ batch_size: 16
50
+ val_batch_size: 1
51
+ freeze_epochs: 0
52
+ freeze_bn: false
53
+ test_every: 1
54
+ test_reset: true
55
+ clip_grad: 5.0
56
+ losses:
57
+ losses:
58
+ - name: cls
59
+ type: CrossEntropy
60
+ params:
61
+ field: label
62
+ weight: 1.0
63
+ display: on
hiera/weights/hiera-b-hyper-224-448-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf7c07c912366183353161ecf2bb1ebe5bb4b003add32940aa3624c21965c453
3
+ size 281939643
hiera/weights/hiera-b-nonxl-224-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff2e83379a7ecb4d068999831f9dbefcaad6082145ffb66ec05a4f4c36d107bc
3
+ size 244166884
hiera/weights/hiera-bplus-nonxl-224-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:012cc98fd445a02d2a8902fec1b8486263f5fcaf5807953aacc8e88b6e683201
3
+ size 322400822
swin-b/configs/swin_v2_base_1e-4_nonxl_256.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_configs:
2
+ - config/base.yaml
3
+ - config/paths.yaml
4
+ name: swin_v2_base_1e-4_nonxl_256
5
+ data:
6
+ dataset: inaturalist
7
+ crop_size: 256
8
+ val_crop_size: 256
9
+ batch_size: ${train.batch_size}
10
+ val_batch_size: ${train.val_batch_size}
11
+ num_workers: 1
12
+ num_classes: 284
13
+ interpolation: bilinear
14
+ test_crop: False
15
+ aug:
16
+ auto_augment: rand-m9-mstd0.5-inc1
17
+ color_jitter: 0.4
18
+ reprob: 0.0
19
+ remode: pixel
20
+ recount: 1
21
+ mixup: 0.0
22
+ cutmix: 0.0
23
+ label_smoothing: 0.3
24
+ random_resized_crop: False
25
+ mean: [0.485, 0.456, 0.406]
26
+ std: [0.229, 0.224, 0.225]
27
+ supercategories:
28
+ - Reptilia
29
+ model:
30
+ name: EncoderDecoderV2
31
+ num_classes: ${data.num_classes}
32
+ mlp_ratio: 4
33
+ backbone_class: swinv2_base_window16_256_timm
34
+ backbone:
35
+ img_size: 256
36
+ input_dim: 3
37
+ use_vanilla_backward: False
38
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
39
+ upsample: False
40
+ cls_head: naive
41
+ xl_context:
42
+ skip_connection: off
43
+ enabled: off
44
+ classification_mode: on
45
+ tiling: naive_two_stream
46
+ n_layer: 2
47
+ resume: ''
48
+ optimizer:
49
+ name: adamw
50
+ base_lr: 1e-4
51
+ classifier_ratio: 1.0
52
+ warmup_epochs: 0
53
+ train:
54
+ epochs: 100
55
+ batch_size: 60
56
+ val_batch_size: 1
57
+ freeze_epochs: 0
58
+ test_every: 1
59
+ test_reset: True
60
+ clip_grad: 5.0
61
+ val: False
62
+ losses:
63
+ losses:
64
+ - name: cls
65
+ type: CrossEntropy
66
+ params:
67
+ field: label
68
+ weight: 1.0
69
+ display: on
swin-b/configs/swin_v2_base_1e-4_xl_512_256_context_2chips.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/cluster_paths_inat.yaml
5
+ name: swin_v2_base_1e-4_xl_512_256_context_2chips
6
+ fsdp: off
7
+ fp16: off
8
+ data:
9
+ dataset: inaturalist
10
+ crop_size: 512
11
+ val_crop_size: 512
12
+ batch_size: ${train.batch_size}
13
+ val_batch_size: ${train.val_batch_size}
14
+ num_workers: 1
15
+ num_classes: 284
16
+ interpolation: bilinear
17
+ test_crop: False
18
+ aug:
19
+ auto_augment: rand-m9-mstd0.5-inc1
20
+ color_jitter: 0.4
21
+ reprob: 0.0
22
+ remode: pixel
23
+ recount: 1
24
+ mixup: 0.0
25
+ cutmix: 0.0
26
+ label_smoothing: 0.3
27
+ random_resized_crop: False
28
+ mean: [0.485, 0.456, 0.406]
29
+ std: [0.229, 0.224, 0.225]
30
+ supercategories:
31
+ - Reptilia
32
+ model:
33
+ name: EncoderDecoderV2
34
+ num_classes: ${data.num_classes}
35
+ mlp_ratio: 4
36
+ backbone_class: swinv2_base_window16_256_timm
37
+ backbone:
38
+ img_size: 256
39
+ input_dim: 3
40
+ use_vanilla_backward: False
41
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
42
+ upsample: False
43
+ cls_head: xl
44
+ xl_context:
45
+ skip_connection: off
46
+ enabled: off
47
+ hidden_size: 768
48
+ classification_mode: off
49
+ in_context_patches: 128
50
+ tiling: naive_two_stream
51
+ n_layer: 2
52
+ mem_chip: 2
53
+ resume: ''
54
+ optimizer:
55
+ name: adamw
56
+ lr: 1e-4
57
+ classifier_ratio: 1.0
58
+ warmup_epochs: 0
59
+ train:
60
+ epochs: 100
61
+ batch_size: 14
62
+ val_batch_size: 1
63
+ freeze_epochs: 0
64
+ test_every: 1
65
+ test_reset: True
66
+ clip_grad: 5.0
67
+ val: False
68
+ losses:
69
+ losses:
70
+ - name: cls
71
+ type: CrossEntropy
72
+ params:
73
+ field: label
74
+ weight: 1.0
75
+ display: on
swin-b/configs/swin_v2_base_1e-5_hyper-1layer_512_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_base_1e-5_hyper-1layer_512_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 512
10
+ val_crop_size: 512
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_base_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 1
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-5
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 14
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-b/configs/swin_v2_base_1e-5_hyper_256_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_base_1e-5_hyper_256_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 256
10
+ val_crop_size: 256
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_base_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 2
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-5
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 14
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-b/configs/swin_v2_base_1e-5_hyper_512_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_base_1e-5_hyper_512_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 512
10
+ val_crop_size: 512
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_base_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 2
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-5
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 14
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-b/configs/swin_v2_base_1e-5_mamba_512_256.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_base_1e-5_mamba_512_256
6
+ fsdp: off
7
+ fp16: off
8
+ data:
9
+ dataset: inaturalist
10
+ crop_size: 512
11
+ val_crop_size: 512
12
+ batch_size: ${train.batch_size}
13
+ val_batch_size: ${train.val_batch_size}
14
+ num_workers: 1
15
+ num_classes: 284
16
+ interpolation: bilinear
17
+ test_crop: False
18
+ aug:
19
+ auto_augment: rand-m9-mstd0.5-inc1
20
+ color_jitter: 0.4
21
+ reprob: 0.0
22
+ remode: pixel
23
+ recount: 1
24
+ mixup: 0.0
25
+ cutmix: 0.0
26
+ label_smoothing: 0.3
27
+ random_resized_crop: False
28
+ mean: [0.485, 0.456, 0.406]
29
+ std: [0.229, 0.224, 0.225]
30
+ supercategories:
31
+ - Reptilia
32
+ model:
33
+ name: EncoderDecoderV2
34
+ num_classes: ${data.num_classes}
35
+ mlp_ratio: 4
36
+ backbone_class: swinv2_base_window16_256_timm
37
+ backbone:
38
+ img_size: 256
39
+ input_dim: 3
40
+ use_vanilla_backward: False
41
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
42
+ upsample: False
43
+ cls_head: xl
44
+ xl_context:
45
+ skip_connection: off
46
+ enabled: off
47
+ hidden_size: 768
48
+ classification_mode: off
49
+ attention_method: mamba
50
+ tiling: naive_two_stream
51
+ n_layer: 4
52
+ resume: ''
53
+ optimizer:
54
+ name: adamw
55
+ base_lr: 1e-5
56
+ lr: 1e-5
57
+ classifier_ratio: 1.0
58
+ warmup_epochs: 0
59
+ train:
60
+ epochs: 100
61
+ batch_size: 16
62
+ val_batch_size: 1
63
+ freeze_epochs: 0
64
+ test_every: 1
65
+ test_reset: True
66
+ clip_grad: 5.0
67
+ val: False
68
+ losses:
69
+ losses:
70
+ - name: cls
71
+ type: CrossEntropy
72
+ params:
73
+ field: label
74
+ weight: 1.0
75
+ display: on
swin-b/weights/swin-b-hyper-1layer-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f78ad762b9e6862c262402f128d9e3638279ac3fb2b09ff3a862123291857cdc
3
+ size 389571612
swin-b/weights/swin-b-hyper-256-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51d688d4f7eebba35371b5b55f77b52e9bac671a8e6a458422beaffea0f679b8
3
+ size 427339589
swin-b/weights/swin-b-hyper-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1b655efd85c4ffd6605bbb5e40edd0c27810e7daf52fd71d213858417f09d59
3
+ size 427339589
swin-b/weights/swin-b-mamba-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5dde760dd280ca06440ce561969c8f9af795fae5b11a0ff8ed955b997c40d18
3
+ size 412174185
swin-b/weights/swin-b-nonxl-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5acf64cb87dea60dd49618bfc58987a217ba45fa8c3369b1667a8ba4f1ad6d1d
3
+ size 369244131
swin-l/configs/swin_v2_large_1e-5_hyper-1layer_512_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_large_1e-5_hyper-1layer_512_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 512
10
+ val_crop_size: 512
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_large_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 1
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-5
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 8
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-l/configs/swin_v2_large_1e-5_hyper_256_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_large_1e-5_hyper_256_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 256
10
+ val_crop_size: 256
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_large_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 2
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-5
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 32
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-l/configs/swin_v2_large_1e-5_hyper_512_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_large_1e-5_hyper_512_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 512
10
+ val_crop_size: 512
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_large_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 2
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-5
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 8
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-l/configs/swin_v2_large_1e-5_mamba_512_256.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_large_1e-5_mamba_512_256
6
+ fsdp: off
7
+ fp16: off
8
+ data:
9
+ dataset: inaturalist
10
+ crop_size: 512
11
+ val_crop_size: 512
12
+ batch_size: ${train.batch_size}
13
+ val_batch_size: ${train.val_batch_size}
14
+ num_workers: 1
15
+ num_classes: 284
16
+ interpolation: bilinear
17
+ test_crop: False
18
+ aug:
19
+ auto_augment: rand-m9-mstd0.5-inc1
20
+ color_jitter: 0.4
21
+ reprob: 0.0
22
+ remode: pixel
23
+ recount: 1
24
+ mixup: 0.0
25
+ cutmix: 0.0
26
+ label_smoothing: 0.3
27
+ random_resized_crop: False
28
+ mean: [0.485, 0.456, 0.406]
29
+ std: [0.229, 0.224, 0.225]
30
+ supercategories:
31
+ - Reptilia
32
+ model:
33
+ name: EncoderDecoderV2
34
+ num_classes: ${data.num_classes}
35
+ mlp_ratio: 4
36
+ backbone_class: swinv2_large_window16_256_timm
37
+ backbone:
38
+ img_size: 256
39
+ input_dim: 3
40
+ use_vanilla_backward: False
41
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_large_patch4_window16_256.pth
42
+ upsample: False
43
+ cls_head: xl
44
+ xl_context:
45
+ skip_connection: off
46
+ enabled: off
47
+ hidden_size: 768
48
+ classification_mode: off
49
+ attention_method: mamba
50
+ tiling: naive_two_stream
51
+ n_layer: 4
52
+ resume: ''
53
+ optimizer:
54
+ name: adamw
55
+ base_lr: 1e-5
56
+ lr: 1e-5
57
+ classifier_ratio: 1.0
58
+ warmup_epochs: 0
59
+ train:
60
+ epochs: 100
61
+ batch_size: 8
62
+ val_batch_size: 1
63
+ freeze_epochs: 0
64
+ test_every: 1
65
+ test_reset: True
66
+ clip_grad: 5.0
67
+ val: False
68
+ losses:
69
+ losses:
70
+ - name: cls
71
+ type: CrossEntropy
72
+ params:
73
+ field: label
74
+ weight: 1.0
75
+ display: on
swin-l/configs/swin_v2_large_1e-5_nonxl_256.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_configs:
2
+ - config/base.yaml
3
+ - config/paths.yaml
4
+ name: swin_v2_large_1e-5_nonxl_256
5
+ fsdp: off
6
+ data:
7
+ dataset: inaturalist
8
+ crop_size: 256
9
+ val_crop_size: 256
10
+ batch_size: ${train.batch_size}
11
+ val_batch_size: ${train.val_batch_size}
12
+ num_workers: 1
13
+ num_classes: 284
14
+ interpolation: bilinear
15
+ test_crop: False
16
+ aug:
17
+ auto_augment: rand-m9-mstd0.5-inc1
18
+ color_jitter: 0.4
19
+ reprob: 0.0
20
+ remode: pixel
21
+ recount: 1
22
+ mixup: 0.0
23
+ cutmix: 0.0
24
+ label_smoothing: 0.3
25
+ random_resized_crop: False
26
+ mean: [0.485, 0.456, 0.406]
27
+ std: [0.229, 0.224, 0.225]
28
+ supercategories:
29
+ - Reptilia
30
+ model:
31
+ name: EncoderDecoderV2
32
+ num_classes: ${data.num_classes}
33
+ mlp_ratio: 4
34
+ backbone_class: swinv2_large_window16_256_timm
35
+ backbone:
36
+ img_size: 256
37
+ input_dim: 3
38
+ use_vanilla_backward: False
39
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
40
+ upsample: False
41
+ cls_head: naive
42
+ xl_context:
43
+ skip_connection: off
44
+ enabled: off
45
+ classification_mode: on
46
+ tiling: naive_two_stream
47
+ n_layer: 2
48
+ resume: ''
49
+ optimizer:
50
+ name: adamw
51
+ base_lr: 1e-5
52
+ classifier_ratio: 1.0
53
+ warmup_epochs: 0
54
+ train:
55
+ epochs: 100
56
+ batch_size: 2
57
+ val_batch_size: 1
58
+ freeze_epochs: 0
59
+ test_every: 1
60
+ test_reset: True
61
+ clip_grad: 5.0
62
+ val: False
63
+ losses:
64
+ losses:
65
+ - name: cls
66
+ type: CrossEntropy
67
+ params:
68
+ field: label
69
+ weight: 1.0
70
+ display: on
swin-l/configs/swin_v2_large_1e-5_xl_512_256_context_2chips.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/cluster_paths_inat.yaml
5
+ name: swin_v2_large_1e-5_xl_512_256_context_2chips
6
+ fsdp: off
7
+ fp16: off
8
+ data:
9
+ dataset: inaturalist
10
+ crop_size: 512
11
+ val_crop_size: 512
12
+ batch_size: ${train.batch_size}
13
+ val_batch_size: ${train.val_batch_size}
14
+ num_workers: 1
15
+ num_classes: 284
16
+ interpolation: bilinear
17
+ test_crop: False
18
+ aug:
19
+ auto_augment: rand-m9-mstd0.5-inc1
20
+ color_jitter: 0.4
21
+ reprob: 0.0
22
+ remode: pixel
23
+ recount: 1
24
+ mixup: 0.0
25
+ cutmix: 0.0
26
+ label_smoothing: 0.3
27
+ random_resized_crop: False
28
+ mean: [0.485, 0.456, 0.406]
29
+ std: [0.229, 0.224, 0.225]
30
+ supercategories:
31
+ - Reptilia
32
+ model:
33
+ name: EncoderDecoderV2
34
+ num_classes: ${data.num_classes}
35
+ mlp_ratio: 4
36
+ backbone_class: swinv2_large_window16_256_timm
37
+ backbone:
38
+ img_size: 256
39
+ input_dim: 3
40
+ use_vanilla_backward: False
41
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
42
+ upsample: False
43
+ cls_head: xl
44
+ xl_context:
45
+ skip_connection: off
46
+ enabled: off
47
+ hidden_size: 768
48
+ classification_mode: off
49
+ in_context_patches: 128
50
+ tiling: naive_two_stream
51
+ n_layer: 2
52
+ mem_chip: 2
53
+ resume: ''
54
+ optimizer:
55
+ name: adamw
56
+ lr: 1e-5
57
+ classifier_ratio: 1.0
58
+ warmup_epochs: 0
59
+ train:
60
+ epochs: 100
61
+ batch_size: 8
62
+ val_batch_size: 1
63
+ freeze_epochs: 0
64
+ test_every: 1
65
+ test_reset: True
66
+ clip_grad: 5.0
67
+ val: False
68
+ losses:
69
+ losses:
70
+ - name: cls
71
+ type: CrossEntropy
72
+ params:
73
+ field: label
74
+ weight: 1.0
75
+ display: on
swin-l/weights/swin-l-hyper-1layer-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6c66c2cb5b50e5624317f5ed9b38c95d2242b05ef69e7b90d97125bf760a12
3
+ size 824377372
swin-l/weights/swin-l-hyper-256-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:138028b3ebe1136e54b0cf9e2465a723a4cc80c852bad1e96d82ba9e225e78b2
3
+ size 862144325
swin-l/weights/swin-l-hyper-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:741ade1df8e6af59fff9ab7b10a87243f88365fdf16c612d2698b5712d8b7477
3
+ size 862144325
swin-l/weights/swin-l-mamba-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aecf734bc26d77012e3b7148f9396bc9ca5e7c5d225e3136589170453f19a9c1
3
+ size 846978921
swin-l/weights/swin-l-nonxl-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b5e9cfdb075bc422128002318f86a494235754e8e34ab4589f5fd40c2fb9c5
3
+ size 825798627
swin-s/configs/swin_v2_small_1e-4_hyper-1layer_512_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_small_1e-4_hyper-1layer_512_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 512
10
+ val_crop_size: 256
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_small_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_small_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 1
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-4
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 20
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-s/configs/swin_v2_small_1e-4_hyper_256_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_small_1e-4_hyper_256_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 256
10
+ val_crop_size: 256
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_small_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_small_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 2
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-4
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 80
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-s/configs/swin_v2_small_1e-4_hyper_512_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_small_1e-4_hyper_512_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 512
10
+ val_crop_size: 256
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_small_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_small_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 2
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-4
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 20
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-s/configs/swin_v2_small_1e-4_nonxl_256.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_configs:
2
+ - config/base.yaml
3
+ - config/paths.yaml
4
+ name: swin_v2_small_1e-4_nonxl_256
5
+ data:
6
+ dataset: inaturalist
7
+ crop_size: 256
8
+ val_crop_size: 256
9
+ batch_size: ${train.batch_size}
10
+ val_batch_size: ${train.val_batch_size}
11
+ num_workers: 1
12
+ num_classes: 284
13
+ interpolation: bilinear
14
+ test_crop: False
15
+ aug:
16
+ auto_augment: rand-m9-mstd0.5-inc1
17
+ color_jitter: 0.4
18
+ reprob: 0.0
19
+ remode: pixel
20
+ recount: 1
21
+ mixup: 0.0
22
+ cutmix: 0.0
23
+ label_smoothing: 0.3
24
+ random_resized_crop: False
25
+ mean: [0.485, 0.456, 0.406]
26
+ std: [0.229, 0.224, 0.225]
27
+ supercategories:
28
+ - Reptilia
29
+ model:
30
+ name: EncoderDecoderV2
31
+ num_classes: ${data.num_classes}
32
+ mlp_ratio: 4
33
+ backbone_class: swinv2_small_window16_256_timm
34
+ backbone:
35
+ img_size: 256
36
+ input_dim: 3
37
+ use_vanilla_backward: False
38
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_small_patch4_window16_256.pth
39
+ upsample: False
40
+ cls_head: naive
41
+ xl_context:
42
+ skip_connection: off
43
+ enabled: off
44
+ classification_mode: on
45
+ tiling: naive_two_stream
46
+ n_layer: 2
47
+ resume: ''
48
+ optimizer:
49
+ name: adamw
50
+ base_lr: 1e-4
51
+ classifier_ratio: 1.0
52
+ warmup_epochs: 0
53
+ train:
54
+ epochs: 100
55
+ batch_size: 80
56
+ val_batch_size: 1
57
+ freeze_epochs: 0
58
+ test_every: 1
59
+ test_reset: True
60
+ clip_grad: 5.0
61
+ val: False
62
+ losses:
63
+ losses:
64
+ - name: cls
65
+ type: CrossEntropy
66
+ params:
67
+ field: label
68
+ weight: 1.0
69
+ display: on
swin-s/configs/swin_v2_small_1e-4_xl_512_256_context_2chips.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/cluster_paths_inat.yaml
5
+ name: swin_v2_small_1e-4_xl_512_256_context_2chips
6
+ fsdp: off
7
+ fp16: off
8
+ data:
9
+ dataset: inaturalist
10
+ crop_size: 512
11
+ val_crop_size: 512
12
+ batch_size: ${train.batch_size}
13
+ val_batch_size: ${train.val_batch_size}
14
+ num_workers: 1
15
+ num_classes: 284
16
+ interpolation: bilinear
17
+ test_crop: False
18
+ aug:
19
+ auto_augment: rand-m9-mstd0.5-inc1
20
+ color_jitter: 0.4
21
+ reprob: 0.0
22
+ remode: pixel
23
+ recount: 1
24
+ mixup: 0.0
25
+ cutmix: 0.0
26
+ label_smoothing: 0.3
27
+ random_resized_crop: False
28
+ mean: [0.485, 0.456, 0.406]
29
+ std: [0.229, 0.224, 0.225]
30
+ supercategories:
31
+ - Reptilia
32
+ model:
33
+ name: EncoderDecoderV2
34
+ num_classes: ${data.num_classes}
35
+ mlp_ratio: 4
36
+ backbone_class: swinv2_small_window16_256_timm
37
+ backbone:
38
+ img_size: 256
39
+ input_dim: 3
40
+ use_vanilla_backward: False
41
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
42
+ upsample: False
43
+ cls_head: xl
44
+ xl_context:
45
+ skip_connection: off
46
+ enabled: off
47
+ hidden_size: 768
48
+ classification_mode: off
49
+ in_context_patches: 128
50
+ tiling: naive_two_stream
51
+ n_layer: 2
52
+ mem_chip: 2
53
+ resume: ''
54
+ optimizer:
55
+ name: adamw
56
+ lr: 1e-4
57
+ classifier_ratio: 1.0
58
+ warmup_epochs: 0
59
+ train:
60
+ epochs: 100
61
+ batch_size: 20
62
+ val_batch_size: 1
63
+ freeze_epochs: 0
64
+ test_every: 1
65
+ test_reset: True
66
+ clip_grad: 5.0
67
+ val: False
68
+ losses:
69
+ losses:
70
+ - name: cls
71
+ type: CrossEntropy
72
+ params:
73
+ field: label
74
+ weight: 1.0
75
+ display: on
swin-s/weights/swin-s-hyper-1layer-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:281efaeaa28fcb2209f89dd1aa073acb65e7caf319091b7e9ea00876b819992a
3
+ size 237049820
swin-s/weights/swin-s-hyper-256-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a91cd7dee38c2ff336b0303250a568d9c61ed0101ef4c2b788457d1beb5781a6
3
+ size 274817797
swin-s/weights/swin-s-hyper-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b78ca95892d7e1e3e1cc2719d476bd06db961ba4935ae01cf4556192d4f5b82
3
+ size 274817797
swin-s/weights/swin-s-nonxl-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b045b5f3e67f4345a693d7f3f5654fc2d3a89d1d2943b1169f280b363537851a
3
+ size 208993187
swin-s/weights/swin-s-xl-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fcee8718f03ec05e2f85627509303581309b197a9f5b5096eea62eb2d61f67e
3
+ size 274816453
swin-t/configs/swin_v2_tiny_1e-4_hyper-1layer_512_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_tiny_1e-4_hyper-1layer_512_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 512
10
+ val_crop_size: 256
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_tiny_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 1
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-4
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 32
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-t/configs/swin_v2_tiny_1e-4_hyper_256_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_tiny_1e-4_hyper_256_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 256
10
+ val_crop_size: 256
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_tiny_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 2
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-4
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 120
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-t/configs/swin_v2_tiny_1e-4_hyper_512_256.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_tiny_1e-4_hyper_512_256
6
+ fsdp: off
7
+ data:
8
+ dataset: inaturalist
9
+ crop_size: 512
10
+ val_crop_size: 256
11
+ batch_size: ${train.batch_size}
12
+ val_batch_size: ${train.val_batch_size}
13
+ num_workers: 1
14
+ num_classes: 284
15
+ interpolation: bilinear
16
+ test_crop: False
17
+ aug:
18
+ auto_augment: rand-m9-mstd0.5-inc1
19
+ color_jitter: 0.4
20
+ reprob: 0.0
21
+ remode: pixel
22
+ recount: 1
23
+ mixup: 0.0
24
+ cutmix: 0.0
25
+ label_smoothing: 0.3
26
+ random_resized_crop: False
27
+ mean: [0.485, 0.456, 0.406]
28
+ std: [0.229, 0.224, 0.225]
29
+ supercategories:
30
+ - Reptilia
31
+ model:
32
+ name: EncoderDecoderV2
33
+ num_classes: ${data.num_classes}
34
+ mlp_ratio: 4
35
+ backbone_class: swinv2_tiny_window16_256_timm
36
+ backbone:
37
+ img_size: 256
38
+ input_dim: 3
39
+ use_vanilla_backward: False
40
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
41
+ upsample: False
42
+ cls_head: xl
43
+ xl_context:
44
+ skip_connection: off
45
+ enabled: off
46
+ classification_mode: on
47
+ tiling: naive_two_stream
48
+ n_layer: 2
49
+ resume: ''
50
+ optimizer:
51
+ name: adamw
52
+ base_lr: 1e-4
53
+ classifier_ratio: 1.0
54
+ warmup_epochs: 0
55
+ train:
56
+ epochs: 100
57
+ batch_size: 32
58
+ val_batch_size: 1
59
+ freeze_epochs: 0
60
+ test_every: 1
61
+ test_reset: True
62
+ clip_grad: 5.0
63
+ val: False
64
+ losses:
65
+ losses:
66
+ - name: cls
67
+ type: CrossEntropy
68
+ params:
69
+ field: label
70
+ weight: 1.0
71
+ display: on
swin-t/configs/swin_v2_tiny_1e-4_mamba_512_256.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/paths.yaml
5
+ name: swin_v2_tiny_1e-4_mamba_512_256
6
+ fsdp: off
7
+ fp16: off
8
+ data:
9
+ dataset: inaturalist
10
+ crop_size: 512
11
+ val_crop_size: 512
12
+ batch_size: ${train.batch_size}
13
+ val_batch_size: ${train.val_batch_size}
14
+ num_workers: 1
15
+ num_classes: 284
16
+ interpolation: bilinear
17
+ test_crop: False
18
+ aug:
19
+ auto_augment: rand-m9-mstd0.5-inc1
20
+ color_jitter: 0.4
21
+ reprob: 0.0
22
+ remode: pixel
23
+ recount: 1
24
+ mixup: 0.0
25
+ cutmix: 0.0
26
+ label_smoothing: 0.3
27
+ random_resized_crop: False
28
+ mean: [0.485, 0.456, 0.406]
29
+ std: [0.229, 0.224, 0.225]
30
+ supercategories:
31
+ - Reptilia
32
+ model:
33
+ name: EncoderDecoderV2
34
+ num_classes: ${data.num_classes}
35
+ mlp_ratio: 4
36
+ backbone_class: swinv2_tiny_window16_256_timm
37
+ backbone:
38
+ img_size: 256
39
+ input_dim: 3
40
+ use_vanilla_backward: False
41
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
42
+ upsample: False
43
+ cls_head: xl
44
+ xl_context:
45
+ skip_connection: off
46
+ enabled: off
47
+ hidden_size: 768
48
+ classification_mode: off
49
+ attention_method: mamba
50
+ tiling: naive_two_stream
51
+ n_layer: 4
52
+ resume: ''
53
+ optimizer:
54
+ name: adamw
55
+ base_lr: 1e-4
56
+ lr: 1e-4
57
+ classifier_ratio: 1.0
58
+ warmup_epochs: 0
59
+ train:
60
+ epochs: 100
61
+ batch_size: 32
62
+ val_batch_size: 1
63
+ freeze_epochs: 0
64
+ test_every: 1
65
+ test_reset: True
66
+ clip_grad: 5.0
67
+ val: False
68
+ losses:
69
+ losses:
70
+ - name: cls
71
+ type: CrossEntropy
72
+ params:
73
+ field: label
74
+ weight: 1.0
75
+ display: on
swin-t/configs/swin_v2_tiny_1e-4_nonxl_256.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_configs:
2
+ - config/base.yaml
3
+ - config/paths.yaml
4
+ name: swin_v2_tiny_1e-4_nonxl_256
5
+ data:
6
+ dataset: inaturalist
7
+ crop_size: 256
8
+ val_crop_size: 256
9
+ batch_size: ${train.batch_size}
10
+ val_batch_size: ${train.val_batch_size}
11
+ num_workers: 1
12
+ num_classes: 284
13
+ interpolation: bilinear
14
+ test_crop: False
15
+ aug:
16
+ auto_augment: rand-m9-mstd0.5-inc1
17
+ color_jitter: 0.4
18
+ reprob: 0.0
19
+ remode: pixel
20
+ recount: 1
21
+ mixup: 0.0
22
+ cutmix: 0.0
23
+ label_smoothing: 0.3
24
+ random_resized_crop: False
25
+ mean: [0.485, 0.456, 0.406]
26
+ std: [0.229, 0.224, 0.225]
27
+ supercategories:
28
+ - Reptilia
29
+ model:
30
+ name: EncoderDecoderV2
31
+ num_classes: ${data.num_classes}
32
+ mlp_ratio: 4
33
+ backbone_class: swinv2_tiny_window16_256_timm
34
+ backbone:
35
+ img_size: 256
36
+ input_dim: 3
37
+ use_vanilla_backward: False
38
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_tiny_patch4_window16_256.pth
39
+ upsample: False
40
+ cls_head: naive
41
+ xl_context:
42
+ skip_connection: off
43
+ enabled: off
44
+ classification_mode: on
45
+ tiling: naive_two_stream
46
+ n_layer: 2
47
+ resume: ''
48
+ optimizer:
49
+ name: adamw
50
+ base_lr: 1e-4
51
+ classifier_ratio: 1.0
52
+ warmup_epochs: 0
53
+ train:
54
+ epochs: 100
55
+ batch_size: 120
56
+ val_batch_size: 1
57
+ freeze_epochs: 0
58
+ test_every: 1
59
+ test_reset: True
60
+ clip_grad: 5.0
61
+ val: False
62
+ losses:
63
+ losses:
64
+ - name: cls
65
+ type: CrossEntropy
66
+ params:
67
+ field: label
68
+ weight: 1.0
69
+ display: on
swin-t/configs/swin_v2_tiny_1e-4_xl_512_256_context_2chips.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ base_configs:
3
+ - config/base.yaml
4
+ - config/cluster_paths_inat.yaml
5
+ name: swin_v2_tiny_1e-4_xl_512_256_context_2chips
6
+ fsdp: off
7
+ fp16: off
8
+ data:
9
+ dataset: inaturalist
10
+ crop_size: 512
11
+ val_crop_size: 512
12
+ batch_size: ${train.batch_size}
13
+ val_batch_size: ${train.val_batch_size}
14
+ num_workers: 1
15
+ num_classes: 284
16
+ interpolation: bilinear
17
+ test_crop: False
18
+ aug:
19
+ auto_augment: rand-m9-mstd0.5-inc1
20
+ color_jitter: 0.4
21
+ reprob: 0.0
22
+ remode: pixel
23
+ recount: 1
24
+ mixup: 0.0
25
+ cutmix: 0.0
26
+ label_smoothing: 0.3
27
+ random_resized_crop: False
28
+ mean: [0.485, 0.456, 0.406]
29
+ std: [0.229, 0.224, 0.225]
30
+ supercategories:
31
+ - Reptilia
32
+ model:
33
+ name: EncoderDecoderV2
34
+ num_classes: ${data.num_classes}
35
+ mlp_ratio: 4
36
+ backbone_class: swinv2_tiny_window16_256_timm
37
+ backbone:
38
+ img_size: 256
39
+ input_dim: 3
40
+ use_vanilla_backward: False
41
+ pretrained: ${oc.env:PRETRAINED_CKPT_PATH, "./ckpts"}/swinv2_base_patch4_window16_256.pth
42
+ upsample: False
43
+ cls_head: xl
44
+ xl_context:
45
+ skip_connection: off
46
+ enabled: off
47
+ hidden_size: 768
48
+ classification_mode: off
49
+ in_context_patches: 128
50
+ tiling: naive_two_stream
51
+ n_layer: 2
52
+ mem_chip: 2
53
+ resume: ''
54
+ optimizer:
55
+ name: adamw
56
+ lr: 1e-4
57
+ classifier_ratio: 1.0
58
+ warmup_epochs: 0
59
+ train:
60
+ epochs: 100
61
+ batch_size: 30
62
+ val_batch_size: 1
63
+ freeze_epochs: 0
64
+ test_every: 1
65
+ test_reset: True
66
+ clip_grad: 5.0
67
+ val: False
68
+ losses:
69
+ losses:
70
+ - name: cls
71
+ type: CrossEntropy
72
+ params:
73
+ field: label
74
+ weight: 1.0
75
+ display: on
swin-t/weights/swin-t-hyper-1layer-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba925dd7f4d0ce068de1d500d3628ae96a9f38311c5c0dd5302221bd345e8bb
3
+ size 151428048
swin-t/weights/swin-t-hyper-256-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95db1742267db95e0d0b24337f3ddb14787675b8b4ec5456b98a85837ebe576a
3
+ size 189197453
swin-t/weights/swin-t-hyper-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c71ac4871b2989d159418e8de49430e9a162e163727fb196e73f3b4739e65811
3
+ size 189197453
swin-t/weights/swin-t-mamba-512-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ad07cf9a5ddc2865bdc6a88aaebc205b4ec14dac308dda4c59ac82bc2143104
3
+ size 174032049
swin-t/weights/swin-t-nonxl-256-top1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e12d61f071a805dd57b299f4dd7df9ada90c1b2d243f8f7f5780345320f4c6c
3
+ size 123373595