ermu2001 committed
Commit fe3247f · verified
1 parent: 9b9786f

Upload folder using huggingface_hub

Files changed (36)
  1. OUTPUTS/cifnet-18-tiny-lr0.01-attention/all_results.json +1 -0
  2. OUTPUTS/cifnet-18-tiny-lr0.01-attention/config.json +67 -0
  3. OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer/1712011059.4757543/events.out.tfevents.1712011059.ids-ws-06.3573944.1 +3 -0
  4. OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer/1712011059.478225/hparams.yml +30 -0
  5. OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer/accuracy_accuracy/events.out.tfevents.1712011313.ids-ws-06.3573944.2 +3 -0
  6. OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer/events.out.tfevents.1712011059.ids-ws-06.3573944.0 +3 -0
  7. OUTPUTS/cifnet-18-tiny-lr0.01-attention/model.safetensors +3 -0
  8. OUTPUTS/cifnet-18-tiny-lr0.01-attention/preprocessor_config.json +37 -0
  9. OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/all_results.json +1 -0
  10. OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/config.json +60 -0
  11. OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/image_classification_no_trainer/1712011004.1205187/events.out.tfevents.1712011004.ids-ws-06.3571203.1 +3 -0
  12. OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/image_classification_no_trainer/1712011004.1220295/hparams.yml +30 -0
  13. OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/image_classification_no_trainer/accuracy_accuracy/events.out.tfevents.1712011108.ids-ws-06.3571203.2 +3 -0
  14. OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/image_classification_no_trainer/events.out.tfevents.1712011004.ids-ws-06.3571203.0 +3 -0
  15. OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/model.safetensors +3 -0
  16. OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/preprocessor_config.json +37 -0
  17. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/all_results.json +1 -0
  18. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/config.json +58 -0
  19. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/1711990816.257815/events.out.tfevents.1711990816.ids-ws-06.2843209.1 +3 -0
  20. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/1711990816.2593696/hparams.yml +30 -0
  21. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/accuracy_accuracy/events.out.tfevents.1711990897.ids-ws-06.2843209.2 +3 -0
  22. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/events.out.tfevents.1711990816.ids-ws-06.2843209.0 +3 -0
  23. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/events.out.tfevents.1712005878.ids-ws-06.3270132.0 +3 -0
  24. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/events.out.tfevents.1712005910.ids-ws-06.3270878.0 +3 -0
  25. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/events.out.tfevents.1712005974.ids-ws-06.3271814.0 +3 -0
  26. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/model.safetensors +3 -0
  27. OUTPUTS/cifnet-18-tiny-lr0.1-baseline/preprocessor_config.json +37 -0
  28. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/all_results.json +1 -0
  29. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/config.json +67 -0
  30. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/image_classification_no_trainer/1712166496.7234852/events.out.tfevents.1712166496.ids-ws-06.1309582.1 +3 -0
  31. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/image_classification_no_trainer/1712166496.724743/hparams.yml +30 -0
  32. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/image_classification_no_trainer/accuracy_accuracy/events.out.tfevents.1712166664.ids-ws-06.1309582.2 +3 -0
  33. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/image_classification_no_trainer/events.out.tfevents.1712166496.ids-ws-06.1309582.0 +3 -0
  34. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/model.safetensors +3 -0
  35. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/preprocessor_config.json +37 -0
  36. OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/test_model.log +244 -0
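The commit message notes that the folder was uploaded with huggingface_hub; the sketch below shows one way to pull these OUTPUTS folders back down with the same library. The repo_id is a placeholder, since the repository name is not visible in this diff.

# Minimal sketch; the repo id is hypothetical -- substitute the real repository.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="ermu2001/cifnet-outputs",   # hypothetical repo id
    allow_patterns=["OUTPUTS/*"],        # fetch only the uploaded training outputs
)
print(local_dir)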
OUTPUTS/cifnet-18-tiny-lr0.01-attention/all_results.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.5882666666666667}
OUTPUTS/cifnet-18-tiny-lr0.01-attention/config.json ADDED
@@ -0,0 +1,67 @@
+ {
+ "_name_or_path": "microsoft/resnet-18",
+ "activation": "silu",
+ "architectures": [
+ "CifNetForImageClassification"
+ ],
+ "attention_kwargs": {
+ "attention_bias": true,
+ "attention_dropout": 0.1,
+ "attn_channels": 8,
+ "attn_kernel_size": 1,
+ "attn_stride": 1,
+ "max_position_embeddings": 784,
+ "num_heads": 4
+ },
+ "bottleneck_kwargs": {},
+ "depths": [
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "embedding_kwargs": {
+ "embedding_kernel_size_1": 7,
+ "embedding_kernel_size_2": 2,
+ "embedding_size": 64,
+ "embedding_stride_1": 2,
+ "embedding_stride_2": 2
+ },
+ "hidden_sizes": [
+ 128,
+ 128,
+ 128,
+ 128
+ ],
+ "id2label": {
+ "0": "airplane",
+ "1": "automobile",
+ "2": "bird",
+ "3": "cat",
+ "4": "deer",
+ "5": "dog",
+ "6": "frog",
+ "7": "horse",
+ "8": "ship",
+ "9": "truck"
+ },
+ "label2id": {
+ "airplane": "0",
+ "automobile": "1",
+ "bird": "2",
+ "cat": "3",
+ "deer": "4",
+ "dog": "5",
+ "frog": "6",
+ "horse": "7",
+ "ship": "8",
+ "truck": "9"
+ },
+ "layer_type": "attention",
+ "main_kernel_size": 3,
+ "model_type": "resnet",
+ "num_channels": 3,
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.2"
+ }
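CifNetForImageClassification is not a stock transformers class, but the config above is plain JSON, so its custom fields can be inspected without the model code; a minimal sketch:

import json

with open("OUTPUTS/cifnet-18-tiny-lr0.01-attention/config.json") as f:
    cfg = json.load(f)

# Custom CifNet fields sit next to the usual ResNet-style ones.
print(cfg["layer_type"])                      # "attention"
print(cfg["depths"], cfg["hidden_sizes"])     # [2, 2, 2, 2] [128, 128, 128, 128]
print(cfg["attention_kwargs"]["num_heads"])   # 4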
OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer/1712011059.4757543/events.out.tfevents.1712011059.ids-ws-06.3573944.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4e78aa7985cd8002a5736ee3f597751cf153da26720997318a5cc03ba2e48cc0
+ size 1527
OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer/1712011059.478225/hparams.yml ADDED
@@ -0,0 +1,30 @@
+ checkpointing_steps: null
+ dataset_name: cifar10
+ gradient_accumulation_steps: 1
+ hub_model_id: null
+ hub_token: null
+ ignore_mismatched_sizes: false
+ image_column_name: img
+ label_column_name: label
+ learning_rate: 0.01
+ lr_scheduler_type: cosine
+ max_eval_samples: null
+ max_train_samples: null
+ max_train_steps: 64000
+ model_name_or_path: MODELS/cifnet-18-tiny_attention
+ num_train_epochs: 193
+ num_warmup_steps: 6400
+ num_workers: 32
+ output_dir: OUTPUTS/cifnet-18-tiny-lr0.01-attention
+ per_device_eval_batch_size: 8
+ per_device_train_batch_size: 128
+ push_to_hub: false
+ report_to: tensorboard
+ resume_from_checkpoint: null
+ seed: 42
+ train_dir: null
+ train_val_split: 0.15
+ trust_remote_code: false
+ validation_dir: null
+ weight_decay: 0.0
+ with_tracking: true
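The hparams.yml files record the exact run settings; they are plain YAML and can be reloaded, e.g. with PyYAML (a minimal sketch, assuming PyYAML is installed):

import yaml

path = ("OUTPUTS/cifnet-18-tiny-lr0.01-attention/"
        "image_classification_no_trainer/1712011059.478225/hparams.yml")
with open(path) as f:
    hparams = yaml.safe_load(f)

print(hparams["learning_rate"], hparams["max_train_steps"])  # 0.01 64000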
OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer/accuracy_accuracy/events.out.tfevents.1712011313.ids-ws-06.3573944.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd1d30768486cf990d9c456f8835bfd8e0377d4b915ba6130a672ec506e7e3f0
+ size 9303
OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer/events.out.tfevents.1712011059.ids-ws-06.3573944.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1297be01c0c58a7253886eb8f4dddf1b936668db78fea5bb51bc5acb5c4dc9c2
+ size 7097748
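The events.out.tfevents.* files are TensorBoard logs stored through Git LFS, so only the pointers appear in this diff. Once the real files are fetched, the scalars can be read back with TensorBoard's EventAccumulator; a minimal sketch, with the tag names left to be discovered since they are not shown here:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

run_dir = "OUTPUTS/cifnet-18-tiny-lr0.01-attention/image_classification_no_trainer"
acc = EventAccumulator(run_dir)
acc.Reload()
print(acc.Tags()["scalars"])   # list the logged scalar tags before querying them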
OUTPUTS/cifnet-18-tiny-lr0.01-attention/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:91cea81fd92b0f4a9a9b227446a74d0d849fac57f8471db014e3769f86c5a53a
+ size 8558960
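model.safetensors is likewise an LFS pointer here (the real checkpoint is about 8.6 MB). After fetching it, the weights can be opened as a plain state dict with the safetensors library; a minimal sketch:

from safetensors.torch import load_file

state_dict = load_file("OUTPUTS/cifnet-18-tiny-lr0.01-attention/model.safetensors")
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape))   # peek at the first few parameter shapes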
OUTPUTS/cifnet-18-tiny-lr0.01-attention/preprocessor_config.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "_valid_processor_keys": [
+ "images",
+ "do_resize",
+ "size",
+ "crop_pct",
+ "resample",
+ "do_rescale",
+ "rescale_factor",
+ "do_normalize",
+ "image_mean",
+ "image_std",
+ "return_tensors",
+ "data_format",
+ "input_data_format"
+ ],
+ "crop_pct": 0.875,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.485,
+ 0.456,
+ 0.406
+ ],
+ "image_processor_type": "ConvNextImageProcessor",
+ "image_std": [
+ 0.229,
+ 0.224,
+ 0.225
+ ],
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "shortest_edge": 224
+ }
+ }
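The preprocessor_config.json declares a stock ConvNextImageProcessor (shortest edge resized to 224/0.875 = 256, center-cropped to 224, rescaled and normalized), so it loads directly from the output folder; a minimal sketch:

from PIL import Image
from transformers import AutoImageProcessor

processor = AutoImageProcessor.from_pretrained("OUTPUTS/cifnet-18-tiny-lr0.01-attention")
inputs = processor(Image.new("RGB", (32, 32)), return_tensors="pt")
print(inputs["pixel_values"].shape)   # torch.Size([1, 3, 224, 224])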
OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/all_results.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.8850666666666667}
OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/config.json ADDED
@@ -0,0 +1,60 @@
+ {
+ "_name_or_path": "microsoft/resnet-18",
+ "activation": "silu",
+ "architectures": [
+ "CifNetForImageClassification"
+ ],
+ "bottleneck_kwargs": {
+ "reduction": 2
+ },
+ "depths": [
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "embedding_kwargs": {
+ "embedding_kernel_size_1": 7,
+ "embedding_kernel_size_2": 2,
+ "embedding_size": 64,
+ "embedding_stride_1": 2,
+ "embedding_stride_2": 2
+ },
+ "hidden_sizes": [
+ 128,
+ 128,
+ 128,
+ 128
+ ],
+ "id2label": {
+ "0": "airplane",
+ "1": "automobile",
+ "2": "bird",
+ "3": "cat",
+ "4": "deer",
+ "5": "dog",
+ "6": "frog",
+ "7": "horse",
+ "8": "ship",
+ "9": "truck"
+ },
+ "label2id": {
+ "airplane": "0",
+ "automobile": "1",
+ "bird": "2",
+ "cat": "3",
+ "deer": "4",
+ "dog": "5",
+ "frog": "6",
+ "horse": "7",
+ "ship": "8",
+ "truck": "9"
+ },
+ "layer_type": "bottleneck",
+ "main_kernel_size": 3,
+ "model_type": "resnet",
+ "num_channels": 3,
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.2"
+ }
OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/image_classification_no_trainer/1712011004.1205187/events.out.tfevents.1712011004.ids-ws-06.3571203.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fa7f8e0970a035148480c2ea26a2b5f3b52a30d30f0b6c65174a7f2e8c66c9d
+ size 1529
OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/image_classification_no_trainer/1712011004.1220295/hparams.yml ADDED
@@ -0,0 +1,30 @@
+ checkpointing_steps: null
+ dataset_name: cifar10
+ gradient_accumulation_steps: 1
+ hub_model_id: null
+ hub_token: null
+ ignore_mismatched_sizes: false
+ image_column_name: img
+ label_column_name: label
+ learning_rate: 0.01
+ lr_scheduler_type: cosine
+ max_eval_samples: null
+ max_train_samples: null
+ max_train_steps: 64000
+ model_name_or_path: MODELS/cifnet-18-tiny_bottleneck
+ num_train_epochs: 193
+ num_warmup_steps: 6400
+ num_workers: 32
+ output_dir: OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck
+ per_device_eval_batch_size: 8
+ per_device_train_batch_size: 128
+ push_to_hub: false
+ report_to: tensorboard
+ resume_from_checkpoint: null
+ seed: 42
+ train_dir: null
+ train_val_split: 0.15
+ trust_remote_code: false
+ validation_dir: null
+ weight_decay: 0.0
+ with_tracking: true
OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/image_classification_no_trainer/accuracy_accuracy/events.out.tfevents.1712011108.ids-ws-06.3571203.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b862c1881b72d740c714a4012bcff65ce8802c94fb4e24934002c572177ad3b
+ size 9303
OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/image_classification_no_trainer/events.out.tfevents.1712011004.ids-ws-06.3571203.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c3afc00d53b3e89aa960a20cb90c030a42cfa8d5313dd03b8176c719379aea1
+ size 7097748
OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2eae9cef23d23e579a121ddde3d08a00293aaa095704518d2c94d26cbf9b273
+ size 1599776
OUTPUTS/cifnet-18-tiny-lr0.01-bottleneck/preprocessor_config.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "_valid_processor_keys": [
+ "images",
+ "do_resize",
+ "size",
+ "crop_pct",
+ "resample",
+ "do_rescale",
+ "rescale_factor",
+ "do_normalize",
+ "image_mean",
+ "image_std",
+ "return_tensors",
+ "data_format",
+ "input_data_format"
+ ],
+ "crop_pct": 0.875,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.485,
+ 0.456,
+ 0.406
+ ],
+ "image_processor_type": "ConvNextImageProcessor",
+ "image_std": [
+ 0.229,
+ 0.224,
+ 0.225
+ ],
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "shortest_edge": 224
+ }
+ }
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/all_results.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9098666666666667}
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/config.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_name_or_path": "microsoft/resnet-18",
+ "activation": "silu",
+ "architectures": [
+ "CifNetForImageClassification"
+ ],
+ "bottleneck_kwargs": {},
+ "depths": [
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "embedding_kwargs": {
+ "embedding_kernel_size_1": 7,
+ "embedding_kernel_size_2": 2,
+ "embedding_size": 64,
+ "embedding_stride_1": 2,
+ "embedding_stride_2": 2
+ },
+ "hidden_sizes": [
+ 128,
+ 128,
+ 128,
+ 128
+ ],
+ "id2label": {
+ "0": "airplane",
+ "1": "automobile",
+ "2": "bird",
+ "3": "cat",
+ "4": "deer",
+ "5": "dog",
+ "6": "frog",
+ "7": "horse",
+ "8": "ship",
+ "9": "truck"
+ },
+ "label2id": {
+ "airplane": "0",
+ "automobile": "1",
+ "bird": "2",
+ "cat": "3",
+ "deer": "4",
+ "dog": "5",
+ "frog": "6",
+ "horse": "7",
+ "ship": "8",
+ "truck": "9"
+ },
+ "layer_type": "basic",
+ "main_kernel_size": 3,
+ "model_type": "resnet",
+ "num_channels": 3,
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.2"
+ }
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/1711990816.257815/events.out.tfevents.1711990816.ids-ws-06.2843209.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a93f5edcaf2926a4d524fec33a0a0cbe2c52ed4b941073b6b525159e7dc31f0
+ size 1515
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/1711990816.2593696/hparams.yml ADDED
@@ -0,0 +1,30 @@
+ checkpointing_steps: null
+ dataset_name: cifar10
+ gradient_accumulation_steps: 1
+ hub_model_id: null
+ hub_token: null
+ ignore_mismatched_sizes: false
+ image_column_name: img
+ label_column_name: label
+ learning_rate: 0.1
+ lr_scheduler_type: cosine
+ max_eval_samples: null
+ max_train_samples: null
+ max_train_steps: 64000
+ model_name_or_path: MODELS/cifnet-18-tiny
+ num_train_epochs: 193
+ num_warmup_steps: 6400
+ num_workers: 8
+ output_dir: OUTPUTS/cifnet-18-tiny-lr0.1-baseline
+ per_device_eval_batch_size: 8
+ per_device_train_batch_size: 64
+ push_to_hub: false
+ report_to: tensorboard
+ resume_from_checkpoint: null
+ seed: 42
+ train_dir: null
+ train_val_split: 0.15
+ trust_remote_code: false
+ validation_dir: null
+ weight_decay: 0.0
+ with_tracking: true
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/accuracy_accuracy/events.out.tfevents.1711990897.ids-ws-06.2843209.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0fae188b2abc54739437bb9e00936e6e5e100a67d09985c5d883f172266c878
+ size 9303
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/events.out.tfevents.1711990816.ids-ws-06.2843209.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b69c4138aa6e5c4f6fc9b73297938dd01c8b91fc4612f41203a91b17736ea16
+ size 7097748
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/events.out.tfevents.1712005878.ids-ws-06.3270132.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9192baa8ad3c9b782adb099cffa391cc790bf488e8c72444bfeb0c0e8eac8ddf
+ size 5224
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/events.out.tfevents.1712005910.ids-ws-06.3270878.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5e863751f1f853cf1b8b12baa014129233e18414db88999a94687f642518ecd
+ size 5331
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/image_classification_no_trainer/events.out.tfevents.1712005974.ids-ws-06.3271814.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:885c493ef11f32ff32116742372f067a841b42b4d0b2ceaade47a0e2ac2fb468
+ size 8541
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7cb9e7fb91383489b4d2d42d418250d1cd7cdafbcefd3b4d922eaa0d3797991
+ size 8082536
OUTPUTS/cifnet-18-tiny-lr0.1-baseline/preprocessor_config.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "_valid_processor_keys": [
+ "images",
+ "do_resize",
+ "size",
+ "crop_pct",
+ "resample",
+ "do_rescale",
+ "rescale_factor",
+ "do_normalize",
+ "image_mean",
+ "image_std",
+ "return_tensors",
+ "data_format",
+ "input_data_format"
+ ],
+ "crop_pct": 0.875,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.485,
+ 0.456,
+ 0.406
+ ],
+ "image_processor_type": "ConvNextImageProcessor",
+ "image_std": [
+ 0.229,
+ 0.224,
+ 0.225
+ ],
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "shortest_edge": 224
+ }
+ }
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/all_results.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.8581333333333333}
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/config.json ADDED
@@ -0,0 +1,67 @@
+ {
+ "_name_or_path": "microsoft/resnet-18",
+ "activation": "silu",
+ "architectures": [
+ "CifNetForImageClassification"
+ ],
+ "attention_kwargs": {
+ "attention_bias": true,
+ "attention_dropout": 0.1,
+ "attn_channels": 8,
+ "attn_kernel_size": 1,
+ "attn_stride": 1,
+ "max_position_embeddings": 784,
+ "num_heads": 4
+ },
+ "bottleneck_kwargs": {},
+ "depths": [
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "embedding_kwargs": {
+ "embedding_kernel_size_1": 7,
+ "embedding_kernel_size_2": 2,
+ "embedding_size": 64,
+ "embedding_stride_1": 2,
+ "embedding_stride_2": 2
+ },
+ "hidden_sizes": [
+ 128,
+ 128,
+ 128,
+ 128
+ ],
+ "id2label": {
+ "0": "airplane",
+ "1": "automobile",
+ "2": "bird",
+ "3": "cat",
+ "4": "deer",
+ "5": "dog",
+ "6": "frog",
+ "7": "horse",
+ "8": "ship",
+ "9": "truck"
+ },
+ "label2id": {
+ "airplane": "0",
+ "automobile": "1",
+ "bird": "2",
+ "cat": "3",
+ "deer": "4",
+ "dog": "5",
+ "frog": "6",
+ "horse": "7",
+ "ship": "8",
+ "truck": "9"
+ },
+ "layer_type": "attention",
+ "main_kernel_size": 3,
+ "model_type": "resnet",
+ "num_channels": 3,
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.2"
+ }
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/image_classification_no_trainer/1712166496.7234852/events.out.tfevents.1712166496.ids-ws-06.1309582.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:851d6e980e9dc0742f553927b6f58f145a780c79883dcdad1ee7c6679fc04472
+ size 1538
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/image_classification_no_trainer/1712166496.724743/hparams.yml ADDED
@@ -0,0 +1,30 @@
+ checkpointing_steps: null
+ dataset_name: cifar10
+ gradient_accumulation_steps: 1
+ hub_model_id: null
+ hub_token: null
+ ignore_mismatched_sizes: false
+ image_column_name: img
+ label_column_name: label
+ learning_rate: 0.001
+ lr_scheduler_type: cosine
+ max_eval_samples: null
+ max_train_samples: null
+ max_train_steps: 64000
+ model_name_or_path: MODELS/cifnet-18-tiny_attention
+ num_train_epochs: 193
+ num_warmup_steps: 6400
+ num_workers: 32
+ output_dir: OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm
+ per_device_eval_batch_size: 8
+ per_device_train_batch_size: 128
+ push_to_hub: false
+ report_to: tensorboard
+ resume_from_checkpoint: null
+ seed: 42
+ train_dir: null
+ train_val_split: 0.15
+ trust_remote_code: false
+ validation_dir: null
+ weight_decay: 0.0
+ with_tracking: true
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/image_classification_no_trainer/accuracy_accuracy/events.out.tfevents.1712166664.ids-ws-06.1309582.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc8b387e8a97d4b9ee10ed2720744cdf37fe66ad7cfd1f3f337a74c80093d3fc
+ size 9303
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/image_classification_no_trainer/events.out.tfevents.1712166496.ids-ws-06.1309582.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e796fb32756b831eaf9a9165a930afbfb4ce88cf21f5ad967cedc3e21f7c3c7
+ size 7097748
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a5c087497fc208338ea1589b401818a3fe31bd68e0ee6df5026cb691977eb05
+ size 8558960
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/preprocessor_config.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "_valid_processor_keys": [
+ "images",
+ "do_resize",
+ "size",
+ "crop_pct",
+ "resample",
+ "do_rescale",
+ "rescale_factor",
+ "do_normalize",
+ "image_mean",
+ "image_std",
+ "return_tensors",
+ "data_format",
+ "input_data_format"
+ ],
+ "crop_pct": 0.875,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.485,
+ 0.456,
+ 0.406
+ ],
+ "image_processor_type": "ConvNextImageProcessor",
+ "image_std": [
+ 0.229,
+ 0.224,
+ 0.225
+ ],
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "shortest_edge": 224
+ }
+ }
OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm/test_model.log ADDED
@@ -0,0 +1,244 @@
+ CifNetForImageClassification(
+ (resnet): CifNetModel(
+ (embedder): CifNetEmbeddings(
+ (embedder): CifNetConvLayer(
+ (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+ (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (activation): SiLU()
+ )
+ (pooler): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
+ )
+ (encoder): CifNetEncoder(
+ (stages): ModuleList(
+ (0): CifNetStage(
+ (layers): Sequential(
+ (0): CifNetSelfAttentionLayer(
+ (shortcut): CifNetShortCut(
+ (convolution): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ (in_conv): CifNetConvLayer(
+ (convolution): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (activation): SiLU()
+ )
+ (attention): CifNetSelfAttention(
+ (q_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (k_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (v_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (o_proj): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))
+ )
+ (activation): SiLU()
+ (attention_norm): CifNetRMSNorm()
+ (out_conv): CifNetConvLayer(
+ (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (activation): SiLU()
+ )
+ )
+ )
+ )
+ (1-3): 3 x CifNetStage(
+ (layers): Sequential(
+ (0): CifNetSelfAttentionLayer(
+ (shortcut): Identity()
+ (in_conv): CifNetConvLayer(
+ (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (activation): SiLU()
+ )
+ (attention): CifNetSelfAttention(
+ (q_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (k_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (v_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (o_proj): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))
+ )
+ (activation): SiLU()
+ (attention_norm): CifNetRMSNorm()
+ (out_conv): CifNetConvLayer(
+ (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (activation): SiLU()
+ )
+ )
+ (1): CifNetSelfAttentionLayer(
+ (shortcut): Identity()
+ (in_conv): CifNetConvLayer(
+ (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (activation): SiLU()
+ )
+ (attention): CifNetSelfAttention(
+ (q_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (k_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (v_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
+ (o_proj): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))
+ )
+ (activation): SiLU()
+ (attention_norm): CifNetRMSNorm()
+ (out_conv): CifNetConvLayer(
+ (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (activation): SiLU()
+ )
+ )
+ )
+ )
+ )
+ )
+ (pooler): AdaptiveAvgPool2d(output_size=(1, 1))
+ )
+ (classifier): Sequential(
+ (0): Flatten(start_dim=1, end_dim=-1)
+ (1): Linear(in_features=128, out_features=10, bias=True)
+ )
+ )
+ ----------------------------------------------------------------
+ Layer (type) Output Shape Param #
+ ================================================================
+ Conv2d-1 [4, 64, 112, 112] 9,408
+ BatchNorm2d-2 [4, 64, 112, 112] 128
+ SiLU-3 [4, 64, 112, 112] 0
+ CifNetConvLayer-4 [4, 64, 112, 112] 0
+ MaxPool2d-5 [4, 64, 56, 56] 0
+ CifNetEmbeddings-6 [4, 64, 56, 56] 0
+ Conv2d-7 [4, 128, 28, 28] 73,728
+ BatchNorm2d-8 [4, 128, 28, 28] 256
+ SiLU-9 [4, 128, 28, 28] 0
+ CifNetConvLayer-10 [4, 128, 28, 28] 0
+ CifNetRMSNorm-11 [4, 28, 28, 128] 128
+ Conv2d-12 [4, 32, 28, 28] 4,128
+ Conv2d-13 [4, 32, 28, 28] 4,128
+ Conv2d-14 [4, 32, 28, 28] 4,128
+ Conv2d-15 [4, 128, 28, 28] 4,224
+ CifNetSelfAttention-16 [4, 128, 28, 28] 0
+ SiLU-17 [4, 128, 28, 28] 0
+ Conv2d-18 [4, 128, 28, 28] 147,456
+ BatchNorm2d-19 [4, 128, 28, 28] 256
+ SiLU-20 [4, 128, 28, 28] 0
+ CifNetConvLayer-21 [4, 128, 28, 28] 0
+ Conv2d-22 [4, 128, 28, 28] 8,192
+ BatchNorm2d-23 [4, 128, 28, 28] 256
+ CifNetShortCut-24 [4, 128, 28, 28] 0
+ CifNetSelfAttentionLayer-25 [4, 128, 28, 28] 0
+ CifNetStage-26 [4, 128, 28, 28] 0
+ Conv2d-27 [4, 128, 28, 28] 147,456
+ BatchNorm2d-28 [4, 128, 28, 28] 256
+ SiLU-29 [4, 128, 28, 28] 0
+ CifNetConvLayer-30 [4, 128, 28, 28] 0
+ CifNetRMSNorm-31 [4, 28, 28, 128] 128
+ Conv2d-32 [4, 32, 28, 28] 4,128
+ Conv2d-33 [4, 32, 28, 28] 4,128
+ Conv2d-34 [4, 32, 28, 28] 4,128
+ Conv2d-35 [4, 128, 28, 28] 4,224
+ CifNetSelfAttention-36 [4, 128, 28, 28] 0
+ SiLU-37 [4, 128, 28, 28] 0
+ Conv2d-38 [4, 128, 28, 28] 147,456
+ BatchNorm2d-39 [4, 128, 28, 28] 256
+ SiLU-40 [4, 128, 28, 28] 0
+ CifNetConvLayer-41 [4, 128, 28, 28] 0
+ Identity-42 [4, 128, 28, 28] 0
+ CifNetSelfAttentionLayer-43 [4, 128, 28, 28] 0
+ Conv2d-44 [4, 128, 28, 28] 147,456
+ BatchNorm2d-45 [4, 128, 28, 28] 256
+ SiLU-46 [4, 128, 28, 28] 0
+ CifNetConvLayer-47 [4, 128, 28, 28] 0
+ CifNetRMSNorm-48 [4, 28, 28, 128] 128
+ Conv2d-49 [4, 32, 28, 28] 4,128
+ Conv2d-50 [4, 32, 28, 28] 4,128
+ Conv2d-51 [4, 32, 28, 28] 4,128
+ Conv2d-52 [4, 128, 28, 28] 4,224
+ CifNetSelfAttention-53 [4, 128, 28, 28] 0
+ SiLU-54 [4, 128, 28, 28] 0
+ Conv2d-55 [4, 128, 28, 28] 147,456
+ BatchNorm2d-56 [4, 128, 28, 28] 256
+ SiLU-57 [4, 128, 28, 28] 0
+ CifNetConvLayer-58 [4, 128, 28, 28] 0
+ Identity-59 [4, 128, 28, 28] 0
+ CifNetSelfAttentionLayer-60 [4, 128, 28, 28] 0
+ CifNetStage-61 [4, 128, 28, 28] 0
+ Conv2d-62 [4, 128, 28, 28] 147,456
+ BatchNorm2d-63 [4, 128, 28, 28] 256
+ SiLU-64 [4, 128, 28, 28] 0
+ CifNetConvLayer-65 [4, 128, 28, 28] 0
+ CifNetRMSNorm-66 [4, 28, 28, 128] 128
+ Conv2d-67 [4, 32, 28, 28] 4,128
+ Conv2d-68 [4, 32, 28, 28] 4,128
+ Conv2d-69 [4, 32, 28, 28] 4,128
+ Conv2d-70 [4, 128, 28, 28] 4,224
+ CifNetSelfAttention-71 [4, 128, 28, 28] 0
+ SiLU-72 [4, 128, 28, 28] 0
+ Conv2d-73 [4, 128, 28, 28] 147,456
+ BatchNorm2d-74 [4, 128, 28, 28] 256
+ SiLU-75 [4, 128, 28, 28] 0
+ CifNetConvLayer-76 [4, 128, 28, 28] 0
+ Identity-77 [4, 128, 28, 28] 0
+ CifNetSelfAttentionLayer-78 [4, 128, 28, 28] 0
+ Conv2d-79 [4, 128, 28, 28] 147,456
+ BatchNorm2d-80 [4, 128, 28, 28] 256
+ SiLU-81 [4, 128, 28, 28] 0
+ CifNetConvLayer-82 [4, 128, 28, 28] 0
+ CifNetRMSNorm-83 [4, 28, 28, 128] 128
+ Conv2d-84 [4, 32, 28, 28] 4,128
+ Conv2d-85 [4, 32, 28, 28] 4,128
+ Conv2d-86 [4, 32, 28, 28] 4,128
+ Conv2d-87 [4, 128, 28, 28] 4,224
+ CifNetSelfAttention-88 [4, 128, 28, 28] 0
+ SiLU-89 [4, 128, 28, 28] 0
+ Conv2d-90 [4, 128, 28, 28] 147,456
+ BatchNorm2d-91 [4, 128, 28, 28] 256
+ SiLU-92 [4, 128, 28, 28] 0
+ CifNetConvLayer-93 [4, 128, 28, 28] 0
+ Identity-94 [4, 128, 28, 28] 0
+ CifNetSelfAttentionLayer-95 [4, 128, 28, 28] 0
+ CifNetStage-96 [4, 128, 28, 28] 0
+ Conv2d-97 [4, 128, 28, 28] 147,456
+ BatchNorm2d-98 [4, 128, 28, 28] 256
+ SiLU-99 [4, 128, 28, 28] 0
+ CifNetConvLayer-100 [4, 128, 28, 28] 0
+ CifNetRMSNorm-101 [4, 28, 28, 128] 128
+ Conv2d-102 [4, 32, 28, 28] 4,128
+ Conv2d-103 [4, 32, 28, 28] 4,128
+ Conv2d-104 [4, 32, 28, 28] 4,128
+ Conv2d-105 [4, 128, 28, 28] 4,224
+ CifNetSelfAttention-106 [4, 128, 28, 28] 0
+ SiLU-107 [4, 128, 28, 28] 0
+ Conv2d-108 [4, 128, 28, 28] 147,456
+ BatchNorm2d-109 [4, 128, 28, 28] 256
+ SiLU-110 [4, 128, 28, 28] 0
+ CifNetConvLayer-111 [4, 128, 28, 28] 0
+ Identity-112 [4, 128, 28, 28] 0
+ CifNetSelfAttentionLayer-113 [4, 128, 28, 28] 0
+ Conv2d-114 [4, 128, 28, 28] 147,456
+ BatchNorm2d-115 [4, 128, 28, 28] 256
+ SiLU-116 [4, 128, 28, 28] 0
+ CifNetConvLayer-117 [4, 128, 28, 28] 0
+ CifNetRMSNorm-118 [4, 28, 28, 128] 128
+ Conv2d-119 [4, 32, 28, 28] 4,128
+ Conv2d-120 [4, 32, 28, 28] 4,128
+ Conv2d-121 [4, 32, 28, 28] 4,128
+ Conv2d-122 [4, 128, 28, 28] 4,224
+ CifNetSelfAttention-123 [4, 128, 28, 28] 0
+ SiLU-124 [4, 128, 28, 28] 0
+ Conv2d-125 [4, 128, 28, 28] 147,456
+ BatchNorm2d-126 [4, 128, 28, 28] 256
+ SiLU-127 [4, 128, 28, 28] 0
+ CifNetConvLayer-128 [4, 128, 28, 28] 0
+ Identity-129 [4, 128, 28, 28] 0
+ CifNetSelfAttentionLayer-130 [4, 128, 28, 28] 0
+ CifNetStage-131 [4, 128, 28, 28] 0
+ CifNetEncoder-132 [[-1, 128, 28, 28]] 0
+ AdaptiveAvgPool2d-133 [4, 128, 1, 1] 0
+ CifNetModel-134 [[-1, 128, 28, 28], [-1, 128, 1, 1]] 0
+ Flatten-135 [4, 128] 0
+ Linear-136 [4, 10] 1,290
+ ================================================================
+ Total params: 2,130,666
+ Trainable params: 2,130,666
+ Non-trainable params: 0
+ ----------------------------------------------------------------
+ Input size (MB): 2.30
+ Forward/backward pass size (MB): 542.07
+ Params size (MB): 8.13
+ Estimated Total Size (MB): 552.50
+ ----------------------------------------------------------------
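The log above is the module tree from print(model) followed by a layer-by-layer summary in the torchsummary format (batch size 4, 3x224x224 input). A minimal sketch of how such a log is typically produced, assuming torchsummary and the project's own CifNetForImageClassification class (the import path is hypothetical, and the model's forward must return plain tensors/tuples for the summary hooks to work):

from torchsummary import summary                                  # pip install torchsummary (assumption)
from cifnet.modeling_cifnet import CifNetForImageClassification   # hypothetical import path

model = CifNetForImageClassification.from_pretrained(
    "OUTPUTS/cifnet-18-tiny_attention--lr0.001--prenorm"
)
print(model)                                                      # the module tree at the top of the log
summary(model, input_size=(3, 224, 224), batch_size=4, device="cpu")  # the parameter table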