obudzecie commited on
Commit
9bea82c
1 Parent(s): 41b0f6c

Training in progress, epoch 1

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. model.safetensors +1 -1
  2. run-2/checkpoint-54/model.safetensors +1 -1
  3. run-2/checkpoint-54/optimizer.pt +1 -1
  4. run-2/checkpoint-54/rng_state.pth +2 -2
  5. run-2/checkpoint-54/scheduler.pt +1 -1
  6. run-2/checkpoint-54/trainer_state.json +13 -13
  7. run-2/checkpoint-54/training_args.bin +1 -1
  8. run-2/checkpoint-81/config.json +34 -0
  9. run-2/checkpoint-81/model.safetensors +3 -0
  10. run-2/checkpoint-81/optimizer.pt +3 -0
  11. run-2/checkpoint-81/rng_state.pth +3 -0
  12. run-2/checkpoint-81/scheduler.pt +3 -0
  13. run-2/checkpoint-81/special_tokens_map.json +7 -0
  14. run-2/checkpoint-81/tokenizer.json +0 -0
  15. run-2/checkpoint-81/tokenizer_config.json +55 -0
  16. run-2/checkpoint-81/trainer_state.json +53 -0
  17. run-2/checkpoint-81/training_args.bin +3 -0
  18. run-2/checkpoint-81/vocab.txt +0 -0
  19. run-3/checkpoint-108/model.safetensors +1 -1
  20. run-3/checkpoint-108/optimizer.pt +1 -1
  21. run-3/checkpoint-108/rng_state.pth +2 -2
  22. run-3/checkpoint-108/scheduler.pt +1 -1
  23. run-3/checkpoint-108/trainer_state.json +18 -36
  24. run-3/checkpoint-108/training_args.bin +1 -1
  25. run-3/checkpoint-162/config.json +34 -0
  26. run-3/checkpoint-162/model.safetensors +3 -0
  27. run-3/checkpoint-162/optimizer.pt +3 -0
  28. run-3/checkpoint-162/rng_state.pth +3 -0
  29. run-3/checkpoint-162/scheduler.pt +3 -0
  30. run-3/checkpoint-162/special_tokens_map.json +7 -0
  31. run-3/checkpoint-162/tokenizer.json +0 -0
  32. run-3/checkpoint-162/tokenizer_config.json +55 -0
  33. run-3/checkpoint-162/trainer_state.json +53 -0
  34. run-3/checkpoint-162/training_args.bin +3 -0
  35. run-3/checkpoint-162/vocab.txt +0 -0
  36. run-3/checkpoint-216/config.json +34 -0
  37. run-3/checkpoint-216/model.safetensors +3 -0
  38. run-3/checkpoint-216/optimizer.pt +3 -0
  39. run-3/checkpoint-216/rng_state.pth +3 -0
  40. run-3/checkpoint-216/scheduler.pt +3 -0
  41. run-3/checkpoint-216/special_tokens_map.json +7 -0
  42. run-3/checkpoint-216/tokenizer.json +0 -0
  43. run-3/checkpoint-216/tokenizer_config.json +55 -0
  44. run-3/checkpoint-216/trainer_state.json +62 -0
  45. run-3/checkpoint-216/training_args.bin +3 -0
  46. run-3/checkpoint-216/vocab.txt +0 -0
  47. run-3/checkpoint-270/config.json +34 -0
  48. run-3/checkpoint-270/model.safetensors +3 -0
  49. run-3/checkpoint-270/optimizer.pt +3 -0
  50. run-3/checkpoint-270/rng_state.pth +3 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51fb9887879e7731535630c099dee14cb777dff78dbf5ccc6fd9176a23bea52e
3
  size 98470112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:982481fd3ffbd778cdcb14273c00726a5ba3ed5a1e7cde523147cbb155bb6724
3
  size 98470112
run-2/checkpoint-54/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99ce5cba71ab627aa4d3b1694264448dd6b7c33fa58fdf00e3b68101b022d151
3
  size 98470112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:540137d39291365f7824650a486a1cfac19ef52ae7195604089bd9d2c38a7bb2
3
  size 98470112
run-2/checkpoint-54/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecf9090313f1d1e637238680271b34cf9e7a19532a6474f42fe7432fe43e0ff6
3
  size 197593757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c3954f9d5c69a21026987e5a2f822665171405111208af2594a91ce0f04127
3
  size 197593757
run-2/checkpoint-54/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d07fb318ad742c1c4d329e1f07ad38c5c4a57d1f01ad4efb31257c2bccb417a
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74113234244fc59c10f06606eecdcf6b721037b8c5df7d0a6d0e8785fe5adefe
3
+ size 14308
run-2/checkpoint-54/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9b71680a09b19af463162c25c87905ddd0917ca13d590913abe6708a7d33215
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c95cbb4865a285b137707b40f5ecac3bf0e184eb2c4ca71a2c50d70f30cfe2ee
3
  size 1064
run-2/checkpoint-54/trainer_state.json CHANGED
@@ -10,35 +10,35 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 18281.876953125,
14
  "eval_matthews_correlation": 0.0,
15
- "eval_runtime": 2.4635,
16
- "eval_samples_per_second": 423.379,
17
- "eval_steps_per_second": 26.791,
18
  "step": 27
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_loss": 866.8018798828125,
23
  "eval_matthews_correlation": 0.0,
24
- "eval_runtime": 2.6773,
25
- "eval_samples_per_second": 389.575,
26
- "eval_steps_per_second": 24.652,
27
  "step": 54
28
  }
29
  ],
30
  "logging_steps": 500,
31
- "max_steps": 54,
32
  "num_input_tokens_seen": 0,
33
- "num_train_epochs": 2,
34
  "save_steps": 500,
35
  "total_flos": 0,
36
  "train_batch_size": 32,
37
  "trial_name": null,
38
  "trial_params": {
39
- "learning_rate": 2.81582432447674e-05,
40
- "num_train_epochs": 2,
41
  "per_device_train_batch_size": 32,
42
- "seed": 5
43
  }
44
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 72.75434112548828,
14
  "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 2.5623,
16
+ "eval_samples_per_second": 407.053,
17
+ "eval_steps_per_second": 25.758,
18
  "step": 27
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_loss": 10.623336791992188,
23
  "eval_matthews_correlation": 0.0,
24
+ "eval_runtime": 2.7349,
25
+ "eval_samples_per_second": 381.366,
26
+ "eval_steps_per_second": 24.132,
27
  "step": 54
28
  }
29
  ],
30
  "logging_steps": 500,
31
+ "max_steps": 81,
32
  "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 3,
34
  "save_steps": 500,
35
  "total_flos": 0,
36
  "train_batch_size": 32,
37
  "trial_name": null,
38
  "trial_params": {
39
+ "learning_rate": 3.553146470021689e-05,
40
+ "num_train_epochs": 3,
41
  "per_device_train_batch_size": 32,
42
+ "seed": 20
43
  }
44
  }
run-2/checkpoint-54/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1b81ffb403f14950689bf76af4de34d59644f8842de05b90ea8aa4ee6730d4b
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a07d48cd4cae2aa7c1351f9ead38dc0c59969fb1a8ad67e6d55ee9559d5a17
3
  size 4920
run-2/checkpoint-81/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mobilebert-uncased",
3
+ "architectures": [
4
+ "MobileBertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_activation": false,
8
+ "classifier_dropout": null,
9
+ "embedding_size": 128,
10
+ "hidden_act": "relu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "intra_bottleneck_size": 128,
16
+ "key_query_shared_bottleneck": true,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "mobilebert",
20
+ "normalization_type": "no_norm",
21
+ "num_attention_heads": 4,
22
+ "num_feedforward_networks": 4,
23
+ "num_hidden_layers": 24,
24
+ "pad_token_id": 0,
25
+ "problem_type": "single_label_classification",
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.38.2",
28
+ "trigram_input": true,
29
+ "true_hidden_size": 128,
30
+ "type_vocab_size": 2,
31
+ "use_bottleneck": true,
32
+ "use_bottleneck_attention": false,
33
+ "vocab_size": 30522
34
+ }
run-2/checkpoint-81/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3738ca6347fd1f127642d598a7b3a19a903e4433bb5efecde23ac19a3acae679
3
+ size 98470112
run-2/checkpoint-81/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe7ace3ef14c24a134ce0da52b1a43ff2045995ecf02ec1f3ae6447690e7e000
3
+ size 197593757
run-2/checkpoint-81/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54952032a35a217f046acb952da399620f982376d56f2ed574a821f837d6b3d0
3
+ size 14308
run-2/checkpoint-81/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad0780250fc8eb36278683f736501b86df6bf7614542c9d6a3799cc8cb351bb6
3
+ size 1064
run-2/checkpoint-81/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-2/checkpoint-81/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-81/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 1000000000000000019884624838656,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "MobileBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-2/checkpoint-81/trainer_state.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.017987145841259707,
3
+ "best_model_checkpoint": "mobilebert-uncased-finetuned-cola/run-2/checkpoint-81",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 81,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 72.75434112548828,
14
+ "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 2.5623,
16
+ "eval_samples_per_second": 407.053,
17
+ "eval_steps_per_second": 25.758,
18
+ "step": 27
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_loss": 10.623336791992188,
23
+ "eval_matthews_correlation": 0.0,
24
+ "eval_runtime": 2.7349,
25
+ "eval_samples_per_second": 381.366,
26
+ "eval_steps_per_second": 24.132,
27
+ "step": 54
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_loss": 2.500556707382202,
32
+ "eval_matthews_correlation": 0.017987145841259707,
33
+ "eval_runtime": 2.6161,
34
+ "eval_samples_per_second": 398.684,
35
+ "eval_steps_per_second": 25.228,
36
+ "step": 81
37
+ }
38
+ ],
39
+ "logging_steps": 500,
40
+ "max_steps": 81,
41
+ "num_input_tokens_seen": 0,
42
+ "num_train_epochs": 3,
43
+ "save_steps": 500,
44
+ "total_flos": 0,
45
+ "train_batch_size": 32,
46
+ "trial_name": null,
47
+ "trial_params": {
48
+ "learning_rate": 3.553146470021689e-05,
49
+ "num_train_epochs": 3,
50
+ "per_device_train_batch_size": 32,
51
+ "seed": 20
52
+ }
53
+ }
run-2/checkpoint-81/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a07d48cd4cae2aa7c1351f9ead38dc0c59969fb1a8ad67e6d55ee9559d5a17
3
+ size 4920
run-2/checkpoint-81/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-108/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68c1a9b3f0bef5f4c5d8933f6aadf43a5d25cf1e5fd3711ad4ee734f31ff5c2c
3
  size 98470112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b355b3e5611301994aa0d7b523facf721d6461b99a5e5a493ff78a423d6c06
3
  size 98470112
run-3/checkpoint-108/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0eeb150a98b573cd4f89207d5af2963be915b7797bfad8b8b75e9d6222e70369
3
  size 197593757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0427b96717b17debe88f508659c41b7c9981597d73895c6fb1c15faa2894bfd7
3
  size 197593757
run-3/checkpoint-108/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80287e3a3f24b9b81a61fdf60a7e439c30cafc7c85c637ed40d0db4ef194eee9
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b6f4fa8c35e4cbe184ee3aca0c688132276e5921db6dd846d90d25701703ad5
3
+ size 14244
run-3/checkpoint-108/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77c5331ad403b52131b235442d04fdcd60e1af6ebb4f844c84f85483f181f5c5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90cd5425ddfb5a55dfddfc4e04016b7b905cbe8b3ae46ed8c40efe875a2594bc
3
  size 1064
run-3/checkpoint-108/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.1820081432026257,
3
  "best_model_checkpoint": "mobilebert-uncased-finetuned-cola/run-3/checkpoint-108",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 108,
7
  "is_hyper_param_search": true,
@@ -10,53 +10,35 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.7651640176773071,
14
- "eval_matthews_correlation": 0.00286100001416597,
15
- "eval_runtime": 2.6861,
16
- "eval_samples_per_second": 388.297,
17
- "eval_steps_per_second": 24.571,
18
- "step": 27
19
- },
20
- {
21
- "epoch": 2.0,
22
- "eval_loss": 0.6181267499923706,
23
- "eval_matthews_correlation": 0.07738463889453959,
24
- "eval_runtime": 2.7756,
25
- "eval_samples_per_second": 375.772,
26
- "eval_steps_per_second": 23.778,
27
  "step": 54
28
  },
29
  {
30
- "epoch": 3.0,
31
- "eval_loss": 0.6142112612724304,
32
- "eval_matthews_correlation": 0.10436722000562923,
33
- "eval_runtime": 2.8806,
34
- "eval_samples_per_second": 362.077,
35
- "eval_steps_per_second": 22.912,
36
- "step": 81
37
- },
38
- {
39
- "epoch": 4.0,
40
- "eval_loss": 0.6295883059501648,
41
- "eval_matthews_correlation": 0.1820081432026257,
42
- "eval_runtime": 2.6099,
43
- "eval_samples_per_second": 399.635,
44
- "eval_steps_per_second": 25.289,
45
  "step": 108
46
  }
47
  ],
48
  "logging_steps": 500,
49
- "max_steps": 135,
50
  "num_input_tokens_seen": 0,
51
  "num_train_epochs": 5,
52
  "save_steps": 500,
53
  "total_flos": 0,
54
- "train_batch_size": 32,
55
  "trial_name": null,
56
  "trial_params": {
57
- "learning_rate": 7.240804873707743e-05,
58
  "num_train_epochs": 5,
59
- "per_device_train_batch_size": 32,
60
- "seed": 26
61
  }
62
  }
 
1
  {
2
+ "best_metric": 0.04097530686838422,
3
  "best_model_checkpoint": "mobilebert-uncased-finetuned-cola/run-3/checkpoint-108",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 108,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 196.7971649169922,
14
+ "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 2.5752,
16
+ "eval_samples_per_second": 405.024,
17
+ "eval_steps_per_second": 25.63,
 
 
 
 
 
 
 
 
 
18
  "step": 54
19
  },
20
  {
21
+ "epoch": 2.0,
22
+ "eval_loss": 3.1003661155700684,
23
+ "eval_matthews_correlation": 0.04097530686838422,
24
+ "eval_runtime": 2.7354,
25
+ "eval_samples_per_second": 381.302,
26
+ "eval_steps_per_second": 24.128,
 
 
 
 
 
 
 
 
 
27
  "step": 108
28
  }
29
  ],
30
  "logging_steps": 500,
31
+ "max_steps": 270,
32
  "num_input_tokens_seen": 0,
33
  "num_train_epochs": 5,
34
  "save_steps": 500,
35
  "total_flos": 0,
36
+ "train_batch_size": 16,
37
  "trial_name": null,
38
  "trial_params": {
39
+ "learning_rate": 1.6103502735474484e-05,
40
  "num_train_epochs": 5,
41
+ "per_device_train_batch_size": 16,
42
+ "seed": 6
43
  }
44
  }
run-3/checkpoint-108/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36785e41a5c6212d878c92ad6ba2710c2bfea93bb0742241dc2ee9d2b361a3a8
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc97d4fded77640ee20a5deba4627ded60c972cf60dd6d6c19e1214346bf5c66
3
  size 4920
run-3/checkpoint-162/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mobilebert-uncased",
3
+ "architectures": [
4
+ "MobileBertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_activation": false,
8
+ "classifier_dropout": null,
9
+ "embedding_size": 128,
10
+ "hidden_act": "relu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "intra_bottleneck_size": 128,
16
+ "key_query_shared_bottleneck": true,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "mobilebert",
20
+ "normalization_type": "no_norm",
21
+ "num_attention_heads": 4,
22
+ "num_feedforward_networks": 4,
23
+ "num_hidden_layers": 24,
24
+ "pad_token_id": 0,
25
+ "problem_type": "single_label_classification",
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.38.2",
28
+ "trigram_input": true,
29
+ "true_hidden_size": 128,
30
+ "type_vocab_size": 2,
31
+ "use_bottleneck": true,
32
+ "use_bottleneck_attention": false,
33
+ "vocab_size": 30522
34
+ }
run-3/checkpoint-162/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad09e78a65667a6d70a803c7bf5922b369a4dc06befdb5003990e2b81f550fb
3
+ size 98470112
run-3/checkpoint-162/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4c42bd7e1675c99c5d39e16233f020cbc33eba62f8249e6ae5fe6eabeb69231
3
+ size 197593757
run-3/checkpoint-162/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c02ad87af1fadff9f05de95e8b3889ce91684ff9706a07526c0936bc54f646
3
+ size 14244
run-3/checkpoint-162/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f02b6f673c982dda014ce1d94bfe20abc9359cf878f1331b47a02a753187972f
3
+ size 1064
run-3/checkpoint-162/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-3/checkpoint-162/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-162/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 1000000000000000019884624838656,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "MobileBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-3/checkpoint-162/trainer_state.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.04097530686838422,
3
+ "best_model_checkpoint": "mobilebert-uncased-finetuned-cola/run-3/checkpoint-108",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 162,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 196.7971649169922,
14
+ "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 2.5752,
16
+ "eval_samples_per_second": 405.024,
17
+ "eval_steps_per_second": 25.63,
18
+ "step": 54
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_loss": 3.1003661155700684,
23
+ "eval_matthews_correlation": 0.04097530686838422,
24
+ "eval_runtime": 2.7354,
25
+ "eval_samples_per_second": 381.302,
26
+ "eval_steps_per_second": 24.128,
27
+ "step": 108
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_loss": 2.9685006141662598,
32
+ "eval_matthews_correlation": -0.005957385967432711,
33
+ "eval_runtime": 2.5346,
34
+ "eval_samples_per_second": 411.508,
35
+ "eval_steps_per_second": 26.04,
36
+ "step": 162
37
+ }
38
+ ],
39
+ "logging_steps": 500,
40
+ "max_steps": 270,
41
+ "num_input_tokens_seen": 0,
42
+ "num_train_epochs": 5,
43
+ "save_steps": 500,
44
+ "total_flos": 0,
45
+ "train_batch_size": 16,
46
+ "trial_name": null,
47
+ "trial_params": {
48
+ "learning_rate": 1.6103502735474484e-05,
49
+ "num_train_epochs": 5,
50
+ "per_device_train_batch_size": 16,
51
+ "seed": 6
52
+ }
53
+ }
run-3/checkpoint-162/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc97d4fded77640ee20a5deba4627ded60c972cf60dd6d6c19e1214346bf5c66
3
+ size 4920
run-3/checkpoint-162/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-216/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mobilebert-uncased",
3
+ "architectures": [
4
+ "MobileBertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_activation": false,
8
+ "classifier_dropout": null,
9
+ "embedding_size": 128,
10
+ "hidden_act": "relu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "intra_bottleneck_size": 128,
16
+ "key_query_shared_bottleneck": true,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "mobilebert",
20
+ "normalization_type": "no_norm",
21
+ "num_attention_heads": 4,
22
+ "num_feedforward_networks": 4,
23
+ "num_hidden_layers": 24,
24
+ "pad_token_id": 0,
25
+ "problem_type": "single_label_classification",
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.38.2",
28
+ "trigram_input": true,
29
+ "true_hidden_size": 128,
30
+ "type_vocab_size": 2,
31
+ "use_bottleneck": true,
32
+ "use_bottleneck_attention": false,
33
+ "vocab_size": 30522
34
+ }
run-3/checkpoint-216/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65341f6c3ec2278bc5b711c0f713221af18dde894e801cd0e71b7e7447a6e371
3
+ size 98470112
run-3/checkpoint-216/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db70ea0e8015143abe7343270b839b150026b0720c86e14d573a850b14dda820
3
+ size 197593757
run-3/checkpoint-216/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ee9b3783190c1317315fb76c619ca924c1add831ec42ba404c15552f94086da
3
+ size 14244
run-3/checkpoint-216/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acb220ca70173bbc62a4e296abdd4ec2140fde11a49e105764310c081b50be40
3
+ size 1064
run-3/checkpoint-216/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-3/checkpoint-216/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-216/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 1000000000000000019884624838656,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "MobileBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-3/checkpoint-216/trainer_state.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.04097530686838422,
3
+ "best_model_checkpoint": "mobilebert-uncased-finetuned-cola/run-3/checkpoint-108",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 216,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 196.7971649169922,
14
+ "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 2.5752,
16
+ "eval_samples_per_second": 405.024,
17
+ "eval_steps_per_second": 25.63,
18
+ "step": 54
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_loss": 3.1003661155700684,
23
+ "eval_matthews_correlation": 0.04097530686838422,
24
+ "eval_runtime": 2.7354,
25
+ "eval_samples_per_second": 381.302,
26
+ "eval_steps_per_second": 24.128,
27
+ "step": 108
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_loss": 2.9685006141662598,
32
+ "eval_matthews_correlation": -0.005957385967432711,
33
+ "eval_runtime": 2.5346,
34
+ "eval_samples_per_second": 411.508,
35
+ "eval_steps_per_second": 26.04,
36
+ "step": 162
37
+ },
38
+ {
39
+ "epoch": 4.0,
40
+ "eval_loss": 2.4282734394073486,
41
+ "eval_matthews_correlation": 0.00286100001416597,
42
+ "eval_runtime": 2.3378,
43
+ "eval_samples_per_second": 446.151,
44
+ "eval_steps_per_second": 28.232,
45
+ "step": 216
46
+ }
47
+ ],
48
+ "logging_steps": 500,
49
+ "max_steps": 270,
50
+ "num_input_tokens_seen": 0,
51
+ "num_train_epochs": 5,
52
+ "save_steps": 500,
53
+ "total_flos": 0,
54
+ "train_batch_size": 16,
55
+ "trial_name": null,
56
+ "trial_params": {
57
+ "learning_rate": 1.6103502735474484e-05,
58
+ "num_train_epochs": 5,
59
+ "per_device_train_batch_size": 16,
60
+ "seed": 6
61
+ }
62
+ }
run-3/checkpoint-216/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc97d4fded77640ee20a5deba4627ded60c972cf60dd6d6c19e1214346bf5c66
3
+ size 4920
run-3/checkpoint-216/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-270/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mobilebert-uncased",
3
+ "architectures": [
4
+ "MobileBertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_activation": false,
8
+ "classifier_dropout": null,
9
+ "embedding_size": 128,
10
+ "hidden_act": "relu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "intra_bottleneck_size": 128,
16
+ "key_query_shared_bottleneck": true,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "mobilebert",
20
+ "normalization_type": "no_norm",
21
+ "num_attention_heads": 4,
22
+ "num_feedforward_networks": 4,
23
+ "num_hidden_layers": 24,
24
+ "pad_token_id": 0,
25
+ "problem_type": "single_label_classification",
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.38.2",
28
+ "trigram_input": true,
29
+ "true_hidden_size": 128,
30
+ "type_vocab_size": 2,
31
+ "use_bottleneck": true,
32
+ "use_bottleneck_attention": false,
33
+ "vocab_size": 30522
34
+ }
run-3/checkpoint-270/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2c17bc9d87b8143082f24f5be0029f79fd313c910d347877b7c483915addc6
3
+ size 98470112
run-3/checkpoint-270/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb59a0437619462e22672017e9657acea9d24429718a867fec9221d581c78a0
3
+ size 197593757
run-3/checkpoint-270/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1fc33b1ee8e300f7c89ce269a0d25460f295944da1e930073487c841a79c04c
3
+ size 14244