Ghaith Dekhili commited on
Commit
c39701d
1 Parent(s): 7a399b7

add qqp first model

Browse files
Files changed (45) hide show
  1. qqp_#1_glue_cased/checkpoint-11371/config.json +22 -0
  2. qqp_#1_glue_cased/checkpoint-11371/optimizer.pt +3 -0
  3. qqp_#1_glue_cased/checkpoint-11371/pytorch_model.bin +3 -0
  4. qqp_#1_glue_cased/checkpoint-11371/scheduler.pt +3 -0
  5. qqp_#1_glue_cased/checkpoint-11371/special_tokens_map.json +1 -0
  6. qqp_#1_glue_cased/checkpoint-11371/tokenizer_config.json +1 -0
  7. qqp_#1_glue_cased/checkpoint-11371/trainer_state.json +155 -0
  8. qqp_#1_glue_cased/checkpoint-11371/training_args.bin +3 -0
  9. qqp_#1_glue_cased/checkpoint-11371/vocab.txt +0 -0
  10. qqp_#1_glue_cased/checkpoint-22742/config.json +22 -0
  11. qqp_#1_glue_cased/checkpoint-22742/optimizer.pt +3 -0
  12. qqp_#1_glue_cased/checkpoint-22742/pytorch_model.bin +3 -0
  13. qqp_#1_glue_cased/checkpoint-22742/scheduler.pt +3 -0
  14. qqp_#1_glue_cased/checkpoint-22742/special_tokens_map.json +1 -0
  15. qqp_#1_glue_cased/checkpoint-22742/tokenizer_config.json +1 -0
  16. qqp_#1_glue_cased/checkpoint-22742/trainer_state.json +300 -0
  17. qqp_#1_glue_cased/checkpoint-22742/training_args.bin +3 -0
  18. qqp_#1_glue_cased/checkpoint-22742/vocab.txt +0 -0
  19. qqp_#1_glue_cased/checkpoint-34113/config.json +22 -0
  20. qqp_#1_glue_cased/checkpoint-34113/optimizer.pt +3 -0
  21. qqp_#1_glue_cased/checkpoint-34113/pytorch_model.bin +3 -0
  22. qqp_#1_glue_cased/checkpoint-34113/scheduler.pt +3 -0
  23. qqp_#1_glue_cased/checkpoint-34113/special_tokens_map.json +1 -0
  24. qqp_#1_glue_cased/checkpoint-34113/tokenizer_config.json +1 -0
  25. qqp_#1_glue_cased/checkpoint-34113/trainer_state.json +445 -0
  26. qqp_#1_glue_cased/checkpoint-34113/training_args.bin +3 -0
  27. qqp_#1_glue_cased/checkpoint-34113/vocab.txt +0 -0
  28. qqp_#1_glue_cased/checkpoint-45484/config.json +22 -0
  29. qqp_#1_glue_cased/checkpoint-45484/optimizer.pt +3 -0
  30. qqp_#1_glue_cased/checkpoint-45484/pytorch_model.bin +3 -0
  31. qqp_#1_glue_cased/checkpoint-45484/scheduler.pt +3 -0
  32. qqp_#1_glue_cased/checkpoint-45484/special_tokens_map.json +1 -0
  33. qqp_#1_glue_cased/checkpoint-45484/tokenizer_config.json +1 -0
  34. qqp_#1_glue_cased/checkpoint-45484/trainer_state.json +584 -0
  35. qqp_#1_glue_cased/checkpoint-45484/training_args.bin +3 -0
  36. qqp_#1_glue_cased/checkpoint-45484/vocab.txt +0 -0
  37. qqp_#1_glue_cased/checkpoint-56855/config.json +22 -0
  38. qqp_#1_glue_cased/checkpoint-56855/optimizer.pt +3 -0
  39. qqp_#1_glue_cased/checkpoint-56855/pytorch_model.bin +3 -0
  40. qqp_#1_glue_cased/checkpoint-56855/scheduler.pt +3 -0
  41. qqp_#1_glue_cased/checkpoint-56855/special_tokens_map.json +1 -0
  42. qqp_#1_glue_cased/checkpoint-56855/tokenizer_config.json +1 -0
  43. qqp_#1_glue_cased/checkpoint-56855/trainer_state.json +729 -0
  44. qqp_#1_glue_cased/checkpoint-56855/training_args.bin +3 -0
  45. qqp_#1_glue_cased/checkpoint-56855/vocab.txt +0 -0
qqp_#1_glue_cased/checkpoint-11371/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "type_vocab_size": 2,
21
+ "vocab_size": 30522
22
+ }
qqp_#1_glue_cased/checkpoint-11371/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f15d2880a57a078d7960afa87712ffcbfe2cca2aed40b9e8c5ce024643288e0
3
+ size 875930250
qqp_#1_glue_cased/checkpoint-11371/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc88dc08cf1dfb8aa3cb052fd19da8b6b3f7735fd933dfea3b549c4a6d691aa0
3
+ size 437989677
qqp_#1_glue_cased/checkpoint-11371/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:808218acdfcc7574a13c9698f4b4d48157b70d73351334181971c0dce7c28e0a
3
+ size 326
qqp_#1_glue_cased/checkpoint-11371/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
qqp_#1_glue_cased/checkpoint-11371/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-uncased"}
qqp_#1_glue_cased/checkpoint-11371/trainer_state.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8963888201830325,
3
+ "best_model_checkpoint": "qqp_#1_glue_cased/checkpoint-11371",
4
+ "epoch": 1.0,
5
+ "global_step": 11371,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04397150646381145,
12
+ "learning_rate": 1.9824113974144757e-05,
13
+ "loss": 0.43561614990234376,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.0879430129276229,
18
+ "learning_rate": 1.9648227948289512e-05,
19
+ "loss": 0.3685419006347656,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.13191451939143434,
24
+ "learning_rate": 1.9472341922434264e-05,
25
+ "loss": 0.3377880249023438,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.1758860258552458,
30
+ "learning_rate": 1.929645589657902e-05,
31
+ "loss": 0.327943115234375,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.21985753231905725,
36
+ "learning_rate": 1.9120569870723774e-05,
37
+ "loss": 0.3118376159667969,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.2638290387828687,
42
+ "learning_rate": 1.894468384486853e-05,
43
+ "loss": 0.3047716064453125,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.30780054524668016,
48
+ "learning_rate": 1.876879781901328e-05,
49
+ "loss": 0.3038143005371094,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.3517720517104916,
54
+ "learning_rate": 1.8592911793158036e-05,
55
+ "loss": 0.2949424438476563,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.39574355817430307,
60
+ "learning_rate": 1.8417025767302788e-05,
61
+ "loss": 0.2912021789550781,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.4397150646381145,
66
+ "learning_rate": 1.8241139741447543e-05,
67
+ "loss": 0.2867056884765625,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.483686571101926,
72
+ "learning_rate": 1.80652537155923e-05,
73
+ "loss": 0.27814862060546874,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.5276580775657373,
78
+ "learning_rate": 1.788936768973705e-05,
79
+ "loss": 0.2777033996582031,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.5716295840295489,
84
+ "learning_rate": 1.7713481663881805e-05,
85
+ "loss": 0.2808096618652344,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.6156010904933603,
90
+ "learning_rate": 1.753759563802656e-05,
91
+ "loss": 0.2740480041503906,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.6595725969571717,
96
+ "learning_rate": 1.7361709612171312e-05,
97
+ "loss": 0.2704828796386719,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.7035441034209832,
102
+ "learning_rate": 1.7185823586316067e-05,
103
+ "loss": 0.26128680419921874,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.7475156098847947,
108
+ "learning_rate": 1.7009937560460823e-05,
109
+ "loss": 0.26227166748046876,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.7914871163486061,
114
+ "learning_rate": 1.6834051534605578e-05,
115
+ "loss": 0.257682861328125,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.8354586228124176,
120
+ "learning_rate": 1.665816550875033e-05,
121
+ "loss": 0.25748046875,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.879430129276229,
126
+ "learning_rate": 1.6482279482895085e-05,
127
+ "loss": 0.26029693603515625,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.9234016357400404,
132
+ "learning_rate": 1.630639345703984e-05,
133
+ "loss": 0.25330767822265626,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.967373142203852,
138
+ "learning_rate": 1.6130507431184595e-05,
139
+ "loss": 0.24725408935546875,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 1.0,
144
+ "eval_accuracy": 0.8963888201830325,
145
+ "eval_f1": 0.8609460580912863,
146
+ "eval_loss": 0.2415674775838852,
147
+ "step": 11371
148
+ }
149
+ ],
150
+ "max_steps": 56855,
151
+ "num_train_epochs": 5,
152
+ "total_flos": 16411968232354488,
153
+ "trial_name": null,
154
+ "trial_params": null
155
+ }
qqp_#1_glue_cased/checkpoint-11371/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bd0df48bb459380a4288a73c472c5ce3866bb691c774490d2ba99b80b24e42f
3
+ size 1639
qqp_#1_glue_cased/checkpoint-11371/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
qqp_#1_glue_cased/checkpoint-22742/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "type_vocab_size": 2,
21
+ "vocab_size": 30522
22
+ }
qqp_#1_glue_cased/checkpoint-22742/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecb49ada201914f3fcd7f726062bbc4aea16628890b77ac29020bf889386fda1
3
+ size 875930250
qqp_#1_glue_cased/checkpoint-22742/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:770cc0990e56c1ea2ab3d79909fa2ec8d44a00415cfe3f79bbf204b16cfcc7d0
3
+ size 437989677
qqp_#1_glue_cased/checkpoint-22742/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1ae497f71c5216722a424f4365b28e4dc6d2eb0ab71d80e7664b3278d89fd67
3
+ size 326
qqp_#1_glue_cased/checkpoint-22742/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
qqp_#1_glue_cased/checkpoint-22742/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-uncased"}
qqp_#1_glue_cased/checkpoint-22742/trainer_state.json ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9078407123423201,
3
+ "best_model_checkpoint": "qqp_#1_glue_cased/checkpoint-22742",
4
+ "epoch": 2.0,
5
+ "global_step": 22742,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04397150646381145,
12
+ "learning_rate": 1.9824113974144757e-05,
13
+ "loss": 0.43561614990234376,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.0879430129276229,
18
+ "learning_rate": 1.9648227948289512e-05,
19
+ "loss": 0.3685419006347656,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.13191451939143434,
24
+ "learning_rate": 1.9472341922434264e-05,
25
+ "loss": 0.3377880249023438,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.1758860258552458,
30
+ "learning_rate": 1.929645589657902e-05,
31
+ "loss": 0.327943115234375,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.21985753231905725,
36
+ "learning_rate": 1.9120569870723774e-05,
37
+ "loss": 0.3118376159667969,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.2638290387828687,
42
+ "learning_rate": 1.894468384486853e-05,
43
+ "loss": 0.3047716064453125,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.30780054524668016,
48
+ "learning_rate": 1.876879781901328e-05,
49
+ "loss": 0.3038143005371094,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.3517720517104916,
54
+ "learning_rate": 1.8592911793158036e-05,
55
+ "loss": 0.2949424438476563,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.39574355817430307,
60
+ "learning_rate": 1.8417025767302788e-05,
61
+ "loss": 0.2912021789550781,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.4397150646381145,
66
+ "learning_rate": 1.8241139741447543e-05,
67
+ "loss": 0.2867056884765625,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.483686571101926,
72
+ "learning_rate": 1.80652537155923e-05,
73
+ "loss": 0.27814862060546874,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.5276580775657373,
78
+ "learning_rate": 1.788936768973705e-05,
79
+ "loss": 0.2777033996582031,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.5716295840295489,
84
+ "learning_rate": 1.7713481663881805e-05,
85
+ "loss": 0.2808096618652344,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.6156010904933603,
90
+ "learning_rate": 1.753759563802656e-05,
91
+ "loss": 0.2740480041503906,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.6595725969571717,
96
+ "learning_rate": 1.7361709612171312e-05,
97
+ "loss": 0.2704828796386719,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.7035441034209832,
102
+ "learning_rate": 1.7185823586316067e-05,
103
+ "loss": 0.26128680419921874,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.7475156098847947,
108
+ "learning_rate": 1.7009937560460823e-05,
109
+ "loss": 0.26227166748046876,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.7914871163486061,
114
+ "learning_rate": 1.6834051534605578e-05,
115
+ "loss": 0.257682861328125,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.8354586228124176,
120
+ "learning_rate": 1.665816550875033e-05,
121
+ "loss": 0.25748046875,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.879430129276229,
126
+ "learning_rate": 1.6482279482895085e-05,
127
+ "loss": 0.26029693603515625,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.9234016357400404,
132
+ "learning_rate": 1.630639345703984e-05,
133
+ "loss": 0.25330767822265626,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.967373142203852,
138
+ "learning_rate": 1.6130507431184595e-05,
139
+ "loss": 0.24725408935546875,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 1.0,
144
+ "eval_accuracy": 0.8963888201830325,
145
+ "eval_f1": 0.8609460580912863,
146
+ "eval_loss": 0.2415674775838852,
147
+ "step": 11371
148
+ },
149
+ {
150
+ "epoch": 1.0113446486676634,
151
+ "learning_rate": 1.5954621405329347e-05,
152
+ "loss": 0.22799398803710938,
153
+ "step": 11500
154
+ },
155
+ {
156
+ "epoch": 1.0553161551314747,
157
+ "learning_rate": 1.5778735379474102e-05,
158
+ "loss": 0.1779903106689453,
159
+ "step": 12000
160
+ },
161
+ {
162
+ "epoch": 1.0992876615952862,
163
+ "learning_rate": 1.5602849353618857e-05,
164
+ "loss": 0.18363124084472657,
165
+ "step": 12500
166
+ },
167
+ {
168
+ "epoch": 1.1432591680590978,
169
+ "learning_rate": 1.5426963327763612e-05,
170
+ "loss": 0.183293212890625,
171
+ "step": 13000
172
+ },
173
+ {
174
+ "epoch": 1.187230674522909,
175
+ "learning_rate": 1.5251077301908364e-05,
176
+ "loss": 0.1881647186279297,
177
+ "step": 13500
178
+ },
179
+ {
180
+ "epoch": 1.2312021809867206,
181
+ "learning_rate": 1.5075191276053118e-05,
182
+ "loss": 0.193143798828125,
183
+ "step": 14000
184
+ },
185
+ {
186
+ "epoch": 1.2751736874505322,
187
+ "learning_rate": 1.4899305250197873e-05,
188
+ "loss": 0.186142333984375,
189
+ "step": 14500
190
+ },
191
+ {
192
+ "epoch": 1.3191451939143435,
193
+ "learning_rate": 1.4723419224342628e-05,
194
+ "loss": 0.18537522888183594,
195
+ "step": 15000
196
+ },
197
+ {
198
+ "epoch": 1.363116700378155,
199
+ "learning_rate": 1.4547533198487383e-05,
200
+ "loss": 0.18841236877441406,
201
+ "step": 15500
202
+ },
203
+ {
204
+ "epoch": 1.4070882068419663,
205
+ "learning_rate": 1.4371647172632135e-05,
206
+ "loss": 0.18419113159179687,
207
+ "step": 16000
208
+ },
209
+ {
210
+ "epoch": 1.4510597133057779,
211
+ "learning_rate": 1.419576114677689e-05,
212
+ "loss": 0.18361325073242188,
213
+ "step": 16500
214
+ },
215
+ {
216
+ "epoch": 1.4950312197695892,
217
+ "learning_rate": 1.4019875120921644e-05,
218
+ "loss": 0.18803219604492188,
219
+ "step": 17000
220
+ },
221
+ {
222
+ "epoch": 1.5390027262334007,
223
+ "learning_rate": 1.3843989095066399e-05,
224
+ "loss": 0.1816302032470703,
225
+ "step": 17500
226
+ },
227
+ {
228
+ "epoch": 1.5829742326972123,
229
+ "learning_rate": 1.3668103069211152e-05,
230
+ "loss": 0.1842498016357422,
231
+ "step": 18000
232
+ },
233
+ {
234
+ "epoch": 1.6269457391610236,
235
+ "learning_rate": 1.3492217043355906e-05,
236
+ "loss": 0.17726910400390625,
237
+ "step": 18500
238
+ },
239
+ {
240
+ "epoch": 1.6709172456248351,
241
+ "learning_rate": 1.3316331017500661e-05,
242
+ "loss": 0.17991775512695313,
243
+ "step": 19000
244
+ },
245
+ {
246
+ "epoch": 1.7148887520886467,
247
+ "learning_rate": 1.3140444991645416e-05,
248
+ "loss": 0.1825124053955078,
249
+ "step": 19500
250
+ },
251
+ {
252
+ "epoch": 1.758860258552458,
253
+ "learning_rate": 1.2964558965790168e-05,
254
+ "loss": 0.17683036804199218,
255
+ "step": 20000
256
+ },
257
+ {
258
+ "epoch": 1.8028317650162693,
259
+ "learning_rate": 1.2788672939934923e-05,
260
+ "loss": 0.1868966827392578,
261
+ "step": 20500
262
+ },
263
+ {
264
+ "epoch": 1.846803271480081,
265
+ "learning_rate": 1.2612786914079678e-05,
266
+ "loss": 0.18423307800292968,
267
+ "step": 21000
268
+ },
269
+ {
270
+ "epoch": 1.8907747779438924,
271
+ "learning_rate": 1.2436900888224432e-05,
272
+ "loss": 0.17814576721191405,
273
+ "step": 21500
274
+ },
275
+ {
276
+ "epoch": 1.9347462844077037,
277
+ "learning_rate": 1.2261014862369185e-05,
278
+ "loss": 0.17719683837890626,
279
+ "step": 22000
280
+ },
281
+ {
282
+ "epoch": 1.9787177908715152,
283
+ "learning_rate": 1.208512883651394e-05,
284
+ "loss": 0.17974095153808595,
285
+ "step": 22500
286
+ },
287
+ {
288
+ "epoch": 2.0,
289
+ "eval_accuracy": 0.9078407123423201,
290
+ "eval_f1": 0.8756839717069265,
291
+ "eval_loss": 0.23735392093658447,
292
+ "step": 22742
293
+ }
294
+ ],
295
+ "max_steps": 56855,
296
+ "num_train_epochs": 5,
297
+ "total_flos": 32861393054838336,
298
+ "trial_name": null,
299
+ "trial_params": null
300
+ }
qqp_#1_glue_cased/checkpoint-22742/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bd0df48bb459380a4288a73c472c5ce3866bb691c774490d2ba99b80b24e42f
3
+ size 1639
qqp_#1_glue_cased/checkpoint-22742/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
qqp_#1_glue_cased/checkpoint-34113/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "type_vocab_size": 2,
21
+ "vocab_size": 30522
22
+ }
qqp_#1_glue_cased/checkpoint-34113/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29f55036300da096598b64c7617dcce5347f9877f7af17bbadac25f2c79412c7
3
+ size 875930250
qqp_#1_glue_cased/checkpoint-34113/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31408c930fccb7b742183a35da30f2c3bf3a791b51f9dfcbdf5abd5e99aea5d7
3
+ size 437989677
qqp_#1_glue_cased/checkpoint-34113/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3f3d5bfd6111af5ea2211f616e41b17f55c394c5f52f1247836190393cb533c
3
+ size 326
qqp_#1_glue_cased/checkpoint-34113/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
qqp_#1_glue_cased/checkpoint-34113/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-uncased"}
qqp_#1_glue_cased/checkpoint-34113/trainer_state.json ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9108088053425674,
3
+ "best_model_checkpoint": "qqp_#1_glue_cased/checkpoint-34113",
4
+ "epoch": 3.0,
5
+ "global_step": 34113,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04397150646381145,
12
+ "learning_rate": 1.9824113974144757e-05,
13
+ "loss": 0.43561614990234376,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.0879430129276229,
18
+ "learning_rate": 1.9648227948289512e-05,
19
+ "loss": 0.3685419006347656,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.13191451939143434,
24
+ "learning_rate": 1.9472341922434264e-05,
25
+ "loss": 0.3377880249023438,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.1758860258552458,
30
+ "learning_rate": 1.929645589657902e-05,
31
+ "loss": 0.327943115234375,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.21985753231905725,
36
+ "learning_rate": 1.9120569870723774e-05,
37
+ "loss": 0.3118376159667969,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.2638290387828687,
42
+ "learning_rate": 1.894468384486853e-05,
43
+ "loss": 0.3047716064453125,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.30780054524668016,
48
+ "learning_rate": 1.876879781901328e-05,
49
+ "loss": 0.3038143005371094,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.3517720517104916,
54
+ "learning_rate": 1.8592911793158036e-05,
55
+ "loss": 0.2949424438476563,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.39574355817430307,
60
+ "learning_rate": 1.8417025767302788e-05,
61
+ "loss": 0.2912021789550781,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.4397150646381145,
66
+ "learning_rate": 1.8241139741447543e-05,
67
+ "loss": 0.2867056884765625,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.483686571101926,
72
+ "learning_rate": 1.80652537155923e-05,
73
+ "loss": 0.27814862060546874,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.5276580775657373,
78
+ "learning_rate": 1.788936768973705e-05,
79
+ "loss": 0.2777033996582031,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.5716295840295489,
84
+ "learning_rate": 1.7713481663881805e-05,
85
+ "loss": 0.2808096618652344,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.6156010904933603,
90
+ "learning_rate": 1.753759563802656e-05,
91
+ "loss": 0.2740480041503906,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.6595725969571717,
96
+ "learning_rate": 1.7361709612171312e-05,
97
+ "loss": 0.2704828796386719,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.7035441034209832,
102
+ "learning_rate": 1.7185823586316067e-05,
103
+ "loss": 0.26128680419921874,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.7475156098847947,
108
+ "learning_rate": 1.7009937560460823e-05,
109
+ "loss": 0.26227166748046876,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.7914871163486061,
114
+ "learning_rate": 1.6834051534605578e-05,
115
+ "loss": 0.257682861328125,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.8354586228124176,
120
+ "learning_rate": 1.665816550875033e-05,
121
+ "loss": 0.25748046875,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.879430129276229,
126
+ "learning_rate": 1.6482279482895085e-05,
127
+ "loss": 0.26029693603515625,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.9234016357400404,
132
+ "learning_rate": 1.630639345703984e-05,
133
+ "loss": 0.25330767822265626,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.967373142203852,
138
+ "learning_rate": 1.6130507431184595e-05,
139
+ "loss": 0.24725408935546875,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 1.0,
144
+ "eval_accuracy": 0.8963888201830325,
145
+ "eval_f1": 0.8609460580912863,
146
+ "eval_loss": 0.2415674775838852,
147
+ "step": 11371
148
+ },
149
+ {
150
+ "epoch": 1.0113446486676634,
151
+ "learning_rate": 1.5954621405329347e-05,
152
+ "loss": 0.22799398803710938,
153
+ "step": 11500
154
+ },
155
+ {
156
+ "epoch": 1.0553161551314747,
157
+ "learning_rate": 1.5778735379474102e-05,
158
+ "loss": 0.1779903106689453,
159
+ "step": 12000
160
+ },
161
+ {
162
+ "epoch": 1.0992876615952862,
163
+ "learning_rate": 1.5602849353618857e-05,
164
+ "loss": 0.18363124084472657,
165
+ "step": 12500
166
+ },
167
+ {
168
+ "epoch": 1.1432591680590978,
169
+ "learning_rate": 1.5426963327763612e-05,
170
+ "loss": 0.183293212890625,
171
+ "step": 13000
172
+ },
173
+ {
174
+ "epoch": 1.187230674522909,
175
+ "learning_rate": 1.5251077301908364e-05,
176
+ "loss": 0.1881647186279297,
177
+ "step": 13500
178
+ },
179
+ {
180
+ "epoch": 1.2312021809867206,
181
+ "learning_rate": 1.5075191276053118e-05,
182
+ "loss": 0.193143798828125,
183
+ "step": 14000
184
+ },
185
+ {
186
+ "epoch": 1.2751736874505322,
187
+ "learning_rate": 1.4899305250197873e-05,
188
+ "loss": 0.186142333984375,
189
+ "step": 14500
190
+ },
191
+ {
192
+ "epoch": 1.3191451939143435,
193
+ "learning_rate": 1.4723419224342628e-05,
194
+ "loss": 0.18537522888183594,
195
+ "step": 15000
196
+ },
197
+ {
198
+ "epoch": 1.363116700378155,
199
+ "learning_rate": 1.4547533198487383e-05,
200
+ "loss": 0.18841236877441406,
201
+ "step": 15500
202
+ },
203
+ {
204
+ "epoch": 1.4070882068419663,
205
+ "learning_rate": 1.4371647172632135e-05,
206
+ "loss": 0.18419113159179687,
207
+ "step": 16000
208
+ },
209
+ {
210
+ "epoch": 1.4510597133057779,
211
+ "learning_rate": 1.419576114677689e-05,
212
+ "loss": 0.18361325073242188,
213
+ "step": 16500
214
+ },
215
+ {
216
+ "epoch": 1.4950312197695892,
217
+ "learning_rate": 1.4019875120921644e-05,
218
+ "loss": 0.18803219604492188,
219
+ "step": 17000
220
+ },
221
+ {
222
+ "epoch": 1.5390027262334007,
223
+ "learning_rate": 1.3843989095066399e-05,
224
+ "loss": 0.1816302032470703,
225
+ "step": 17500
226
+ },
227
+ {
228
+ "epoch": 1.5829742326972123,
229
+ "learning_rate": 1.3668103069211152e-05,
230
+ "loss": 0.1842498016357422,
231
+ "step": 18000
232
+ },
233
+ {
234
+ "epoch": 1.6269457391610236,
235
+ "learning_rate": 1.3492217043355906e-05,
236
+ "loss": 0.17726910400390625,
237
+ "step": 18500
238
+ },
239
+ {
240
+ "epoch": 1.6709172456248351,
241
+ "learning_rate": 1.3316331017500661e-05,
242
+ "loss": 0.17991775512695313,
243
+ "step": 19000
244
+ },
245
+ {
246
+ "epoch": 1.7148887520886467,
247
+ "learning_rate": 1.3140444991645416e-05,
248
+ "loss": 0.1825124053955078,
249
+ "step": 19500
250
+ },
251
+ {
252
+ "epoch": 1.758860258552458,
253
+ "learning_rate": 1.2964558965790168e-05,
254
+ "loss": 0.17683036804199218,
255
+ "step": 20000
256
+ },
257
+ {
258
+ "epoch": 1.8028317650162693,
259
+ "learning_rate": 1.2788672939934923e-05,
260
+ "loss": 0.1868966827392578,
261
+ "step": 20500
262
+ },
263
+ {
264
+ "epoch": 1.846803271480081,
265
+ "learning_rate": 1.2612786914079678e-05,
266
+ "loss": 0.18423307800292968,
267
+ "step": 21000
268
+ },
269
+ {
270
+ "epoch": 1.8907747779438924,
271
+ "learning_rate": 1.2436900888224432e-05,
272
+ "loss": 0.17814576721191405,
273
+ "step": 21500
274
+ },
275
+ {
276
+ "epoch": 1.9347462844077037,
277
+ "learning_rate": 1.2261014862369185e-05,
278
+ "loss": 0.17719683837890626,
279
+ "step": 22000
280
+ },
281
+ {
282
+ "epoch": 1.9787177908715152,
283
+ "learning_rate": 1.208512883651394e-05,
284
+ "loss": 0.17974095153808595,
285
+ "step": 22500
286
+ },
287
+ {
288
+ "epoch": 2.0,
289
+ "eval_accuracy": 0.9078407123423201,
290
+ "eval_f1": 0.8756839717069265,
291
+ "eval_loss": 0.23735392093658447,
292
+ "step": 22742
293
+ },
294
+ {
295
+ "epoch": 2.0226892973353268,
296
+ "learning_rate": 1.1909242810658694e-05,
297
+ "loss": 0.14513198852539064,
298
+ "step": 23000
299
+ },
300
+ {
301
+ "epoch": 2.066660803799138,
302
+ "learning_rate": 1.1733356784803449e-05,
303
+ "loss": 0.11556282806396484,
304
+ "step": 23500
305
+ },
306
+ {
307
+ "epoch": 2.1106323102629494,
308
+ "learning_rate": 1.15574707589482e-05,
309
+ "loss": 0.12064967346191406,
310
+ "step": 24000
311
+ },
312
+ {
313
+ "epoch": 2.154603816726761,
314
+ "learning_rate": 1.1381584733092956e-05,
315
+ "loss": 0.11743055725097656,
316
+ "step": 24500
317
+ },
318
+ {
319
+ "epoch": 2.1985753231905725,
320
+ "learning_rate": 1.1205698707237711e-05,
321
+ "loss": 0.11710655212402343,
322
+ "step": 25000
323
+ },
324
+ {
325
+ "epoch": 2.242546829654384,
326
+ "learning_rate": 1.1029812681382466e-05,
327
+ "loss": 0.1169861831665039,
328
+ "step": 25500
329
+ },
330
+ {
331
+ "epoch": 2.2865183361181955,
332
+ "learning_rate": 1.0853926655527218e-05,
333
+ "loss": 0.12172984313964844,
334
+ "step": 26000
335
+ },
336
+ {
337
+ "epoch": 2.330489842582007,
338
+ "learning_rate": 1.0678040629671973e-05,
339
+ "loss": 0.11213615417480469,
340
+ "step": 26500
341
+ },
342
+ {
343
+ "epoch": 2.374461349045818,
344
+ "learning_rate": 1.0502154603816728e-05,
345
+ "loss": 0.12687485504150392,
346
+ "step": 27000
347
+ },
348
+ {
349
+ "epoch": 2.41843285550963,
350
+ "learning_rate": 1.0326268577961482e-05,
351
+ "loss": 0.12601725006103515,
352
+ "step": 27500
353
+ },
354
+ {
355
+ "epoch": 2.4624043619734413,
356
+ "learning_rate": 1.0150382552106235e-05,
357
+ "loss": 0.11948597717285156,
358
+ "step": 28000
359
+ },
360
+ {
361
+ "epoch": 2.5063758684372526,
362
+ "learning_rate": 9.974496526250989e-06,
363
+ "loss": 0.12668427276611327,
364
+ "step": 28500
365
+ },
366
+ {
367
+ "epoch": 2.5503473749010643,
368
+ "learning_rate": 9.798610500395744e-06,
369
+ "loss": 0.12504535675048828,
370
+ "step": 29000
371
+ },
372
+ {
373
+ "epoch": 2.5943188813648757,
374
+ "learning_rate": 9.6227244745405e-06,
375
+ "loss": 0.12663780975341796,
376
+ "step": 29500
377
+ },
378
+ {
379
+ "epoch": 2.638290387828687,
380
+ "learning_rate": 9.446838448685253e-06,
381
+ "loss": 0.122175048828125,
382
+ "step": 30000
383
+ },
384
+ {
385
+ "epoch": 2.6822618942924983,
386
+ "learning_rate": 9.270952422830008e-06,
387
+ "loss": 0.12571288299560546,
388
+ "step": 30500
389
+ },
390
+ {
391
+ "epoch": 2.72623340075631,
392
+ "learning_rate": 9.095066396974761e-06,
393
+ "loss": 0.12774851989746094,
394
+ "step": 31000
395
+ },
396
+ {
397
+ "epoch": 2.7702049072201214,
398
+ "learning_rate": 8.919180371119516e-06,
399
+ "loss": 0.12356333923339843,
400
+ "step": 31500
401
+ },
402
+ {
403
+ "epoch": 2.8141764136839327,
404
+ "learning_rate": 8.74329434526427e-06,
405
+ "loss": 0.1209749755859375,
406
+ "step": 32000
407
+ },
408
+ {
409
+ "epoch": 2.8581479201477444,
410
+ "learning_rate": 8.567408319409023e-06,
411
+ "loss": 0.12022649383544921,
412
+ "step": 32500
413
+ },
414
+ {
415
+ "epoch": 2.9021194266115558,
416
+ "learning_rate": 8.391522293553777e-06,
417
+ "loss": 0.12490190887451172,
418
+ "step": 33000
419
+ },
420
+ {
421
+ "epoch": 2.946090933075367,
422
+ "learning_rate": 8.215636267698532e-06,
423
+ "loss": 0.12208123779296876,
424
+ "step": 33500
425
+ },
426
+ {
427
+ "epoch": 2.9900624395391784,
428
+ "learning_rate": 8.039750241843286e-06,
429
+ "loss": 0.12288145446777343,
430
+ "step": 34000
431
+ },
432
+ {
433
+ "epoch": 3.0,
434
+ "eval_accuracy": 0.9108088053425674,
435
+ "eval_f1": 0.8790176474535328,
436
+ "eval_loss": 0.2841033935546875,
437
+ "step": 34113
438
+ }
439
+ ],
440
+ "max_steps": 56855,
441
+ "num_train_epochs": 5,
442
+ "total_flos": 49275990211670160,
443
+ "trial_name": null,
444
+ "trial_params": null
445
+ }
qqp_#1_glue_cased/checkpoint-34113/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bd0df48bb459380a4288a73c472c5ce3866bb691c774490d2ba99b80b24e42f
3
+ size 1639
qqp_#1_glue_cased/checkpoint-34113/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
qqp_#1_glue_cased/checkpoint-45484/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "type_vocab_size": 2,
21
+ "vocab_size": 30522
22
+ }
qqp_#1_glue_cased/checkpoint-45484/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c16ff82fc1e419541505f0061fcadcb24d43f085384d2fc14d45a6843316d2
3
+ size 875930250
qqp_#1_glue_cased/checkpoint-45484/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66896485ebe82edffc47f076012b4826f542d2fac998316b87ce36297cec5f1
3
+ size 437989677
qqp_#1_glue_cased/checkpoint-45484/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac7b51ff95a275c6325ec33069a85e549cd6f085ba4ad2f878e6a917eac2cf7f
3
+ size 326
qqp_#1_glue_cased/checkpoint-45484/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
qqp_#1_glue_cased/checkpoint-45484/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-uncased"}
qqp_#1_glue_cased/checkpoint-45484/trainer_state.json ADDED
@@ -0,0 +1,584 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9115508285926293,
3
+ "best_model_checkpoint": "qqp_#1_glue_cased/checkpoint-45484",
4
+ "epoch": 4.0,
5
+ "global_step": 45484,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04397150646381145,
12
+ "learning_rate": 1.9824113974144757e-05,
13
+ "loss": 0.43561614990234376,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.0879430129276229,
18
+ "learning_rate": 1.9648227948289512e-05,
19
+ "loss": 0.3685419006347656,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.13191451939143434,
24
+ "learning_rate": 1.9472341922434264e-05,
25
+ "loss": 0.3377880249023438,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.1758860258552458,
30
+ "learning_rate": 1.929645589657902e-05,
31
+ "loss": 0.327943115234375,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.21985753231905725,
36
+ "learning_rate": 1.9120569870723774e-05,
37
+ "loss": 0.3118376159667969,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.2638290387828687,
42
+ "learning_rate": 1.894468384486853e-05,
43
+ "loss": 0.3047716064453125,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.30780054524668016,
48
+ "learning_rate": 1.876879781901328e-05,
49
+ "loss": 0.3038143005371094,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.3517720517104916,
54
+ "learning_rate": 1.8592911793158036e-05,
55
+ "loss": 0.2949424438476563,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.39574355817430307,
60
+ "learning_rate": 1.8417025767302788e-05,
61
+ "loss": 0.2912021789550781,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.4397150646381145,
66
+ "learning_rate": 1.8241139741447543e-05,
67
+ "loss": 0.2867056884765625,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.483686571101926,
72
+ "learning_rate": 1.80652537155923e-05,
73
+ "loss": 0.27814862060546874,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.5276580775657373,
78
+ "learning_rate": 1.788936768973705e-05,
79
+ "loss": 0.2777033996582031,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.5716295840295489,
84
+ "learning_rate": 1.7713481663881805e-05,
85
+ "loss": 0.2808096618652344,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.6156010904933603,
90
+ "learning_rate": 1.753759563802656e-05,
91
+ "loss": 0.2740480041503906,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.6595725969571717,
96
+ "learning_rate": 1.7361709612171312e-05,
97
+ "loss": 0.2704828796386719,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.7035441034209832,
102
+ "learning_rate": 1.7185823586316067e-05,
103
+ "loss": 0.26128680419921874,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.7475156098847947,
108
+ "learning_rate": 1.7009937560460823e-05,
109
+ "loss": 0.26227166748046876,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.7914871163486061,
114
+ "learning_rate": 1.6834051534605578e-05,
115
+ "loss": 0.257682861328125,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.8354586228124176,
120
+ "learning_rate": 1.665816550875033e-05,
121
+ "loss": 0.25748046875,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.879430129276229,
126
+ "learning_rate": 1.6482279482895085e-05,
127
+ "loss": 0.26029693603515625,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.9234016357400404,
132
+ "learning_rate": 1.630639345703984e-05,
133
+ "loss": 0.25330767822265626,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.967373142203852,
138
+ "learning_rate": 1.6130507431184595e-05,
139
+ "loss": 0.24725408935546875,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 1.0,
144
+ "eval_accuracy": 0.8963888201830325,
145
+ "eval_f1": 0.8609460580912863,
146
+ "eval_loss": 0.2415674775838852,
147
+ "step": 11371
148
+ },
149
+ {
150
+ "epoch": 1.0113446486676634,
151
+ "learning_rate": 1.5954621405329347e-05,
152
+ "loss": 0.22799398803710938,
153
+ "step": 11500
154
+ },
155
+ {
156
+ "epoch": 1.0553161551314747,
157
+ "learning_rate": 1.5778735379474102e-05,
158
+ "loss": 0.1779903106689453,
159
+ "step": 12000
160
+ },
161
+ {
162
+ "epoch": 1.0992876615952862,
163
+ "learning_rate": 1.5602849353618857e-05,
164
+ "loss": 0.18363124084472657,
165
+ "step": 12500
166
+ },
167
+ {
168
+ "epoch": 1.1432591680590978,
169
+ "learning_rate": 1.5426963327763612e-05,
170
+ "loss": 0.183293212890625,
171
+ "step": 13000
172
+ },
173
+ {
174
+ "epoch": 1.187230674522909,
175
+ "learning_rate": 1.5251077301908364e-05,
176
+ "loss": 0.1881647186279297,
177
+ "step": 13500
178
+ },
179
+ {
180
+ "epoch": 1.2312021809867206,
181
+ "learning_rate": 1.5075191276053118e-05,
182
+ "loss": 0.193143798828125,
183
+ "step": 14000
184
+ },
185
+ {
186
+ "epoch": 1.2751736874505322,
187
+ "learning_rate": 1.4899305250197873e-05,
188
+ "loss": 0.186142333984375,
189
+ "step": 14500
190
+ },
191
+ {
192
+ "epoch": 1.3191451939143435,
193
+ "learning_rate": 1.4723419224342628e-05,
194
+ "loss": 0.18537522888183594,
195
+ "step": 15000
196
+ },
197
+ {
198
+ "epoch": 1.363116700378155,
199
+ "learning_rate": 1.4547533198487383e-05,
200
+ "loss": 0.18841236877441406,
201
+ "step": 15500
202
+ },
203
+ {
204
+ "epoch": 1.4070882068419663,
205
+ "learning_rate": 1.4371647172632135e-05,
206
+ "loss": 0.18419113159179687,
207
+ "step": 16000
208
+ },
209
+ {
210
+ "epoch": 1.4510597133057779,
211
+ "learning_rate": 1.419576114677689e-05,
212
+ "loss": 0.18361325073242188,
213
+ "step": 16500
214
+ },
215
+ {
216
+ "epoch": 1.4950312197695892,
217
+ "learning_rate": 1.4019875120921644e-05,
218
+ "loss": 0.18803219604492188,
219
+ "step": 17000
220
+ },
221
+ {
222
+ "epoch": 1.5390027262334007,
223
+ "learning_rate": 1.3843989095066399e-05,
224
+ "loss": 0.1816302032470703,
225
+ "step": 17500
226
+ },
227
+ {
228
+ "epoch": 1.5829742326972123,
229
+ "learning_rate": 1.3668103069211152e-05,
230
+ "loss": 0.1842498016357422,
231
+ "step": 18000
232
+ },
233
+ {
234
+ "epoch": 1.6269457391610236,
235
+ "learning_rate": 1.3492217043355906e-05,
236
+ "loss": 0.17726910400390625,
237
+ "step": 18500
238
+ },
239
+ {
240
+ "epoch": 1.6709172456248351,
241
+ "learning_rate": 1.3316331017500661e-05,
242
+ "loss": 0.17991775512695313,
243
+ "step": 19000
244
+ },
245
+ {
246
+ "epoch": 1.7148887520886467,
247
+ "learning_rate": 1.3140444991645416e-05,
248
+ "loss": 0.1825124053955078,
249
+ "step": 19500
250
+ },
251
+ {
252
+ "epoch": 1.758860258552458,
253
+ "learning_rate": 1.2964558965790168e-05,
254
+ "loss": 0.17683036804199218,
255
+ "step": 20000
256
+ },
257
+ {
258
+ "epoch": 1.8028317650162693,
259
+ "learning_rate": 1.2788672939934923e-05,
260
+ "loss": 0.1868966827392578,
261
+ "step": 20500
262
+ },
263
+ {
264
+ "epoch": 1.846803271480081,
265
+ "learning_rate": 1.2612786914079678e-05,
266
+ "loss": 0.18423307800292968,
267
+ "step": 21000
268
+ },
269
+ {
270
+ "epoch": 1.8907747779438924,
271
+ "learning_rate": 1.2436900888224432e-05,
272
+ "loss": 0.17814576721191405,
273
+ "step": 21500
274
+ },
275
+ {
276
+ "epoch": 1.9347462844077037,
277
+ "learning_rate": 1.2261014862369185e-05,
278
+ "loss": 0.17719683837890626,
279
+ "step": 22000
280
+ },
281
+ {
282
+ "epoch": 1.9787177908715152,
283
+ "learning_rate": 1.208512883651394e-05,
284
+ "loss": 0.17974095153808595,
285
+ "step": 22500
286
+ },
287
+ {
288
+ "epoch": 2.0,
289
+ "eval_accuracy": 0.9078407123423201,
290
+ "eval_f1": 0.8756839717069265,
291
+ "eval_loss": 0.23735392093658447,
292
+ "step": 22742
293
+ },
294
+ {
295
+ "epoch": 2.0226892973353268,
296
+ "learning_rate": 1.1909242810658694e-05,
297
+ "loss": 0.14513198852539064,
298
+ "step": 23000
299
+ },
300
+ {
301
+ "epoch": 2.066660803799138,
302
+ "learning_rate": 1.1733356784803449e-05,
303
+ "loss": 0.11556282806396484,
304
+ "step": 23500
305
+ },
306
+ {
307
+ "epoch": 2.1106323102629494,
308
+ "learning_rate": 1.15574707589482e-05,
309
+ "loss": 0.12064967346191406,
310
+ "step": 24000
311
+ },
312
+ {
313
+ "epoch": 2.154603816726761,
314
+ "learning_rate": 1.1381584733092956e-05,
315
+ "loss": 0.11743055725097656,
316
+ "step": 24500
317
+ },
318
+ {
319
+ "epoch": 2.1985753231905725,
320
+ "learning_rate": 1.1205698707237711e-05,
321
+ "loss": 0.11710655212402343,
322
+ "step": 25000
323
+ },
324
+ {
325
+ "epoch": 2.242546829654384,
326
+ "learning_rate": 1.1029812681382466e-05,
327
+ "loss": 0.1169861831665039,
328
+ "step": 25500
329
+ },
330
+ {
331
+ "epoch": 2.2865183361181955,
332
+ "learning_rate": 1.0853926655527218e-05,
333
+ "loss": 0.12172984313964844,
334
+ "step": 26000
335
+ },
336
+ {
337
+ "epoch": 2.330489842582007,
338
+ "learning_rate": 1.0678040629671973e-05,
339
+ "loss": 0.11213615417480469,
340
+ "step": 26500
341
+ },
342
+ {
343
+ "epoch": 2.374461349045818,
344
+ "learning_rate": 1.0502154603816728e-05,
345
+ "loss": 0.12687485504150392,
346
+ "step": 27000
347
+ },
348
+ {
349
+ "epoch": 2.41843285550963,
350
+ "learning_rate": 1.0326268577961482e-05,
351
+ "loss": 0.12601725006103515,
352
+ "step": 27500
353
+ },
354
+ {
355
+ "epoch": 2.4624043619734413,
356
+ "learning_rate": 1.0150382552106235e-05,
357
+ "loss": 0.11948597717285156,
358
+ "step": 28000
359
+ },
360
+ {
361
+ "epoch": 2.5063758684372526,
362
+ "learning_rate": 9.974496526250989e-06,
363
+ "loss": 0.12668427276611327,
364
+ "step": 28500
365
+ },
366
+ {
367
+ "epoch": 2.5503473749010643,
368
+ "learning_rate": 9.798610500395744e-06,
369
+ "loss": 0.12504535675048828,
370
+ "step": 29000
371
+ },
372
+ {
373
+ "epoch": 2.5943188813648757,
374
+ "learning_rate": 9.6227244745405e-06,
375
+ "loss": 0.12663780975341796,
376
+ "step": 29500
377
+ },
378
+ {
379
+ "epoch": 2.638290387828687,
380
+ "learning_rate": 9.446838448685253e-06,
381
+ "loss": 0.122175048828125,
382
+ "step": 30000
383
+ },
384
+ {
385
+ "epoch": 2.6822618942924983,
386
+ "learning_rate": 9.270952422830008e-06,
387
+ "loss": 0.12571288299560546,
388
+ "step": 30500
389
+ },
390
+ {
391
+ "epoch": 2.72623340075631,
392
+ "learning_rate": 9.095066396974761e-06,
393
+ "loss": 0.12774851989746094,
394
+ "step": 31000
395
+ },
396
+ {
397
+ "epoch": 2.7702049072201214,
398
+ "learning_rate": 8.919180371119516e-06,
399
+ "loss": 0.12356333923339843,
400
+ "step": 31500
401
+ },
402
+ {
403
+ "epoch": 2.8141764136839327,
404
+ "learning_rate": 8.74329434526427e-06,
405
+ "loss": 0.1209749755859375,
406
+ "step": 32000
407
+ },
408
+ {
409
+ "epoch": 2.8581479201477444,
410
+ "learning_rate": 8.567408319409023e-06,
411
+ "loss": 0.12022649383544921,
412
+ "step": 32500
413
+ },
414
+ {
415
+ "epoch": 2.9021194266115558,
416
+ "learning_rate": 8.391522293553777e-06,
417
+ "loss": 0.12490190887451172,
418
+ "step": 33000
419
+ },
420
+ {
421
+ "epoch": 2.946090933075367,
422
+ "learning_rate": 8.215636267698532e-06,
423
+ "loss": 0.12208123779296876,
424
+ "step": 33500
425
+ },
426
+ {
427
+ "epoch": 2.9900624395391784,
428
+ "learning_rate": 8.039750241843286e-06,
429
+ "loss": 0.12288145446777343,
430
+ "step": 34000
431
+ },
432
+ {
433
+ "epoch": 3.0,
434
+ "eval_accuracy": 0.9108088053425674,
435
+ "eval_f1": 0.8790176474535328,
436
+ "eval_loss": 0.2841033935546875,
437
+ "step": 34113
438
+ },
439
+ {
440
+ "epoch": 3.03403394600299,
441
+ "learning_rate": 7.86386421598804e-06,
442
+ "loss": 0.08970191955566406,
443
+ "step": 34500
444
+ },
445
+ {
446
+ "epoch": 3.0780054524668015,
447
+ "learning_rate": 7.687978190132794e-06,
448
+ "loss": 0.08841686248779297,
449
+ "step": 35000
450
+ },
451
+ {
452
+ "epoch": 3.1219769589306128,
453
+ "learning_rate": 7.512092164277549e-06,
454
+ "loss": 0.07938870239257813,
455
+ "step": 35500
456
+ },
457
+ {
458
+ "epoch": 3.1659484653944245,
459
+ "learning_rate": 7.336206138422303e-06,
460
+ "loss": 0.08218861389160156,
461
+ "step": 36000
462
+ },
463
+ {
464
+ "epoch": 3.209919971858236,
465
+ "learning_rate": 7.160320112567057e-06,
466
+ "loss": 0.0876485595703125,
467
+ "step": 36500
468
+ },
469
+ {
470
+ "epoch": 3.253891478322047,
471
+ "learning_rate": 6.984434086711811e-06,
472
+ "loss": 0.08727376556396485,
473
+ "step": 37000
474
+ },
475
+ {
476
+ "epoch": 3.297862984785859,
477
+ "learning_rate": 6.808548060856566e-06,
478
+ "loss": 0.07741403961181641,
479
+ "step": 37500
480
+ },
481
+ {
482
+ "epoch": 3.3418344912496702,
483
+ "learning_rate": 6.632662035001319e-06,
484
+ "loss": 0.08279672241210938,
485
+ "step": 38000
486
+ },
487
+ {
488
+ "epoch": 3.3858059977134816,
489
+ "learning_rate": 6.456776009146074e-06,
490
+ "loss": 0.08051201629638671,
491
+ "step": 38500
492
+ },
493
+ {
494
+ "epoch": 3.4297775041772933,
495
+ "learning_rate": 6.280889983290828e-06,
496
+ "loss": 0.0875191879272461,
497
+ "step": 39000
498
+ },
499
+ {
500
+ "epoch": 3.4737490106411046,
501
+ "learning_rate": 6.105003957435582e-06,
502
+ "loss": 0.0875638885498047,
503
+ "step": 39500
504
+ },
505
+ {
506
+ "epoch": 3.517720517104916,
507
+ "learning_rate": 5.929117931580336e-06,
508
+ "loss": 0.09117823791503907,
509
+ "step": 40000
510
+ },
511
+ {
512
+ "epoch": 3.5616920235687273,
513
+ "learning_rate": 5.753231905725091e-06,
514
+ "loss": 0.08640509796142579,
515
+ "step": 40500
516
+ },
517
+ {
518
+ "epoch": 3.605663530032539,
519
+ "learning_rate": 5.5773458798698444e-06,
520
+ "loss": 0.08709436798095703,
521
+ "step": 41000
522
+ },
523
+ {
524
+ "epoch": 3.6496350364963503,
525
+ "learning_rate": 5.401459854014599e-06,
526
+ "loss": 0.09037484741210937,
527
+ "step": 41500
528
+ },
529
+ {
530
+ "epoch": 3.6936065429601617,
531
+ "learning_rate": 5.225573828159353e-06,
532
+ "loss": 0.08582682800292969,
533
+ "step": 42000
534
+ },
535
+ {
536
+ "epoch": 3.7375780494239734,
537
+ "learning_rate": 5.049687802304107e-06,
538
+ "loss": 0.07674098968505859,
539
+ "step": 42500
540
+ },
541
+ {
542
+ "epoch": 3.7815495558877847,
543
+ "learning_rate": 4.873801776448862e-06,
544
+ "loss": 0.08345184326171876,
545
+ "step": 43000
546
+ },
547
+ {
548
+ "epoch": 3.825521062351596,
549
+ "learning_rate": 4.697915750593616e-06,
550
+ "loss": 0.08551445007324218,
551
+ "step": 43500
552
+ },
553
+ {
554
+ "epoch": 3.8694925688154074,
555
+ "learning_rate": 4.5220297247383695e-06,
556
+ "loss": 0.08454890441894532,
557
+ "step": 44000
558
+ },
559
+ {
560
+ "epoch": 3.913464075279219,
561
+ "learning_rate": 4.346143698883124e-06,
562
+ "loss": 0.0829298324584961,
563
+ "step": 44500
564
+ },
565
+ {
566
+ "epoch": 3.9574355817430305,
567
+ "learning_rate": 4.170257673027878e-06,
568
+ "loss": 0.08503248596191407,
569
+ "step": 45000
570
+ },
571
+ {
572
+ "epoch": 4.0,
573
+ "eval_accuracy": 0.9115508285926293,
574
+ "eval_f1": 0.8807761552310462,
575
+ "eval_loss": 0.3467446565628052,
576
+ "step": 45484
577
+ }
578
+ ],
579
+ "max_steps": 56855,
580
+ "num_train_epochs": 5,
581
+ "total_flos": 65703021222201888,
582
+ "trial_name": null,
583
+ "trial_params": null
584
+ }
qqp_#1_glue_cased/checkpoint-45484/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bd0df48bb459380a4288a73c472c5ce3866bb691c774490d2ba99b80b24e42f
3
+ size 1639
qqp_#1_glue_cased/checkpoint-45484/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
qqp_#1_glue_cased/checkpoint-56855/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "type_vocab_size": 2,
21
+ "vocab_size": 30522
22
+ }
qqp_#1_glue_cased/checkpoint-56855/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:894b7b162b86539e8ec06b616c56652a81d9867f7816bab3c2f7942f62ad284b
3
+ size 875930250
qqp_#1_glue_cased/checkpoint-56855/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d68b5303cce45e2e1d1718a0caa5a847cb258317d7fffb2538b1ca1b8bfd4700
3
+ size 437989677
qqp_#1_glue_cased/checkpoint-56855/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81f1d91df29084ce1288bba9343c96c9461b7fd867a2ff5171c12db2648af8c5
3
+ size 326
qqp_#1_glue_cased/checkpoint-56855/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
qqp_#1_glue_cased/checkpoint-56855/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-uncased"}
qqp_#1_glue_cased/checkpoint-56855/trainer_state.json ADDED
@@ -0,0 +1,729 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9132574820677715,
3
+ "best_model_checkpoint": "qqp_#1_glue_cased/checkpoint-56855",
4
+ "epoch": 5.0,
5
+ "global_step": 56855,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04397150646381145,
12
+ "learning_rate": 1.9824113974144757e-05,
13
+ "loss": 0.43561614990234376,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.0879430129276229,
18
+ "learning_rate": 1.9648227948289512e-05,
19
+ "loss": 0.3685419006347656,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.13191451939143434,
24
+ "learning_rate": 1.9472341922434264e-05,
25
+ "loss": 0.3377880249023438,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.1758860258552458,
30
+ "learning_rate": 1.929645589657902e-05,
31
+ "loss": 0.327943115234375,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.21985753231905725,
36
+ "learning_rate": 1.9120569870723774e-05,
37
+ "loss": 0.3118376159667969,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.2638290387828687,
42
+ "learning_rate": 1.894468384486853e-05,
43
+ "loss": 0.3047716064453125,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.30780054524668016,
48
+ "learning_rate": 1.876879781901328e-05,
49
+ "loss": 0.3038143005371094,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.3517720517104916,
54
+ "learning_rate": 1.8592911793158036e-05,
55
+ "loss": 0.2949424438476563,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.39574355817430307,
60
+ "learning_rate": 1.8417025767302788e-05,
61
+ "loss": 0.2912021789550781,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.4397150646381145,
66
+ "learning_rate": 1.8241139741447543e-05,
67
+ "loss": 0.2867056884765625,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.483686571101926,
72
+ "learning_rate": 1.80652537155923e-05,
73
+ "loss": 0.27814862060546874,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.5276580775657373,
78
+ "learning_rate": 1.788936768973705e-05,
79
+ "loss": 0.2777033996582031,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.5716295840295489,
84
+ "learning_rate": 1.7713481663881805e-05,
85
+ "loss": 0.2808096618652344,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.6156010904933603,
90
+ "learning_rate": 1.753759563802656e-05,
91
+ "loss": 0.2740480041503906,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.6595725969571717,
96
+ "learning_rate": 1.7361709612171312e-05,
97
+ "loss": 0.2704828796386719,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.7035441034209832,
102
+ "learning_rate": 1.7185823586316067e-05,
103
+ "loss": 0.26128680419921874,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.7475156098847947,
108
+ "learning_rate": 1.7009937560460823e-05,
109
+ "loss": 0.26227166748046876,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.7914871163486061,
114
+ "learning_rate": 1.6834051534605578e-05,
115
+ "loss": 0.257682861328125,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.8354586228124176,
120
+ "learning_rate": 1.665816550875033e-05,
121
+ "loss": 0.25748046875,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.879430129276229,
126
+ "learning_rate": 1.6482279482895085e-05,
127
+ "loss": 0.26029693603515625,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.9234016357400404,
132
+ "learning_rate": 1.630639345703984e-05,
133
+ "loss": 0.25330767822265626,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.967373142203852,
138
+ "learning_rate": 1.6130507431184595e-05,
139
+ "loss": 0.24725408935546875,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 1.0,
144
+ "eval_accuracy": 0.8963888201830325,
145
+ "eval_f1": 0.8609460580912863,
146
+ "eval_loss": 0.2415674775838852,
147
+ "step": 11371
148
+ },
149
+ {
150
+ "epoch": 1.0113446486676634,
151
+ "learning_rate": 1.5954621405329347e-05,
152
+ "loss": 0.22799398803710938,
153
+ "step": 11500
154
+ },
155
+ {
156
+ "epoch": 1.0553161551314747,
157
+ "learning_rate": 1.5778735379474102e-05,
158
+ "loss": 0.1779903106689453,
159
+ "step": 12000
160
+ },
161
+ {
162
+ "epoch": 1.0992876615952862,
163
+ "learning_rate": 1.5602849353618857e-05,
164
+ "loss": 0.18363124084472657,
165
+ "step": 12500
166
+ },
167
+ {
168
+ "epoch": 1.1432591680590978,
169
+ "learning_rate": 1.5426963327763612e-05,
170
+ "loss": 0.183293212890625,
171
+ "step": 13000
172
+ },
173
+ {
174
+ "epoch": 1.187230674522909,
175
+ "learning_rate": 1.5251077301908364e-05,
176
+ "loss": 0.1881647186279297,
177
+ "step": 13500
178
+ },
179
+ {
180
+ "epoch": 1.2312021809867206,
181
+ "learning_rate": 1.5075191276053118e-05,
182
+ "loss": 0.193143798828125,
183
+ "step": 14000
184
+ },
185
+ {
186
+ "epoch": 1.2751736874505322,
187
+ "learning_rate": 1.4899305250197873e-05,
188
+ "loss": 0.186142333984375,
189
+ "step": 14500
190
+ },
191
+ {
192
+ "epoch": 1.3191451939143435,
193
+ "learning_rate": 1.4723419224342628e-05,
194
+ "loss": 0.18537522888183594,
195
+ "step": 15000
196
+ },
197
+ {
198
+ "epoch": 1.363116700378155,
199
+ "learning_rate": 1.4547533198487383e-05,
200
+ "loss": 0.18841236877441406,
201
+ "step": 15500
202
+ },
203
+ {
204
+ "epoch": 1.4070882068419663,
205
+ "learning_rate": 1.4371647172632135e-05,
206
+ "loss": 0.18419113159179687,
207
+ "step": 16000
208
+ },
209
+ {
210
+ "epoch": 1.4510597133057779,
211
+ "learning_rate": 1.419576114677689e-05,
212
+ "loss": 0.18361325073242188,
213
+ "step": 16500
214
+ },
215
+ {
216
+ "epoch": 1.4950312197695892,
217
+ "learning_rate": 1.4019875120921644e-05,
218
+ "loss": 0.18803219604492188,
219
+ "step": 17000
220
+ },
221
+ {
222
+ "epoch": 1.5390027262334007,
223
+ "learning_rate": 1.3843989095066399e-05,
224
+ "loss": 0.1816302032470703,
225
+ "step": 17500
226
+ },
227
+ {
228
+ "epoch": 1.5829742326972123,
229
+ "learning_rate": 1.3668103069211152e-05,
230
+ "loss": 0.1842498016357422,
231
+ "step": 18000
232
+ },
233
+ {
234
+ "epoch": 1.6269457391610236,
235
+ "learning_rate": 1.3492217043355906e-05,
236
+ "loss": 0.17726910400390625,
237
+ "step": 18500
238
+ },
239
+ {
240
+ "epoch": 1.6709172456248351,
241
+ "learning_rate": 1.3316331017500661e-05,
242
+ "loss": 0.17991775512695313,
243
+ "step": 19000
244
+ },
245
+ {
246
+ "epoch": 1.7148887520886467,
247
+ "learning_rate": 1.3140444991645416e-05,
248
+ "loss": 0.1825124053955078,
249
+ "step": 19500
250
+ },
251
+ {
252
+ "epoch": 1.758860258552458,
253
+ "learning_rate": 1.2964558965790168e-05,
254
+ "loss": 0.17683036804199218,
255
+ "step": 20000
256
+ },
257
+ {
258
+ "epoch": 1.8028317650162693,
259
+ "learning_rate": 1.2788672939934923e-05,
260
+ "loss": 0.1868966827392578,
261
+ "step": 20500
262
+ },
263
+ {
264
+ "epoch": 1.846803271480081,
265
+ "learning_rate": 1.2612786914079678e-05,
266
+ "loss": 0.18423307800292968,
267
+ "step": 21000
268
+ },
269
+ {
270
+ "epoch": 1.8907747779438924,
271
+ "learning_rate": 1.2436900888224432e-05,
272
+ "loss": 0.17814576721191405,
273
+ "step": 21500
274
+ },
275
+ {
276
+ "epoch": 1.9347462844077037,
277
+ "learning_rate": 1.2261014862369185e-05,
278
+ "loss": 0.17719683837890626,
279
+ "step": 22000
280
+ },
281
+ {
282
+ "epoch": 1.9787177908715152,
283
+ "learning_rate": 1.208512883651394e-05,
284
+ "loss": 0.17974095153808595,
285
+ "step": 22500
286
+ },
287
+ {
288
+ "epoch": 2.0,
289
+ "eval_accuracy": 0.9078407123423201,
290
+ "eval_f1": 0.8756839717069265,
291
+ "eval_loss": 0.23735392093658447,
292
+ "step": 22742
293
+ },
294
+ {
295
+ "epoch": 2.0226892973353268,
296
+ "learning_rate": 1.1909242810658694e-05,
297
+ "loss": 0.14513198852539064,
298
+ "step": 23000
299
+ },
300
+ {
301
+ "epoch": 2.066660803799138,
302
+ "learning_rate": 1.1733356784803449e-05,
303
+ "loss": 0.11556282806396484,
304
+ "step": 23500
305
+ },
306
+ {
307
+ "epoch": 2.1106323102629494,
308
+ "learning_rate": 1.15574707589482e-05,
309
+ "loss": 0.12064967346191406,
310
+ "step": 24000
311
+ },
312
+ {
313
+ "epoch": 2.154603816726761,
314
+ "learning_rate": 1.1381584733092956e-05,
315
+ "loss": 0.11743055725097656,
316
+ "step": 24500
317
+ },
318
+ {
319
+ "epoch": 2.1985753231905725,
320
+ "learning_rate": 1.1205698707237711e-05,
321
+ "loss": 0.11710655212402343,
322
+ "step": 25000
323
+ },
324
+ {
325
+ "epoch": 2.242546829654384,
326
+ "learning_rate": 1.1029812681382466e-05,
327
+ "loss": 0.1169861831665039,
328
+ "step": 25500
329
+ },
330
+ {
331
+ "epoch": 2.2865183361181955,
332
+ "learning_rate": 1.0853926655527218e-05,
333
+ "loss": 0.12172984313964844,
334
+ "step": 26000
335
+ },
336
+ {
337
+ "epoch": 2.330489842582007,
338
+ "learning_rate": 1.0678040629671973e-05,
339
+ "loss": 0.11213615417480469,
340
+ "step": 26500
341
+ },
342
+ {
343
+ "epoch": 2.374461349045818,
344
+ "learning_rate": 1.0502154603816728e-05,
345
+ "loss": 0.12687485504150392,
346
+ "step": 27000
347
+ },
348
+ {
349
+ "epoch": 2.41843285550963,
350
+ "learning_rate": 1.0326268577961482e-05,
351
+ "loss": 0.12601725006103515,
352
+ "step": 27500
353
+ },
354
+ {
355
+ "epoch": 2.4624043619734413,
356
+ "learning_rate": 1.0150382552106235e-05,
357
+ "loss": 0.11948597717285156,
358
+ "step": 28000
359
+ },
360
+ {
361
+ "epoch": 2.5063758684372526,
362
+ "learning_rate": 9.974496526250989e-06,
363
+ "loss": 0.12668427276611327,
364
+ "step": 28500
365
+ },
366
+ {
367
+ "epoch": 2.5503473749010643,
368
+ "learning_rate": 9.798610500395744e-06,
369
+ "loss": 0.12504535675048828,
370
+ "step": 29000
371
+ },
372
+ {
373
+ "epoch": 2.5943188813648757,
374
+ "learning_rate": 9.6227244745405e-06,
375
+ "loss": 0.12663780975341796,
376
+ "step": 29500
377
+ },
378
+ {
379
+ "epoch": 2.638290387828687,
380
+ "learning_rate": 9.446838448685253e-06,
381
+ "loss": 0.122175048828125,
382
+ "step": 30000
383
+ },
384
+ {
385
+ "epoch": 2.6822618942924983,
386
+ "learning_rate": 9.270952422830008e-06,
387
+ "loss": 0.12571288299560546,
388
+ "step": 30500
389
+ },
390
+ {
391
+ "epoch": 2.72623340075631,
392
+ "learning_rate": 9.095066396974761e-06,
393
+ "loss": 0.12774851989746094,
394
+ "step": 31000
395
+ },
396
+ {
397
+ "epoch": 2.7702049072201214,
398
+ "learning_rate": 8.919180371119516e-06,
399
+ "loss": 0.12356333923339843,
400
+ "step": 31500
401
+ },
402
+ {
403
+ "epoch": 2.8141764136839327,
404
+ "learning_rate": 8.74329434526427e-06,
405
+ "loss": 0.1209749755859375,
406
+ "step": 32000
407
+ },
408
+ {
409
+ "epoch": 2.8581479201477444,
410
+ "learning_rate": 8.567408319409023e-06,
411
+ "loss": 0.12022649383544921,
412
+ "step": 32500
413
+ },
414
+ {
415
+ "epoch": 2.9021194266115558,
416
+ "learning_rate": 8.391522293553777e-06,
417
+ "loss": 0.12490190887451172,
418
+ "step": 33000
419
+ },
420
+ {
421
+ "epoch": 2.946090933075367,
422
+ "learning_rate": 8.215636267698532e-06,
423
+ "loss": 0.12208123779296876,
424
+ "step": 33500
425
+ },
426
+ {
427
+ "epoch": 2.9900624395391784,
428
+ "learning_rate": 8.039750241843286e-06,
429
+ "loss": 0.12288145446777343,
430
+ "step": 34000
431
+ },
432
+ {
433
+ "epoch": 3.0,
434
+ "eval_accuracy": 0.9108088053425674,
435
+ "eval_f1": 0.8790176474535328,
436
+ "eval_loss": 0.2841033935546875,
437
+ "step": 34113
438
+ },
439
+ {
440
+ "epoch": 3.03403394600299,
441
+ "learning_rate": 7.86386421598804e-06,
442
+ "loss": 0.08970191955566406,
443
+ "step": 34500
444
+ },
445
+ {
446
+ "epoch": 3.0780054524668015,
447
+ "learning_rate": 7.687978190132794e-06,
448
+ "loss": 0.08841686248779297,
449
+ "step": 35000
450
+ },
451
+ {
452
+ "epoch": 3.1219769589306128,
453
+ "learning_rate": 7.512092164277549e-06,
454
+ "loss": 0.07938870239257813,
455
+ "step": 35500
456
+ },
457
+ {
458
+ "epoch": 3.1659484653944245,
459
+ "learning_rate": 7.336206138422303e-06,
460
+ "loss": 0.08218861389160156,
461
+ "step": 36000
462
+ },
463
+ {
464
+ "epoch": 3.209919971858236,
465
+ "learning_rate": 7.160320112567057e-06,
466
+ "loss": 0.0876485595703125,
467
+ "step": 36500
468
+ },
469
+ {
470
+ "epoch": 3.253891478322047,
471
+ "learning_rate": 6.984434086711811e-06,
472
+ "loss": 0.08727376556396485,
473
+ "step": 37000
474
+ },
475
+ {
476
+ "epoch": 3.297862984785859,
477
+ "learning_rate": 6.808548060856566e-06,
478
+ "loss": 0.07741403961181641,
479
+ "step": 37500
480
+ },
481
+ {
482
+ "epoch": 3.3418344912496702,
483
+ "learning_rate": 6.632662035001319e-06,
484
+ "loss": 0.08279672241210938,
485
+ "step": 38000
486
+ },
487
+ {
488
+ "epoch": 3.3858059977134816,
489
+ "learning_rate": 6.456776009146074e-06,
490
+ "loss": 0.08051201629638671,
491
+ "step": 38500
492
+ },
493
+ {
494
+ "epoch": 3.4297775041772933,
495
+ "learning_rate": 6.280889983290828e-06,
496
+ "loss": 0.0875191879272461,
497
+ "step": 39000
498
+ },
499
+ {
500
+ "epoch": 3.4737490106411046,
501
+ "learning_rate": 6.105003957435582e-06,
502
+ "loss": 0.0875638885498047,
503
+ "step": 39500
504
+ },
505
+ {
506
+ "epoch": 3.517720517104916,
507
+ "learning_rate": 5.929117931580336e-06,
508
+ "loss": 0.09117823791503907,
509
+ "step": 40000
510
+ },
511
+ {
512
+ "epoch": 3.5616920235687273,
513
+ "learning_rate": 5.753231905725091e-06,
514
+ "loss": 0.08640509796142579,
515
+ "step": 40500
516
+ },
517
+ {
518
+ "epoch": 3.605663530032539,
519
+ "learning_rate": 5.5773458798698444e-06,
520
+ "loss": 0.08709436798095703,
521
+ "step": 41000
522
+ },
523
+ {
524
+ "epoch": 3.6496350364963503,
525
+ "learning_rate": 5.401459854014599e-06,
526
+ "loss": 0.09037484741210937,
527
+ "step": 41500
528
+ },
529
+ {
530
+ "epoch": 3.6936065429601617,
531
+ "learning_rate": 5.225573828159353e-06,
532
+ "loss": 0.08582682800292969,
533
+ "step": 42000
534
+ },
535
+ {
536
+ "epoch": 3.7375780494239734,
537
+ "learning_rate": 5.049687802304107e-06,
538
+ "loss": 0.07674098968505859,
539
+ "step": 42500
540
+ },
541
+ {
542
+ "epoch": 3.7815495558877847,
543
+ "learning_rate": 4.873801776448862e-06,
544
+ "loss": 0.08345184326171876,
545
+ "step": 43000
546
+ },
547
+ {
548
+ "epoch": 3.825521062351596,
549
+ "learning_rate": 4.697915750593616e-06,
550
+ "loss": 0.08551445007324218,
551
+ "step": 43500
552
+ },
553
+ {
554
+ "epoch": 3.8694925688154074,
555
+ "learning_rate": 4.5220297247383695e-06,
556
+ "loss": 0.08454890441894532,
557
+ "step": 44000
558
+ },
559
+ {
560
+ "epoch": 3.913464075279219,
561
+ "learning_rate": 4.346143698883124e-06,
562
+ "loss": 0.0829298324584961,
563
+ "step": 44500
564
+ },
565
+ {
566
+ "epoch": 3.9574355817430305,
567
+ "learning_rate": 4.170257673027878e-06,
568
+ "loss": 0.08503248596191407,
569
+ "step": 45000
570
+ },
571
+ {
572
+ "epoch": 4.0,
573
+ "eval_accuracy": 0.9115508285926293,
574
+ "eval_f1": 0.8807761552310462,
575
+ "eval_loss": 0.3467446565628052,
576
+ "step": 45484
577
+ },
578
+ {
579
+ "epoch": 4.001407088206842,
580
+ "learning_rate": 3.9943716471726325e-06,
581
+ "loss": 0.08694480133056641,
582
+ "step": 45500
583
+ },
584
+ {
585
+ "epoch": 4.0453785946706535,
586
+ "learning_rate": 3.818485621317387e-06,
587
+ "loss": 0.058864639282226565,
588
+ "step": 46000
589
+ },
590
+ {
591
+ "epoch": 4.089350101134465,
592
+ "learning_rate": 3.6425995954621407e-06,
593
+ "loss": 0.06160499572753906,
594
+ "step": 46500
595
+ },
596
+ {
597
+ "epoch": 4.133321607598276,
598
+ "learning_rate": 3.466713569606895e-06,
599
+ "loss": 0.052556198120117185,
600
+ "step": 47000
601
+ },
602
+ {
603
+ "epoch": 4.1772931140620875,
604
+ "learning_rate": 3.290827543751649e-06,
605
+ "loss": 0.05974016189575195,
606
+ "step": 47500
607
+ },
608
+ {
609
+ "epoch": 4.221264620525899,
610
+ "learning_rate": 3.1149415178964033e-06,
611
+ "loss": 0.055861122131347654,
612
+ "step": 48000
613
+ },
614
+ {
615
+ "epoch": 4.265236126989711,
616
+ "learning_rate": 2.939055492041157e-06,
617
+ "loss": 0.058933868408203124,
618
+ "step": 48500
619
+ },
620
+ {
621
+ "epoch": 4.309207633453522,
622
+ "learning_rate": 2.7631694661859115e-06,
623
+ "loss": 0.05975740432739258,
624
+ "step": 49000
625
+ },
626
+ {
627
+ "epoch": 4.353179139917334,
628
+ "learning_rate": 2.587283440330666e-06,
629
+ "loss": 0.057000186920166014,
630
+ "step": 49500
631
+ },
632
+ {
633
+ "epoch": 4.397150646381145,
634
+ "learning_rate": 2.41139741447542e-06,
635
+ "loss": 0.05673225784301758,
636
+ "step": 50000
637
+ },
638
+ {
639
+ "epoch": 4.441122152844956,
640
+ "learning_rate": 2.2355113886201745e-06,
641
+ "loss": 0.05510025024414063,
642
+ "step": 50500
643
+ },
644
+ {
645
+ "epoch": 4.485093659308768,
646
+ "learning_rate": 2.0596253627649284e-06,
647
+ "loss": 0.05809032821655274,
648
+ "step": 51000
649
+ },
650
+ {
651
+ "epoch": 4.529065165772579,
652
+ "learning_rate": 1.8837393369096827e-06,
653
+ "loss": 0.06893916320800782,
654
+ "step": 51500
655
+ },
656
+ {
657
+ "epoch": 4.573036672236391,
658
+ "learning_rate": 1.7078533110544368e-06,
659
+ "loss": 0.059110233306884766,
660
+ "step": 52000
661
+ },
662
+ {
663
+ "epoch": 4.617008178700202,
664
+ "learning_rate": 1.531967285199191e-06,
665
+ "loss": 0.06264583587646484,
666
+ "step": 52500
667
+ },
668
+ {
669
+ "epoch": 4.660979685164014,
670
+ "learning_rate": 1.356081259343945e-06,
671
+ "loss": 0.058041461944580076,
672
+ "step": 53000
673
+ },
674
+ {
675
+ "epoch": 4.704951191627825,
676
+ "learning_rate": 1.1801952334886994e-06,
677
+ "loss": 0.06378980255126954,
678
+ "step": 53500
679
+ },
680
+ {
681
+ "epoch": 4.748922698091636,
682
+ "learning_rate": 1.0043092076334537e-06,
683
+ "loss": 0.06099491500854492,
684
+ "step": 54000
685
+ },
686
+ {
687
+ "epoch": 4.792894204555449,
688
+ "learning_rate": 8.284231817782078e-07,
689
+ "loss": 0.06348543548583985,
690
+ "step": 54500
691
+ },
692
+ {
693
+ "epoch": 4.83686571101926,
694
+ "learning_rate": 6.525371559229619e-07,
695
+ "loss": 0.05673025131225586,
696
+ "step": 55000
697
+ },
698
+ {
699
+ "epoch": 4.880837217483071,
700
+ "learning_rate": 4.7665113006771615e-07,
701
+ "loss": 0.06026851272583008,
702
+ "step": 55500
703
+ },
704
+ {
705
+ "epoch": 4.9248087239468825,
706
+ "learning_rate": 3.007651042124703e-07,
707
+ "loss": 0.06751934814453125,
708
+ "step": 56000
709
+ },
710
+ {
711
+ "epoch": 4.968780230410694,
712
+ "learning_rate": 1.2487907835722453e-07,
713
+ "loss": 0.05790270614624023,
714
+ "step": 56500
715
+ },
716
+ {
717
+ "epoch": 5.0,
718
+ "eval_accuracy": 0.9132574820677715,
719
+ "eval_f1": 0.8828305101733988,
720
+ "eval_loss": 0.41615524888038635,
721
+ "step": 56855
722
+ }
723
+ ],
724
+ "max_steps": 56855,
725
+ "num_train_epochs": 5,
726
+ "total_flos": 82104141363897024,
727
+ "trial_name": null,
728
+ "trial_params": null
729
+ }
qqp_#1_glue_cased/checkpoint-56855/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bd0df48bb459380a4288a73c472c5ce3866bb691c774490d2ba99b80b24e42f
3
+ size 1639
qqp_#1_glue_cased/checkpoint-56855/vocab.txt ADDED
The diff for this file is too large to render. See raw diff