ShengdingHu commited on
Commit
23c6635
1 Parent(s): 5f07a6e

Training in progress, step 200

Browse files
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_average_metrics": 0.0,
4
- "eval_loss": 0.27982431650161743,
5
- "eval_matthews_correlation": 0.0,
6
- "eval_runtime": 0.7644,
7
- "eval_samples_per_second": 681.568,
8
- "test_average_metrics": 0.0,
9
- "test_loss": 0.27551037073135376,
10
- "test_matthews_correlation": 0.0,
11
- "test_runtime": 0.6746,
12
- "test_samples_per_second": 773.807,
13
- "train_loss": 0.2779904210290243,
14
- "train_runtime": 435.9382,
15
  "train_samples": 8551,
16
- "train_samples_per_second": 392.303,
17
- "train_steps_per_second": 3.946
18
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_average_metrics": 52.359840871035765,
4
+ "eval_loss": 0.2282242476940155,
5
+ "eval_matthews_correlation": 52.359840871035765,
6
+ "eval_runtime": 0.9075,
7
+ "eval_samples_per_second": 574.107,
8
+ "test_average_metrics": 63.24519496421077,
9
+ "test_loss": 0.18458954989910126,
10
+ "test_matthews_correlation": 63.24519496421077,
11
+ "test_runtime": 0.8502,
12
+ "test_samples_per_second": 614.001,
13
+ "train_loss": 0.5282898814179177,
14
+ "train_runtime": 424.1274,
15
  "train_samples": 8551,
16
+ "train_samples_per_second": 403.228,
17
+ "train_steps_per_second": 4.055
18
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_average_metrics": 0.0,
4
- "eval_loss": 0.27982431650161743,
5
- "eval_matthews_correlation": 0.0,
6
- "eval_runtime": 0.7644,
7
- "eval_samples_per_second": 681.568
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_average_metrics": 52.359840871035765,
4
+ "eval_loss": 0.2282242476940155,
5
+ "eval_matthews_correlation": 52.359840871035765,
6
+ "eval_runtime": 0.9075,
7
+ "eval_samples_per_second": 574.107
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb66da5e6b61c2b4cc2034fe8b508ecce6ee8ee9982941b9c7e22e9f5eeb30fa
3
  size 1084131
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a589ded531d3bbb72a7a6e9aeb707c0b45b2aa234b814b877f8e496c28af491
3
  size 1084131
runs/Jan31_19-25-22_node1/events.out.tfevents.1643628441.node1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b457c8425912279942596ceb9e0ac6c8b7ca2a9cf5b3b17a8978188864b4e6b9
3
- size 11116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334a48cecac110d2e28a68203c3fa53bc97487bcdf69a630c7610b701c5afa43
3
+ size 11804
runs/Jan31_19-25-22_node1/events.out.tfevents.1643628866.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b254740812b3b5b0cf5053822c0316b14a252f438bcb603dd521c2369e286dd
3
+ size 708
runs/Jan31_20-22-53_node1/1643631902.337454/events.out.tfevents.1643631902.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394ea619e40e78d2e08e5492feceb7fc0905a3e73ac274ebea205b9352f84dcb
3
+ size 5011
runs/Jan31_20-22-53_node1/events.out.tfevents.1643631902.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a2143aa1f51e84731a49af5406404480d81036c9e3de46611ba73399f9f75ed
3
+ size 4304
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "test_average_metrics": 0.0,
4
- "test_loss": 0.27551037073135376,
5
- "test_matthews_correlation": 0.0,
6
- "test_runtime": 0.6746,
7
- "test_samples_per_second": 773.807
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "test_average_metrics": 63.24519496421077,
4
+ "test_loss": 0.18458954989910126,
5
+ "test_matthews_correlation": 63.24519496421077,
6
+ "test_runtime": 0.8502,
7
+ "test_samples_per_second": 614.001
8
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.2779904210290243,
4
- "train_runtime": 435.9382,
5
  "train_samples": 8551,
6
- "train_samples_per_second": 392.303,
7
- "train_steps_per_second": 3.946
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.5282898814179177,
4
+ "train_runtime": 424.1274,
5
  "train_samples": 8551,
6
+ "train_samples_per_second": 403.228,
7
+ "train_steps_per_second": 4.055
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.0,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/cola/checkpoint-86",
4
  "epoch": 20.0,
5
  "global_step": 1720,
6
  "is_hyper_param_search": false,
@@ -10,209 +10,209 @@
10
  {
11
  "epoch": 1.0,
12
  "eval_average_metrics": 0.0,
13
- "eval_loss": 0.27982431650161743,
14
  "eval_matthews_correlation": 0.0,
15
- "eval_runtime": 0.8066,
16
- "eval_samples_per_second": 645.909,
17
  "step": 86
18
  },
19
  {
20
  "epoch": 2.0,
21
- "eval_average_metrics": 0.0,
22
- "eval_loss": 0.27295124530792236,
23
- "eval_matthews_correlation": 0.0,
24
- "eval_runtime": 0.8088,
25
- "eval_samples_per_second": 644.148,
26
  "step": 172
27
  },
28
  {
29
  "epoch": 3.0,
30
- "eval_average_metrics": 0.0,
31
- "eval_loss": 0.27825212478637695,
32
- "eval_matthews_correlation": 0.0,
33
- "eval_runtime": 0.7356,
34
- "eval_samples_per_second": 708.276,
35
  "step": 258
36
  },
37
  {
38
  "epoch": 4.0,
39
- "eval_average_metrics": 0.0,
40
- "eval_loss": 0.2720172107219696,
41
- "eval_matthews_correlation": 0.0,
42
- "eval_runtime": 0.7952,
43
- "eval_samples_per_second": 655.185,
44
  "step": 344
45
  },
46
  {
47
  "epoch": 5.0,
48
- "eval_average_metrics": 0.0,
49
- "eval_loss": 0.2690984904766083,
50
- "eval_matthews_correlation": 0.0,
51
- "eval_runtime": 0.7358,
52
- "eval_samples_per_second": 708.105,
53
  "step": 430
54
  },
55
  {
56
  "epoch": 5.81,
57
- "learning_rate": 0.002127906976744186,
58
- "loss": 0.3,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 6.0,
63
- "eval_average_metrics": 0.0,
64
- "eval_loss": 0.2694063186645508,
65
- "eval_matthews_correlation": 0.0,
66
- "eval_runtime": 0.7511,
67
- "eval_samples_per_second": 693.659,
68
  "step": 516
69
  },
70
  {
71
  "epoch": 7.0,
72
- "eval_average_metrics": 0.0,
73
- "eval_loss": 0.28033140301704407,
74
- "eval_matthews_correlation": 0.0,
75
- "eval_runtime": 0.7448,
76
- "eval_samples_per_second": 699.547,
77
  "step": 602
78
  },
79
  {
80
  "epoch": 8.0,
81
- "eval_average_metrics": 0.0,
82
- "eval_loss": 0.2737719714641571,
83
- "eval_matthews_correlation": 0.0,
84
- "eval_runtime": 0.7024,
85
- "eval_samples_per_second": 741.703,
86
  "step": 688
87
  },
88
  {
89
  "epoch": 9.0,
90
- "eval_average_metrics": 0.0,
91
- "eval_loss": 0.26990658044815063,
92
- "eval_matthews_correlation": 0.0,
93
- "eval_runtime": 0.7557,
94
- "eval_samples_per_second": 689.411,
95
  "step": 774
96
  },
97
  {
98
  "epoch": 10.0,
99
- "eval_average_metrics": 0.0,
100
- "eval_loss": 0.269914448261261,
101
- "eval_matthews_correlation": 0.0,
102
- "eval_runtime": 0.7515,
103
- "eval_samples_per_second": 693.272,
104
  "step": 860
105
  },
106
  {
107
  "epoch": 11.0,
108
- "eval_average_metrics": 0.0,
109
- "eval_loss": 0.2703316807746887,
110
- "eval_matthews_correlation": 0.0,
111
- "eval_runtime": 0.7544,
112
- "eval_samples_per_second": 690.634,
113
  "step": 946
114
  },
115
  {
116
  "epoch": 11.63,
117
- "learning_rate": 0.0012558139534883722,
118
- "loss": 0.2698,
119
  "step": 1000
120
  },
121
  {
122
  "epoch": 12.0,
123
- "eval_average_metrics": 0.0,
124
- "eval_loss": 0.2690633535385132,
125
- "eval_matthews_correlation": 0.0,
126
- "eval_runtime": 0.8328,
127
- "eval_samples_per_second": 625.617,
128
  "step": 1032
129
  },
130
  {
131
  "epoch": 13.0,
132
- "eval_average_metrics": 0.0,
133
- "eval_loss": 0.2688796818256378,
134
- "eval_matthews_correlation": 0.0,
135
- "eval_runtime": 0.7677,
136
- "eval_samples_per_second": 678.682,
137
  "step": 1118
138
  },
139
  {
140
  "epoch": 14.0,
141
- "eval_average_metrics": 0.0,
142
- "eval_loss": 0.26915279030799866,
143
- "eval_matthews_correlation": 0.0,
144
- "eval_runtime": 0.7677,
145
- "eval_samples_per_second": 678.671,
146
  "step": 1204
147
  },
148
  {
149
  "epoch": 15.0,
150
- "eval_average_metrics": 0.0,
151
- "eval_loss": 0.27187806367874146,
152
- "eval_matthews_correlation": 0.0,
153
- "eval_runtime": 0.7511,
154
- "eval_samples_per_second": 693.632,
155
  "step": 1290
156
  },
157
  {
158
  "epoch": 16.0,
159
- "eval_average_metrics": 0.0,
160
- "eval_loss": 0.27496567368507385,
161
- "eval_matthews_correlation": 0.0,
162
- "eval_runtime": 0.7145,
163
- "eval_samples_per_second": 729.175,
164
  "step": 1376
165
  },
166
  {
167
  "epoch": 17.0,
168
- "eval_average_metrics": 0.0,
169
- "eval_loss": 0.26937127113342285,
170
- "eval_matthews_correlation": 0.0,
171
- "eval_runtime": 0.7488,
172
- "eval_samples_per_second": 695.759,
173
  "step": 1462
174
  },
175
  {
176
  "epoch": 17.44,
177
- "learning_rate": 0.0003837209302325582,
178
- "loss": 0.2686,
179
  "step": 1500
180
  },
181
  {
182
  "epoch": 18.0,
183
- "eval_average_metrics": 0.0,
184
- "eval_loss": 0.26900675892829895,
185
- "eval_matthews_correlation": 0.0,
186
- "eval_runtime": 0.7556,
187
- "eval_samples_per_second": 689.484,
188
  "step": 1548
189
  },
190
  {
191
  "epoch": 19.0,
192
- "eval_average_metrics": 0.0,
193
- "eval_loss": 0.2690284252166748,
194
- "eval_matthews_correlation": 0.0,
195
- "eval_runtime": 0.7419,
196
- "eval_samples_per_second": 702.253,
197
  "step": 1634
198
  },
199
  {
200
  "epoch": 20.0,
201
- "eval_average_metrics": 0.0,
202
- "eval_loss": 0.26897749304771423,
203
- "eval_matthews_correlation": 0.0,
204
- "eval_runtime": 0.7455,
205
- "eval_samples_per_second": 698.863,
206
  "step": 1720
207
  },
208
  {
209
  "epoch": 20.0,
210
  "step": 1720,
211
  "total_flos": 6646762904679216.0,
212
- "train_loss": 0.2779904210290243,
213
- "train_runtime": 435.9382,
214
- "train_samples_per_second": 392.303,
215
- "train_steps_per_second": 3.946
216
  }
217
  ],
218
  "max_steps": 1720,
 
1
  {
2
+ "best_metric": 52.359840871035765,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/cola/checkpoint-1290",
4
  "epoch": 20.0,
5
  "global_step": 1720,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 1.0,
12
  "eval_average_metrics": 0.0,
13
+ "eval_loss": 4.8104567527771,
14
  "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 0.763,
16
+ "eval_samples_per_second": 682.828,
17
  "step": 86
18
  },
19
  {
20
  "epoch": 2.0,
21
+ "eval_average_metrics": 8.869450752831899,
22
+ "eval_loss": 0.27582260966300964,
23
+ "eval_matthews_correlation": 8.869450752831899,
24
+ "eval_runtime": 0.8417,
25
+ "eval_samples_per_second": 619.016,
26
  "step": 172
27
  },
28
  {
29
  "epoch": 3.0,
30
+ "eval_average_metrics": 43.97683870357735,
31
+ "eval_loss": 0.21786993741989136,
32
+ "eval_matthews_correlation": 43.97683870357735,
33
+ "eval_runtime": 0.8315,
34
+ "eval_samples_per_second": 626.599,
35
  "step": 258
36
  },
37
  {
38
  "epoch": 4.0,
39
+ "eval_average_metrics": 46.78131759250157,
40
+ "eval_loss": 0.22430144250392914,
41
+ "eval_matthews_correlation": 46.78131759250157,
42
+ "eval_runtime": 0.8615,
43
+ "eval_samples_per_second": 604.782,
44
  "step": 344
45
  },
46
  {
47
  "epoch": 5.0,
48
+ "eval_average_metrics": 49.53306157776009,
49
+ "eval_loss": 0.21720413863658905,
50
+ "eval_matthews_correlation": 49.53306157776009,
51
+ "eval_runtime": 0.8515,
52
+ "eval_samples_per_second": 611.84,
53
  "step": 430
54
  },
55
  {
56
  "epoch": 5.81,
57
+ "learning_rate": 0.0003,
58
+ "loss": 1.3817,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 6.0,
63
+ "eval_average_metrics": 48.16295515684713,
64
+ "eval_loss": 0.22913119196891785,
65
+ "eval_matthews_correlation": 48.16295515684713,
66
+ "eval_runtime": 0.8751,
67
+ "eval_samples_per_second": 595.357,
68
  "step": 516
69
  },
70
  {
71
  "epoch": 7.0,
72
+ "eval_average_metrics": 47.85938832793908,
73
+ "eval_loss": 0.23619267344474792,
74
+ "eval_matthews_correlation": 47.85938832793908,
75
+ "eval_runtime": 0.8864,
76
+ "eval_samples_per_second": 587.749,
77
  "step": 602
78
  },
79
  {
80
  "epoch": 8.0,
81
+ "eval_average_metrics": 49.724093419139464,
82
+ "eval_loss": 0.2390584498643875,
83
+ "eval_matthews_correlation": 49.724093419139464,
84
+ "eval_runtime": 0.8668,
85
+ "eval_samples_per_second": 601.078,
86
  "step": 688
87
  },
88
  {
89
  "epoch": 9.0,
90
+ "eval_average_metrics": 48.86384938654592,
91
+ "eval_loss": 0.22121600806713104,
92
+ "eval_matthews_correlation": 48.86384938654592,
93
+ "eval_runtime": 0.8908,
94
+ "eval_samples_per_second": 584.879,
95
  "step": 774
96
  },
97
  {
98
  "epoch": 10.0,
99
+ "eval_average_metrics": 48.08237616875362,
100
+ "eval_loss": 0.2251322716474533,
101
+ "eval_matthews_correlation": 48.08237616875362,
102
+ "eval_runtime": 0.8537,
103
+ "eval_samples_per_second": 610.303,
104
  "step": 860
105
  },
106
  {
107
  "epoch": 11.0,
108
+ "eval_average_metrics": 47.083822335710934,
109
+ "eval_loss": 0.23612065613269806,
110
+ "eval_matthews_correlation": 47.083822335710934,
111
+ "eval_runtime": 0.852,
112
+ "eval_samples_per_second": 611.521,
113
  "step": 946
114
  },
115
  {
116
  "epoch": 11.63,
117
+ "learning_rate": 0.00017704918032786883,
118
+ "loss": 0.1859,
119
  "step": 1000
120
  },
121
  {
122
  "epoch": 12.0,
123
+ "eval_average_metrics": 49.76615268960096,
124
+ "eval_loss": 0.2268751710653305,
125
+ "eval_matthews_correlation": 49.76615268960096,
126
+ "eval_runtime": 0.8867,
127
+ "eval_samples_per_second": 587.54,
128
  "step": 1032
129
  },
130
  {
131
  "epoch": 13.0,
132
+ "eval_average_metrics": 50.45736339822007,
133
+ "eval_loss": 0.22725746035575867,
134
+ "eval_matthews_correlation": 50.45736339822007,
135
+ "eval_runtime": 0.848,
136
+ "eval_samples_per_second": 614.383,
137
  "step": 1118
138
  },
139
  {
140
  "epoch": 14.0,
141
+ "eval_average_metrics": 51.11915736464516,
142
+ "eval_loss": 0.22515951097011566,
143
+ "eval_matthews_correlation": 51.11915736464516,
144
+ "eval_runtime": 0.8495,
145
+ "eval_samples_per_second": 613.291,
146
  "step": 1204
147
  },
148
  {
149
  "epoch": 15.0,
150
+ "eval_average_metrics": 52.359840871035765,
151
+ "eval_loss": 0.2282242476940155,
152
+ "eval_matthews_correlation": 52.359840871035765,
153
+ "eval_runtime": 0.9084,
154
+ "eval_samples_per_second": 573.516,
155
  "step": 1290
156
  },
157
  {
158
  "epoch": 16.0,
159
+ "eval_average_metrics": 50.78390137665081,
160
+ "eval_loss": 0.24213995039463043,
161
+ "eval_matthews_correlation": 50.78390137665081,
162
+ "eval_runtime": 0.7976,
163
+ "eval_samples_per_second": 653.201,
164
  "step": 1376
165
  },
166
  {
167
  "epoch": 17.0,
168
+ "eval_average_metrics": 51.452415683714314,
169
+ "eval_loss": 0.22793160378932953,
170
+ "eval_matthews_correlation": 51.452415683714314,
171
+ "eval_runtime": 0.8964,
172
+ "eval_samples_per_second": 581.215,
173
  "step": 1462
174
  },
175
  {
176
  "epoch": 17.44,
177
+ "learning_rate": 5.40983606557377e-05,
178
+ "loss": 0.1748,
179
  "step": 1500
180
  },
181
  {
182
  "epoch": 18.0,
183
+ "eval_average_metrics": 51.452415683714314,
184
+ "eval_loss": 0.23017793893814087,
185
+ "eval_matthews_correlation": 51.452415683714314,
186
+ "eval_runtime": 0.8935,
187
+ "eval_samples_per_second": 583.099,
188
  "step": 1548
189
  },
190
  {
191
  "epoch": 19.0,
192
+ "eval_average_metrics": 50.9850664467699,
193
+ "eval_loss": 0.2308470457792282,
194
+ "eval_matthews_correlation": 50.9850664467699,
195
+ "eval_runtime": 0.8691,
196
+ "eval_samples_per_second": 599.475,
197
  "step": 1634
198
  },
199
  {
200
  "epoch": 20.0,
201
+ "eval_average_metrics": 50.9850664467699,
202
+ "eval_loss": 0.22891275584697723,
203
+ "eval_matthews_correlation": 50.9850664467699,
204
+ "eval_runtime": 0.8366,
205
+ "eval_samples_per_second": 622.759,
206
  "step": 1720
207
  },
208
  {
209
  "epoch": 20.0,
210
  "step": 1720,
211
  "total_flos": 6646762904679216.0,
212
+ "train_loss": 0.5282898814179177,
213
+ "train_runtime": 424.1274,
214
+ "train_samples_per_second": 403.228,
215
+ "train_steps_per_second": 4.055
216
  }
217
  ],
218
  "max_steps": 1720,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2a6c9356208e65c852641943db784f8160e696fb6cd5e449c34a8681ea7871e
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a646a3fb66d1560b545d08f84e93ab16cd3153ff19e20982a22b554a34ec127
3
  size 3183
training_config.json CHANGED
@@ -1 +1 @@
1
- {"dataset_config_name": ["en"], "delta_type": "bitfit", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "cola", "evaluation_strategy": "epoch", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/cola", "overwrite_output_dir": true, "per_device_eval_batch_size": 100, "per_device_train_batch_size": 100, "predict_with_generate": true, "push_to_hub": true, "save_strategy": "epoch", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "cola", "test_dataset_config_name": ["en"], "test_dataset_name": "cola", "tokenizer_name": "../../../../plm_cache/t5-base", "warmup_steps": 500}
 
1
+ {"dataset_config_name": ["en"], "delta_type": "bitfit", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "cola", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/cola", "overwrite_output_dir": true, "per_device_eval_batch_size": 100, "per_device_train_batch_size": 100, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "cola", "test_dataset_config_name": ["en"], "test_dataset_name": "cola", "tokenizer_name": "../../../../plm_cache/t5-base", "warmup_steps": 500}