Kashob committed on
Commit
3304768
1 Parent(s): 903274a

Upload 9 files

Browse files
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25502057cbbcf590727f8c51c87d294fcea58480453a6c55b9a214be65bc8b70
3
+ size 874819589
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a7c8a0de22bf9d3b23a24f579fdd35a82f8aea7e5e9d37aa4b24363bfdd37eb
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7a1381ec8f0b04f9655e96215ad18d00d1e3055868e58a6612866788b2a1599
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_basic_tokenize": true,
5
+ "do_lower_case": true,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 1000000000000000019884624838656,
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
trainer_state.json ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "./saved-model-all-data/checkpoint-882",
4
+ "epoch": 25.0,
5
+ "global_step": 1575,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.8670662363608421,
13
+ "eval_f1": 0.5017103762827823,
14
+ "eval_loss": 0.42577993869781494,
15
+ "eval_precision": 0.4592901878914405,
16
+ "eval_recall": 0.5527638190954773,
17
+ "eval_runtime": 0.6534,
18
+ "eval_samples_per_second": 76.527,
19
+ "eval_steps_per_second": 10.714,
20
+ "step": 63
21
+ },
22
+ {
23
+ "epoch": 2.0,
24
+ "eval_accuracy": 0.9293069002612571,
25
+ "eval_f1": 0.7072289156626506,
26
+ "eval_loss": 0.22081975638866425,
27
+ "eval_precision": 0.6793981481481481,
28
+ "eval_recall": 0.7374371859296482,
29
+ "eval_runtime": 0.5804,
30
+ "eval_samples_per_second": 86.142,
31
+ "eval_steps_per_second": 12.06,
32
+ "step": 126
33
+ },
34
+ {
35
+ "epoch": 3.0,
36
+ "eval_accuracy": 0.9549715690794529,
37
+ "eval_f1": 0.8209500609013398,
38
+ "eval_loss": 0.13680364191532135,
39
+ "eval_precision": 0.7966903073286052,
40
+ "eval_recall": 0.8467336683417085,
41
+ "eval_runtime": 0.5883,
42
+ "eval_samples_per_second": 84.997,
43
+ "eval_steps_per_second": 11.9,
44
+ "step": 189
45
+ },
46
+ {
47
+ "epoch": 4.0,
48
+ "eval_accuracy": 0.9778699861687413,
49
+ "eval_f1": 0.8866995073891626,
50
+ "eval_loss": 0.07373423129320145,
51
+ "eval_precision": 0.8695652173913043,
52
+ "eval_recall": 0.9045226130653267,
53
+ "eval_runtime": 0.6005,
54
+ "eval_samples_per_second": 83.259,
55
+ "eval_steps_per_second": 11.656,
56
+ "step": 252
57
+ },
58
+ {
59
+ "epoch": 5.0,
60
+ "eval_accuracy": 0.9855540187490395,
61
+ "eval_f1": 0.9316239316239316,
62
+ "eval_loss": 0.04712964966893196,
63
+ "eval_precision": 0.9061757719714965,
64
+ "eval_recall": 0.9585427135678392,
65
+ "eval_runtime": 0.6021,
66
+ "eval_samples_per_second": 83.049,
67
+ "eval_steps_per_second": 11.627,
68
+ "step": 315
69
+ },
70
+ {
71
+ "epoch": 6.0,
72
+ "eval_accuracy": 0.9929306900261257,
73
+ "eval_f1": 0.9652605459057072,
74
+ "eval_loss": 0.02583940699696541,
75
+ "eval_precision": 0.9534313725490197,
76
+ "eval_recall": 0.9773869346733668,
77
+ "eval_runtime": 0.5826,
78
+ "eval_samples_per_second": 85.82,
79
+ "eval_steps_per_second": 12.015,
80
+ "step": 378
81
+ },
82
+ {
83
+ "epoch": 7.0,
84
+ "eval_accuracy": 0.9967727063162748,
85
+ "eval_f1": 0.9849435382685068,
86
+ "eval_loss": 0.015803957358002663,
87
+ "eval_precision": 0.9837092731829574,
88
+ "eval_recall": 0.9861809045226131,
89
+ "eval_runtime": 0.5833,
90
+ "eval_samples_per_second": 85.718,
91
+ "eval_steps_per_second": 12.0,
92
+ "step": 441
93
+ },
94
+ {
95
+ "epoch": 7.94,
96
+ "learning_rate": 3.644067796610169e-05,
97
+ "loss": 0.268,
98
+ "step": 500
99
+ },
100
+ {
101
+ "epoch": 8.0,
102
+ "eval_accuracy": 0.9972337482710927,
103
+ "eval_f1": 0.9880578252671276,
104
+ "eval_loss": 0.012770027853548527,
105
+ "eval_precision": 0.9886792452830189,
106
+ "eval_recall": 0.9874371859296482,
107
+ "eval_runtime": 0.5723,
108
+ "eval_samples_per_second": 87.36,
109
+ "eval_steps_per_second": 12.23,
110
+ "step": 504
111
+ },
112
+ {
113
+ "epoch": 9.0,
114
+ "eval_accuracy": 0.9978484708775165,
115
+ "eval_f1": 0.9861809045226131,
116
+ "eval_loss": 0.008633743040263653,
117
+ "eval_precision": 0.9861809045226131,
118
+ "eval_recall": 0.9861809045226131,
119
+ "eval_runtime": 0.5754,
120
+ "eval_samples_per_second": 86.898,
121
+ "eval_steps_per_second": 12.166,
122
+ "step": 567
123
+ },
124
+ {
125
+ "epoch": 10.0,
126
+ "eval_accuracy": 0.9983095128323344,
127
+ "eval_f1": 0.9899497487437185,
128
+ "eval_loss": 0.006678320933133364,
129
+ "eval_precision": 0.9899497487437185,
130
+ "eval_recall": 0.9899497487437185,
131
+ "eval_runtime": 0.5717,
132
+ "eval_samples_per_second": 87.461,
133
+ "eval_steps_per_second": 12.245,
134
+ "step": 630
135
+ },
136
+ {
137
+ "epoch": 11.0,
138
+ "eval_accuracy": 0.9990779160903642,
139
+ "eval_f1": 0.9962311557788944,
140
+ "eval_loss": 0.004529367666691542,
141
+ "eval_precision": 0.9962311557788944,
142
+ "eval_recall": 0.9962311557788944,
143
+ "eval_runtime": 0.5852,
144
+ "eval_samples_per_second": 85.445,
145
+ "eval_steps_per_second": 11.962,
146
+ "step": 693
147
+ },
148
+ {
149
+ "epoch": 12.0,
150
+ "eval_accuracy": 0.999692638696788,
151
+ "eval_f1": 0.9987437185929648,
152
+ "eval_loss": 0.0013231054181233048,
153
+ "eval_precision": 0.9987437185929648,
154
+ "eval_recall": 0.9987437185929648,
155
+ "eval_runtime": 0.5808,
156
+ "eval_samples_per_second": 86.093,
157
+ "eval_steps_per_second": 12.053,
158
+ "step": 756
159
+ },
160
+ {
161
+ "epoch": 13.0,
162
+ "eval_accuracy": 0.9990779160903642,
163
+ "eval_f1": 0.9956112852664577,
164
+ "eval_loss": 0.0035528314765542746,
165
+ "eval_precision": 0.9937421777221527,
166
+ "eval_recall": 0.9974874371859297,
167
+ "eval_runtime": 0.6108,
168
+ "eval_samples_per_second": 81.86,
169
+ "eval_steps_per_second": 11.46,
170
+ "step": 819
171
+ },
172
+ {
173
+ "epoch": 14.0,
174
+ "eval_accuracy": 1.0,
175
+ "eval_f1": 1.0,
176
+ "eval_loss": 0.00045324634993448853,
177
+ "eval_precision": 1.0,
178
+ "eval_recall": 1.0,
179
+ "eval_runtime": 0.5759,
180
+ "eval_samples_per_second": 86.817,
181
+ "eval_steps_per_second": 12.154,
182
+ "step": 882
183
+ },
184
+ {
185
+ "epoch": 15.0,
186
+ "eval_accuracy": 1.0,
187
+ "eval_f1": 1.0,
188
+ "eval_loss": 0.00044708128552883863,
189
+ "eval_precision": 1.0,
190
+ "eval_recall": 1.0,
191
+ "eval_runtime": 0.575,
192
+ "eval_samples_per_second": 86.954,
193
+ "eval_steps_per_second": 12.174,
194
+ "step": 945
195
+ },
196
+ {
197
+ "epoch": 15.87,
198
+ "learning_rate": 1.9491525423728814e-05,
199
+ "loss": 0.0097,
200
+ "step": 1000
201
+ },
202
+ {
203
+ "epoch": 16.0,
204
+ "eval_accuracy": 1.0,
205
+ "eval_f1": 1.0,
206
+ "eval_loss": 0.00018117745639756322,
207
+ "eval_precision": 1.0,
208
+ "eval_recall": 1.0,
209
+ "eval_runtime": 0.5823,
210
+ "eval_samples_per_second": 85.872,
211
+ "eval_steps_per_second": 12.022,
212
+ "step": 1008
213
+ },
214
+ {
215
+ "epoch": 17.0,
216
+ "eval_accuracy": 1.0,
217
+ "eval_f1": 1.0,
218
+ "eval_loss": 0.00018171708506997675,
219
+ "eval_precision": 1.0,
220
+ "eval_recall": 1.0,
221
+ "eval_runtime": 0.6909,
222
+ "eval_samples_per_second": 72.373,
223
+ "eval_steps_per_second": 10.132,
224
+ "step": 1071
225
+ },
226
+ {
227
+ "epoch": 18.0,
228
+ "eval_accuracy": 1.0,
229
+ "eval_f1": 1.0,
230
+ "eval_loss": 0.0002043112035607919,
231
+ "eval_precision": 1.0,
232
+ "eval_recall": 1.0,
233
+ "eval_runtime": 0.6068,
234
+ "eval_samples_per_second": 82.399,
235
+ "eval_steps_per_second": 11.536,
236
+ "step": 1134
237
+ },
238
+ {
239
+ "epoch": 19.0,
240
+ "eval_accuracy": 1.0,
241
+ "eval_f1": 1.0,
242
+ "eval_loss": 0.00015518060536123812,
243
+ "eval_precision": 1.0,
244
+ "eval_recall": 1.0,
245
+ "eval_runtime": 0.594,
246
+ "eval_samples_per_second": 84.176,
247
+ "eval_steps_per_second": 11.785,
248
+ "step": 1197
249
+ },
250
+ {
251
+ "epoch": 20.0,
252
+ "eval_accuracy": 1.0,
253
+ "eval_f1": 1.0,
254
+ "eval_loss": 0.00014525999722536653,
255
+ "eval_precision": 1.0,
256
+ "eval_recall": 1.0,
257
+ "eval_runtime": 0.5981,
258
+ "eval_samples_per_second": 83.6,
259
+ "eval_steps_per_second": 11.704,
260
+ "step": 1260
261
+ },
262
+ {
263
+ "epoch": 21.0,
264
+ "eval_accuracy": 1.0,
265
+ "eval_f1": 1.0,
266
+ "eval_loss": 0.00013857503654435277,
267
+ "eval_precision": 1.0,
268
+ "eval_recall": 1.0,
269
+ "eval_runtime": 0.5825,
270
+ "eval_samples_per_second": 85.832,
271
+ "eval_steps_per_second": 12.017,
272
+ "step": 1323
273
+ },
274
+ {
275
+ "epoch": 22.0,
276
+ "eval_accuracy": 1.0,
277
+ "eval_f1": 1.0,
278
+ "eval_loss": 0.00013503569061867893,
279
+ "eval_precision": 1.0,
280
+ "eval_recall": 1.0,
281
+ "eval_runtime": 0.573,
282
+ "eval_samples_per_second": 87.267,
283
+ "eval_steps_per_second": 12.217,
284
+ "step": 1386
285
+ },
286
+ {
287
+ "epoch": 23.0,
288
+ "eval_accuracy": 1.0,
289
+ "eval_f1": 1.0,
290
+ "eval_loss": 0.00012487478670664132,
291
+ "eval_precision": 1.0,
292
+ "eval_recall": 1.0,
293
+ "eval_runtime": 0.5904,
294
+ "eval_samples_per_second": 84.687,
295
+ "eval_steps_per_second": 11.856,
296
+ "step": 1449
297
+ },
298
+ {
299
+ "epoch": 23.81,
300
+ "learning_rate": 2.5423728813559323e-06,
301
+ "loss": 0.0019,
302
+ "step": 1500
303
+ },
304
+ {
305
+ "epoch": 24.0,
306
+ "eval_accuracy": 1.0,
307
+ "eval_f1": 1.0,
308
+ "eval_loss": 0.00012091387179680169,
309
+ "eval_precision": 1.0,
310
+ "eval_recall": 1.0,
311
+ "eval_runtime": 0.5792,
312
+ "eval_samples_per_second": 86.322,
313
+ "eval_steps_per_second": 12.085,
314
+ "step": 1512
315
+ },
316
+ {
317
+ "epoch": 25.0,
318
+ "eval_accuracy": 1.0,
319
+ "eval_f1": 1.0,
320
+ "eval_loss": 0.00011829046707134694,
321
+ "eval_precision": 1.0,
322
+ "eval_recall": 1.0,
323
+ "eval_runtime": 0.6205,
324
+ "eval_samples_per_second": 80.581,
325
+ "eval_steps_per_second": 11.281,
326
+ "step": 1575
327
+ }
328
+ ],
329
+ "max_steps": 1575,
330
+ "num_train_epochs": 25,
331
+ "total_flos": 1550715696271320.0,
332
+ "trial_name": null,
333
+ "trial_params": null
334
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf590ad94ed3e06f7ac7bc684fa3a96e42ad6f633084f37c17334fcb46ae88a
3
+ size 3835
vocab.txt ADDED
The diff for this file is too large to render. See raw diff