GlycerinLOL commited on
Commit
b7af886
1 Parent(s): 4115343

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +7 -0
  2. train_results.json +7 -0
  3. trainer_state.json +402 -0
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.99,
3
+ "train_loss": 0.8767006197992594,
4
+ "train_runtime": 37037.5595,
5
+ "train_samples_per_second": 43.199,
6
+ "train_steps_per_second": 0.3
7
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.99,
3
+ "train_loss": 0.8767006197992594,
4
+ "train_runtime": 37037.5595,
5
+ "train_samples_per_second": 43.199,
6
+ "train_steps_per_second": 0.3
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 15.992800575953924,
5
+ "eval_steps": 500,
6
+ "global_step": 11104,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.48,
13
+ "learning_rate": 1.9399615754082615e-05,
14
+ "loss": 2.0304,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.96,
19
+ "learning_rate": 1.879923150816523e-05,
20
+ "loss": 1.781,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 2.0,
25
+ "eval_f1": 0.9088,
26
+ "eval_gen_len": 26.88909090909091,
27
+ "eval_loss": 1.579687476158142,
28
+ "eval_precision": 0.908,
29
+ "eval_recall": 0.91,
30
+ "eval_rouge1": 0.4708,
31
+ "eval_rouge2": 0.2219,
32
+ "eval_rougeL": 0.3892,
33
+ "eval_rougeLsum": 0.389,
34
+ "eval_runtime": 1186.1406,
35
+ "eval_samples_per_second": 4.637,
36
+ "eval_steps_per_second": 0.29,
37
+ "step": 1388
38
+ },
39
+ {
40
+ "epoch": 2.16,
41
+ "learning_rate": 1.729827089337176e-05,
42
+ "loss": 1.7026,
43
+ "step": 1500
44
+ },
45
+ {
46
+ "epoch": 2.88,
47
+ "learning_rate": 1.6397694524495677e-05,
48
+ "loss": 1.6618,
49
+ "step": 2000
50
+ },
51
+ {
52
+ "epoch": 3.0,
53
+ "eval_f1": 0.91,
54
+ "eval_gen_len": 26.728181818181817,
55
+ "eval_loss": 1.5411016941070557,
56
+ "eval_precision": 0.9094,
57
+ "eval_recall": 0.9111,
58
+ "eval_rouge1": 0.4776,
59
+ "eval_rouge2": 0.2303,
60
+ "eval_rougeL": 0.3977,
61
+ "eval_rougeLsum": 0.3973,
62
+ "eval_runtime": 1083.838,
63
+ "eval_samples_per_second": 5.075,
64
+ "eval_steps_per_second": 0.317,
65
+ "step": 2083
66
+ },
67
+ {
68
+ "epoch": 3.6,
69
+ "learning_rate": 1.5497118155619597e-05,
70
+ "loss": 1.626,
71
+ "step": 2500
72
+ },
73
+ {
74
+ "epoch": 4.0,
75
+ "eval_f1": 0.911,
76
+ "eval_gen_len": 26.759636363636364,
77
+ "eval_loss": 1.5170917510986328,
78
+ "eval_precision": 0.9102,
79
+ "eval_recall": 0.9121,
80
+ "eval_rouge1": 0.4834,
81
+ "eval_rouge2": 0.2345,
82
+ "eval_rougeL": 0.402,
83
+ "eval_rougeLsum": 0.402,
84
+ "eval_runtime": 1053.82,
85
+ "eval_samples_per_second": 5.219,
86
+ "eval_steps_per_second": 0.326,
87
+ "step": 2776
88
+ },
89
+ {
90
+ "epoch": 4.32,
91
+ "learning_rate": 1.4596541786743516e-05,
92
+ "loss": 1.5918,
93
+ "step": 3000
94
+ },
95
+ {
96
+ "epoch": 5.0,
97
+ "eval_f1": 0.9112,
98
+ "eval_gen_len": 26.647636363636362,
99
+ "eval_loss": 1.500138521194458,
100
+ "eval_precision": 0.9106,
101
+ "eval_recall": 0.9122,
102
+ "eval_rouge1": 0.4853,
103
+ "eval_rouge2": 0.2365,
104
+ "eval_rougeL": 0.4045,
105
+ "eval_rougeLsum": 0.4045,
106
+ "eval_runtime": 1079.0919,
107
+ "eval_samples_per_second": 5.097,
108
+ "eval_steps_per_second": 0.319,
109
+ "step": 3471
110
+ },
111
+ {
112
+ "epoch": 5.04,
113
+ "learning_rate": 1.3695965417867436e-05,
114
+ "loss": 1.5798,
115
+ "step": 3500
116
+ },
117
+ {
118
+ "epoch": 5.76,
119
+ "learning_rate": 1.2795389048991355e-05,
120
+ "loss": 1.5586,
121
+ "step": 4000
122
+ },
123
+ {
124
+ "epoch": 6.0,
125
+ "eval_f1": 0.9116,
126
+ "eval_gen_len": 26.777818181818184,
127
+ "eval_loss": 1.4880452156066895,
128
+ "eval_precision": 0.9108,
129
+ "eval_recall": 0.9127,
130
+ "eval_rouge1": 0.4875,
131
+ "eval_rouge2": 0.2373,
132
+ "eval_rougeL": 0.4063,
133
+ "eval_rougeLsum": 0.4063,
134
+ "eval_runtime": 1027.5441,
135
+ "eval_samples_per_second": 5.353,
136
+ "eval_steps_per_second": 0.335,
137
+ "step": 4164
138
+ },
139
+ {
140
+ "epoch": 6.48,
141
+ "learning_rate": 1.1894812680115276e-05,
142
+ "loss": 1.5375,
143
+ "step": 4500
144
+ },
145
+ {
146
+ "epoch": 7.0,
147
+ "eval_f1": 0.912,
148
+ "eval_gen_len": 26.39909090909091,
149
+ "eval_loss": 1.4768402576446533,
150
+ "eval_precision": 0.9116,
151
+ "eval_recall": 0.9128,
152
+ "eval_rouge1": 0.4898,
153
+ "eval_rouge2": 0.24,
154
+ "eval_rougeL": 0.4083,
155
+ "eval_rougeLsum": 0.4083,
156
+ "eval_runtime": 922.1893,
157
+ "eval_samples_per_second": 5.964,
158
+ "eval_steps_per_second": 0.373,
159
+ "step": 4858
160
+ },
161
+ {
162
+ "epoch": 7.2,
163
+ "learning_rate": 1.0994236311239194e-05,
164
+ "loss": 1.5228,
165
+ "step": 5000
166
+ },
167
+ {
168
+ "epoch": 7.92,
169
+ "learning_rate": 1.0093659942363115e-05,
170
+ "loss": 1.5146,
171
+ "step": 5500
172
+ },
173
+ {
174
+ "epoch": 8.0,
175
+ "eval_f1": 0.9126,
176
+ "eval_gen_len": 26.156,
177
+ "eval_loss": 1.4685654640197754,
178
+ "eval_precision": 0.9123,
179
+ "eval_recall": 0.9133,
180
+ "eval_rouge1": 0.4907,
181
+ "eval_rouge2": 0.241,
182
+ "eval_rougeL": 0.4088,
183
+ "eval_rougeLsum": 0.4089,
184
+ "eval_runtime": 865.3485,
185
+ "eval_samples_per_second": 6.356,
186
+ "eval_steps_per_second": 0.398,
187
+ "step": 5553
188
+ },
189
+ {
190
+ "epoch": 8.64,
191
+ "learning_rate": 9.193083573487034e-06,
192
+ "loss": 1.5006,
193
+ "step": 6000
194
+ },
195
+ {
196
+ "epoch": 9.0,
197
+ "eval_f1": 0.9127,
198
+ "eval_gen_len": 26.26290909090909,
199
+ "eval_loss": 1.4636152982711792,
200
+ "eval_precision": 0.9122,
201
+ "eval_recall": 0.9135,
202
+ "eval_rouge1": 0.4914,
203
+ "eval_rouge2": 0.2419,
204
+ "eval_rougeL": 0.4097,
205
+ "eval_rougeLsum": 0.4099,
206
+ "eval_runtime": 874.612,
207
+ "eval_samples_per_second": 6.289,
208
+ "eval_steps_per_second": 0.393,
209
+ "step": 6247
210
+ },
211
+ {
212
+ "epoch": 9.36,
213
+ "learning_rate": 8.29250720461095e-06,
214
+ "loss": 1.49,
215
+ "step": 6500
216
+ },
217
+ {
218
+ "epoch": 10.0,
219
+ "eval_f1": 0.9127,
220
+ "eval_gen_len": 26.027272727272727,
221
+ "eval_loss": 1.4580360651016235,
222
+ "eval_precision": 0.9125,
223
+ "eval_recall": 0.9133,
224
+ "eval_rouge1": 0.4911,
225
+ "eval_rouge2": 0.2429,
226
+ "eval_rougeL": 0.4109,
227
+ "eval_rougeLsum": 0.411,
228
+ "eval_runtime": 855.8845,
229
+ "eval_samples_per_second": 6.426,
230
+ "eval_steps_per_second": 0.402,
231
+ "step": 6942
232
+ },
233
+ {
234
+ "epoch": 10.08,
235
+ "learning_rate": 7.391930835734871e-06,
236
+ "loss": 1.485,
237
+ "step": 7000
238
+ },
239
+ {
240
+ "epoch": 10.8,
241
+ "learning_rate": 6.491354466858791e-06,
242
+ "loss": 1.4749,
243
+ "step": 7500
244
+ },
245
+ {
246
+ "epoch": 11.0,
247
+ "eval_f1": 0.9131,
248
+ "eval_gen_len": 26.230363636363638,
249
+ "eval_loss": 1.4546109437942505,
250
+ "eval_precision": 0.9127,
251
+ "eval_recall": 0.9138,
252
+ "eval_rouge1": 0.4932,
253
+ "eval_rouge2": 0.244,
254
+ "eval_rougeL": 0.4121,
255
+ "eval_rougeLsum": 0.4123,
256
+ "eval_runtime": 871.4205,
257
+ "eval_samples_per_second": 6.312,
258
+ "eval_steps_per_second": 0.395,
259
+ "step": 7636
260
+ },
261
+ {
262
+ "epoch": 11.52,
263
+ "learning_rate": 5.590778097982709e-06,
264
+ "loss": 1.4661,
265
+ "step": 8000
266
+ },
267
+ {
268
+ "epoch": 12.0,
269
+ "eval_f1": 0.9132,
270
+ "eval_gen_len": 25.87781818181818,
271
+ "eval_loss": 1.4514495134353638,
272
+ "eval_precision": 0.9133,
273
+ "eval_recall": 0.9136,
274
+ "eval_rouge1": 0.4937,
275
+ "eval_rouge2": 0.2448,
276
+ "eval_rougeL": 0.4126,
277
+ "eval_rougeLsum": 0.4127,
278
+ "eval_runtime": 867.3574,
279
+ "eval_samples_per_second": 6.341,
280
+ "eval_steps_per_second": 0.397,
281
+ "step": 8331
282
+ },
283
+ {
284
+ "epoch": 12.24,
285
+ "learning_rate": 4.690201729106629e-06,
286
+ "loss": 1.4626,
287
+ "step": 8500
288
+ },
289
+ {
290
+ "epoch": 12.96,
291
+ "learning_rate": 3.7896253602305477e-06,
292
+ "loss": 1.4575,
293
+ "step": 9000
294
+ },
295
+ {
296
+ "epoch": 13.0,
297
+ "eval_f1": 0.9133,
298
+ "eval_gen_len": 26.11509090909091,
299
+ "eval_loss": 1.4499082565307617,
300
+ "eval_precision": 0.913,
301
+ "eval_recall": 0.914,
302
+ "eval_rouge1": 0.4947,
303
+ "eval_rouge2": 0.2453,
304
+ "eval_rougeL": 0.4139,
305
+ "eval_rougeLsum": 0.414,
306
+ "eval_runtime": 860.9844,
307
+ "eval_samples_per_second": 6.388,
308
+ "eval_steps_per_second": 0.4,
309
+ "step": 9025
310
+ },
311
+ {
312
+ "epoch": 13.68,
313
+ "learning_rate": 2.8890489913544673e-06,
314
+ "loss": 1.4511,
315
+ "step": 9500
316
+ },
317
+ {
318
+ "epoch": 14.0,
319
+ "eval_f1": 0.9133,
320
+ "eval_gen_len": 26.028727272727274,
321
+ "eval_loss": 1.44780433177948,
322
+ "eval_precision": 0.9131,
323
+ "eval_recall": 0.9138,
324
+ "eval_rouge1": 0.4939,
325
+ "eval_rouge2": 0.2451,
326
+ "eval_rougeL": 0.4133,
327
+ "eval_rougeLsum": 0.4134,
328
+ "eval_runtime": 862.0827,
329
+ "eval_samples_per_second": 6.38,
330
+ "eval_steps_per_second": 0.399,
331
+ "step": 9720
332
+ },
333
+ {
334
+ "epoch": 14.4,
335
+ "learning_rate": 1.988472622478386e-06,
336
+ "loss": 1.4519,
337
+ "step": 10000
338
+ },
339
+ {
340
+ "epoch": 15.0,
341
+ "eval_f1": 0.9133,
342
+ "eval_gen_len": 25.907818181818183,
343
+ "eval_loss": 1.4471020698547363,
344
+ "eval_precision": 0.9132,
345
+ "eval_recall": 0.9137,
346
+ "eval_rouge1": 0.4938,
347
+ "eval_rouge2": 0.2451,
348
+ "eval_rougeL": 0.4134,
349
+ "eval_rougeLsum": 0.4134,
350
+ "eval_runtime": 855.2673,
351
+ "eval_samples_per_second": 6.431,
352
+ "eval_steps_per_second": 0.402,
353
+ "step": 10414
354
+ },
355
+ {
356
+ "epoch": 15.12,
357
+ "learning_rate": 1.0878962536023055e-06,
358
+ "loss": 1.4475,
359
+ "step": 10500
360
+ },
361
+ {
362
+ "epoch": 15.84,
363
+ "learning_rate": 1.8731988472622478e-07,
364
+ "loss": 1.4439,
365
+ "step": 11000
366
+ },
367
+ {
368
+ "epoch": 15.99,
369
+ "eval_f1": 0.9134,
370
+ "eval_gen_len": 25.96290909090909,
371
+ "eval_loss": 1.4468724727630615,
372
+ "eval_precision": 0.9133,
373
+ "eval_recall": 0.9138,
374
+ "eval_rouge1": 0.4939,
375
+ "eval_rouge2": 0.2453,
376
+ "eval_rougeL": 0.4133,
377
+ "eval_rougeLsum": 0.4134,
378
+ "eval_runtime": 864.4194,
379
+ "eval_samples_per_second": 6.363,
380
+ "eval_steps_per_second": 0.398,
381
+ "step": 11104
382
+ },
383
+ {
384
+ "epoch": 15.99,
385
+ "step": 11104,
386
+ "total_flos": 2.2405705733792072e+18,
387
+ "train_loss": 0.8767006197992594,
388
+ "train_runtime": 37037.5595,
389
+ "train_samples_per_second": 43.199,
390
+ "train_steps_per_second": 0.3
391
+ }
392
+ ],
393
+ "logging_steps": 500,
394
+ "max_steps": 11104,
395
+ "num_input_tokens_seen": 0,
396
+ "num_train_epochs": 16,
397
+ "save_steps": 500,
398
+ "total_flos": 2.2405705733792072e+18,
399
+ "train_batch_size": 24,
400
+ "trial_name": null,
401
+ "trial_params": null
402
+ }